From 104aa20dbf05dee9719098722bbda357536d8429 Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Mon, 4 Mar 2024 10:29:16 -0800 Subject: [PATCH 01/10] Adds dependencies Signed-off-by: Peter Alfonsi --- plugins/cache-ehcache/build.gradle | 5 + .../licenses/RoaringBitmap-0.9.49.jar.sha1 | 1 + .../licenses/RoaringBitmap-LICENSE.txt | 191 ++++++++++++++++++ .../licenses/RoaringBitmap-NOTICE.txt | 0 .../licenses/shims-0.9.49.jar.sha1 | 1 + .../cache-ehcache/licenses/shims-LICENSE.txt | 191 ++++++++++++++++++ .../cache-ehcache/licenses/shims-NOTICE.txt | 0 7 files changed, 389 insertions(+) create mode 100644 plugins/cache-ehcache/licenses/RoaringBitmap-0.9.49.jar.sha1 create mode 100644 plugins/cache-ehcache/licenses/RoaringBitmap-LICENSE.txt create mode 100644 plugins/cache-ehcache/licenses/RoaringBitmap-NOTICE.txt create mode 100644 plugins/cache-ehcache/licenses/shims-0.9.49.jar.sha1 create mode 100644 plugins/cache-ehcache/licenses/shims-LICENSE.txt create mode 100644 plugins/cache-ehcache/licenses/shims-NOTICE.txt diff --git a/plugins/cache-ehcache/build.gradle b/plugins/cache-ehcache/build.gradle index 65e7daaaacf26..27e4e28a0b35f 100644 --- a/plugins/cache-ehcache/build.gradle +++ b/plugins/cache-ehcache/build.gradle @@ -23,7 +23,12 @@ versions << [ ] dependencies { + // For ehcache api "org.ehcache:ehcache:${versions.ehcache}" + + // For roaring bitmaps + api 'org.roaringbitmap:RoaringBitmap:0.9.49' + runtimeOnly 'org.roaringbitmap:shims:0.9.49' } thirdPartyAudit { diff --git a/plugins/cache-ehcache/licenses/RoaringBitmap-0.9.49.jar.sha1 b/plugins/cache-ehcache/licenses/RoaringBitmap-0.9.49.jar.sha1 new file mode 100644 index 0000000000000..919a73c074b6a --- /dev/null +++ b/plugins/cache-ehcache/licenses/RoaringBitmap-0.9.49.jar.sha1 @@ -0,0 +1 @@ +b45b49c1ec5c5fc48580412d0ca635e1833110ea \ No newline at end of file diff --git a/plugins/cache-ehcache/licenses/RoaringBitmap-LICENSE.txt b/plugins/cache-ehcache/licenses/RoaringBitmap-LICENSE.txt new file mode 100644 index 0000000000000..a890d4a062fad --- /dev/null +++ b/plugins/cache-ehcache/licenses/RoaringBitmap-LICENSE.txt @@ -0,0 +1,191 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and +distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by the copyright +owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities +that control, are controlled by, or are under common control with that entity. +For the purposes of this definition, "control" means (i) the power, direct or +indirect, to cause the direction or management of such entity, whether by +contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the +outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity exercising +permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, including +but not limited to software source code, documentation source, and configuration +files. + +"Object" form shall mean any form resulting from mechanical transformation or +translation of a Source form, including but not limited to compiled object code, +generated documentation, and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or Object form, made +available under the License, as indicated by a copyright notice that is included +in or attached to the work (an example is provided in the Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object form, that +is based on (or derived from) the Work and for which the editorial revisions, +annotations, elaborations, or other modifications represent, as a whole, an +original work of authorship. For the purposes of this License, Derivative Works +shall not include works that remain separable from, or merely link (or bind by +name) to the interfaces of, the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including the original version +of the Work and any modifications or additions to that Work or Derivative Works +thereof, that is intentionally submitted to Licensor for inclusion in the Work +by the copyright owner or by an individual or Legal Entity authorized to submit +on behalf of the copyright owner. For the purposes of this definition, +"submitted" means any form of electronic, verbal, or written communication sent +to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, and +issue tracking systems that are managed by, or on behalf of, the Licensor for +the purpose of discussing and improving the Work, but excluding communication +that is conspicuously marked or otherwise designated in writing by the copyright +owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf +of whom a Contribution has been received by Licensor and subsequently +incorporated within the Work. + +2. Grant of Copyright License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable copyright license to reproduce, prepare Derivative Works of, +publicly display, publicly perform, sublicense, and distribute the Work and such +Derivative Works in Source or Object form. + +3. Grant of Patent License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable (except as stated in this section) patent license to make, have +made, use, offer to sell, sell, import, and otherwise transfer the Work, where +such license applies only to those patent claims licensable by such Contributor +that are necessarily infringed by their Contribution(s) alone or by combination +of their Contribution(s) with the Work to which such Contribution(s) was +submitted. If You institute patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Work or a +Contribution incorporated within the Work constitutes direct or contributory +patent infringement, then any patent licenses granted to You under this License +for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. + +You may reproduce and distribute copies of the Work or Derivative Works thereof +in any medium, with or without modifications, and in Source or Object form, +provided that You meet the following conditions: + +You must give any other recipients of the Work or Derivative Works a copy of +this License; and +You must cause any modified files to carry prominent notices stating that You +changed the files; and +You must retain, in the Source form of any Derivative Works that You distribute, +all copyright, patent, trademark, and attribution notices from the Source form +of the Work, excluding those notices that do not pertain to any part of the +Derivative Works; and +If the Work includes a "NOTICE" text file as part of its distribution, then any +Derivative Works that You distribute must include a readable copy of the +attribution notices contained within such NOTICE file, excluding those notices +that do not pertain to any part of the Derivative Works, in at least one of the +following places: within a NOTICE text file distributed as part of the +Derivative Works; within the Source form or documentation, if provided along +with the Derivative Works; or, within a display generated by the Derivative +Works, if and wherever such third-party notices normally appear. The contents of +the NOTICE file are for informational purposes only and do not modify the +License. You may add Your own attribution notices within Derivative Works that +You distribute, alongside or as an addendum to the NOTICE text from the Work, +provided that such additional attribution notices cannot be construed as +modifying the License. +You may add Your own copyright statement to Your modifications and may provide +additional or different license terms and conditions for use, reproduction, or +distribution of Your modifications, or for any such Derivative Works as a whole, +provided Your use, reproduction, and distribution of the Work otherwise complies +with the conditions stated in this License. + +5. Submission of Contributions. + +Unless You explicitly state otherwise, any Contribution intentionally submitted +for inclusion in the Work by You to the Licensor shall be under the terms and +conditions of this License, without any additional terms or conditions. +Notwithstanding the above, nothing herein shall supersede or modify the terms of +any separate license agreement you may have executed with Licensor regarding +such Contributions. + +6. Trademarks. + +This License does not grant permission to use the trade names, trademarks, +service marks, or product names of the Licensor, except as required for +reasonable and customary use in describing the origin of the Work and +reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. + +Unless required by applicable law or agreed to in writing, Licensor provides the +Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, +including, without limitation, any warranties or conditions of TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are +solely responsible for determining the appropriateness of using or +redistributing the Work and assume any risks associated with Your exercise of +permissions under this License. + +8. Limitation of Liability. + +In no event and under no legal theory, whether in tort (including negligence), +contract, or otherwise, unless required by applicable law (such as deliberate +and grossly negligent acts) or agreed to in writing, shall any Contributor be +liable to You for damages, including any direct, indirect, special, incidental, +or consequential damages of any character arising as a result of this License or +out of the use or inability to use the Work (including but not limited to +damages for loss of goodwill, work stoppage, computer failure or malfunction, or +any and all other commercial damages or losses), even if such Contributor has +been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. + +While redistributing the Work or Derivative Works thereof, You may choose to +offer, and charge a fee for, acceptance of support, warranty, indemnity, or +other liability obligations and/or rights consistent with this License. However, +in accepting such obligations, You may act only on Your own behalf and on Your +sole responsibility, not on behalf of any other Contributor, and only if You +agree to indemnify, defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason of your +accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work + +To apply the Apache License to your work, attach the following boilerplate +notice, with the fields enclosed by brackets "[]" replaced with your own +identifying information. (Don't include the brackets!) The text should be +enclosed in the appropriate comment syntax for the file format. We also +recommend that a file or class name and description of purpose be included on +the same "printed page" as the copyright notice for easier identification within +third-party archives. + + Copyright 2013-2016 the RoaringBitmap authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/plugins/cache-ehcache/licenses/RoaringBitmap-NOTICE.txt b/plugins/cache-ehcache/licenses/RoaringBitmap-NOTICE.txt new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/plugins/cache-ehcache/licenses/shims-0.9.49.jar.sha1 b/plugins/cache-ehcache/licenses/shims-0.9.49.jar.sha1 new file mode 100644 index 0000000000000..9e76614ca5207 --- /dev/null +++ b/plugins/cache-ehcache/licenses/shims-0.9.49.jar.sha1 @@ -0,0 +1 @@ +8bd7794fbdaa9536354dd2d8d961d9503beb9460 \ No newline at end of file diff --git a/plugins/cache-ehcache/licenses/shims-LICENSE.txt b/plugins/cache-ehcache/licenses/shims-LICENSE.txt new file mode 100644 index 0000000000000..a890d4a062fad --- /dev/null +++ b/plugins/cache-ehcache/licenses/shims-LICENSE.txt @@ -0,0 +1,191 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and +distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by the copyright +owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities +that control, are controlled by, or are under common control with that entity. +For the purposes of this definition, "control" means (i) the power, direct or +indirect, to cause the direction or management of such entity, whether by +contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the +outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity exercising +permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, including +but not limited to software source code, documentation source, and configuration +files. + +"Object" form shall mean any form resulting from mechanical transformation or +translation of a Source form, including but not limited to compiled object code, +generated documentation, and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or Object form, made +available under the License, as indicated by a copyright notice that is included +in or attached to the work (an example is provided in the Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object form, that +is based on (or derived from) the Work and for which the editorial revisions, +annotations, elaborations, or other modifications represent, as a whole, an +original work of authorship. For the purposes of this License, Derivative Works +shall not include works that remain separable from, or merely link (or bind by +name) to the interfaces of, the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including the original version +of the Work and any modifications or additions to that Work or Derivative Works +thereof, that is intentionally submitted to Licensor for inclusion in the Work +by the copyright owner or by an individual or Legal Entity authorized to submit +on behalf of the copyright owner. For the purposes of this definition, +"submitted" means any form of electronic, verbal, or written communication sent +to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, and +issue tracking systems that are managed by, or on behalf of, the Licensor for +the purpose of discussing and improving the Work, but excluding communication +that is conspicuously marked or otherwise designated in writing by the copyright +owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf +of whom a Contribution has been received by Licensor and subsequently +incorporated within the Work. + +2. Grant of Copyright License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable copyright license to reproduce, prepare Derivative Works of, +publicly display, publicly perform, sublicense, and distribute the Work and such +Derivative Works in Source or Object form. + +3. Grant of Patent License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable (except as stated in this section) patent license to make, have +made, use, offer to sell, sell, import, and otherwise transfer the Work, where +such license applies only to those patent claims licensable by such Contributor +that are necessarily infringed by their Contribution(s) alone or by combination +of their Contribution(s) with the Work to which such Contribution(s) was +submitted. If You institute patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Work or a +Contribution incorporated within the Work constitutes direct or contributory +patent infringement, then any patent licenses granted to You under this License +for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. + +You may reproduce and distribute copies of the Work or Derivative Works thereof +in any medium, with or without modifications, and in Source or Object form, +provided that You meet the following conditions: + +You must give any other recipients of the Work or Derivative Works a copy of +this License; and +You must cause any modified files to carry prominent notices stating that You +changed the files; and +You must retain, in the Source form of any Derivative Works that You distribute, +all copyright, patent, trademark, and attribution notices from the Source form +of the Work, excluding those notices that do not pertain to any part of the +Derivative Works; and +If the Work includes a "NOTICE" text file as part of its distribution, then any +Derivative Works that You distribute must include a readable copy of the +attribution notices contained within such NOTICE file, excluding those notices +that do not pertain to any part of the Derivative Works, in at least one of the +following places: within a NOTICE text file distributed as part of the +Derivative Works; within the Source form or documentation, if provided along +with the Derivative Works; or, within a display generated by the Derivative +Works, if and wherever such third-party notices normally appear. The contents of +the NOTICE file are for informational purposes only and do not modify the +License. You may add Your own attribution notices within Derivative Works that +You distribute, alongside or as an addendum to the NOTICE text from the Work, +provided that such additional attribution notices cannot be construed as +modifying the License. +You may add Your own copyright statement to Your modifications and may provide +additional or different license terms and conditions for use, reproduction, or +distribution of Your modifications, or for any such Derivative Works as a whole, +provided Your use, reproduction, and distribution of the Work otherwise complies +with the conditions stated in this License. + +5. Submission of Contributions. + +Unless You explicitly state otherwise, any Contribution intentionally submitted +for inclusion in the Work by You to the Licensor shall be under the terms and +conditions of this License, without any additional terms or conditions. +Notwithstanding the above, nothing herein shall supersede or modify the terms of +any separate license agreement you may have executed with Licensor regarding +such Contributions. + +6. Trademarks. + +This License does not grant permission to use the trade names, trademarks, +service marks, or product names of the Licensor, except as required for +reasonable and customary use in describing the origin of the Work and +reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. + +Unless required by applicable law or agreed to in writing, Licensor provides the +Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, +including, without limitation, any warranties or conditions of TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are +solely responsible for determining the appropriateness of using or +redistributing the Work and assume any risks associated with Your exercise of +permissions under this License. + +8. Limitation of Liability. + +In no event and under no legal theory, whether in tort (including negligence), +contract, or otherwise, unless required by applicable law (such as deliberate +and grossly negligent acts) or agreed to in writing, shall any Contributor be +liable to You for damages, including any direct, indirect, special, incidental, +or consequential damages of any character arising as a result of this License or +out of the use or inability to use the Work (including but not limited to +damages for loss of goodwill, work stoppage, computer failure or malfunction, or +any and all other commercial damages or losses), even if such Contributor has +been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. + +While redistributing the Work or Derivative Works thereof, You may choose to +offer, and charge a fee for, acceptance of support, warranty, indemnity, or +other liability obligations and/or rights consistent with this License. However, +in accepting such obligations, You may act only on Your own behalf and on Your +sole responsibility, not on behalf of any other Contributor, and only if You +agree to indemnify, defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason of your +accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work + +To apply the Apache License to your work, attach the following boilerplate +notice, with the fields enclosed by brackets "[]" replaced with your own +identifying information. (Don't include the brackets!) The text should be +enclosed in the appropriate comment syntax for the file format. We also +recommend that a file or class name and description of purpose be included on +the same "printed page" as the copyright notice for easier identification within +third-party archives. + + Copyright 2013-2016 the RoaringBitmap authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/plugins/cache-ehcache/licenses/shims-NOTICE.txt b/plugins/cache-ehcache/licenses/shims-NOTICE.txt new file mode 100644 index 0000000000000..e69de29bb2d1d From 0da7c2967a8ce512569d9588a0ad26c2c9f6ec10 Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Mon, 4 Mar 2024 10:47:50 -0800 Subject: [PATCH 02/10] Added keystore interface Signed-off-by: Peter Alfonsi --- .../common/cache/keystore/KeyLookupStore.java | 80 +++++++++++++++++++ .../common/cache/keystore/package-info.java | 10 +++ 2 files changed, 90 insertions(+) create mode 100644 server/src/main/java/org/opensearch/common/cache/keystore/KeyLookupStore.java create mode 100644 server/src/main/java/org/opensearch/common/cache/keystore/package-info.java diff --git a/server/src/main/java/org/opensearch/common/cache/keystore/KeyLookupStore.java b/server/src/main/java/org/opensearch/common/cache/keystore/KeyLookupStore.java new file mode 100644 index 0000000000000..f4ee6ce78d7e6 --- /dev/null +++ b/server/src/main/java/org/opensearch/common/cache/keystore/KeyLookupStore.java @@ -0,0 +1,80 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.common.cache.keystore; + +/** + * An interface for objects that hold an in-memory record of hashes of keys in the disk cache. + * These objects have some internal data structure which stores some transformation of added + * int values. The internal representations may have collisions. Example transformations include a modulo + * or -abs(value), or some combination. + */ +public interface KeyLookupStore { + + /** + * Transforms the input value into the internal representation for this keystore + * and adds it to the internal data structure. + * @param value The value to add. + * @return true if the value was added, false if it wasn't added because of a + * collision or if it was already present. + */ + boolean add(T value); + + /** + * Checks if the transformation of the value is in the keystore. + * @param value The value to check. + * @return true if the value was found, false otherwise. Due to collisions, false positives are + * possible, but there should be no false negatives unless forceRemove() is called. + */ + boolean contains(T value); + + /** + * Attempts to safely remove a value from the internal structure, maintaining the property that contains(value) + * will never return a false negative. If removing would lead to a false negative, the value won't be removed. + * Classes may not implement safe removal. + * @param value The value to attempt to remove. + * @return true if the value was removed, false if it wasn't. + */ + boolean remove(T value); + + /** + * Returns the number of distinct values stored in the internal data structure. + * Does not count values which weren't successfully added due to collisions. + * @return The number of values + */ + int getSize(); + + /** + * Returns an estimate of the store's memory usage. + * @return The memory usage + */ + long getMemorySizeInBytes(); + + /** + * Returns the cap for the store's memory usage. + * @return The cap, in bytes + */ + long getMemorySizeCapInBytes(); + + /** + * Returns whether the store is at memory capacity and can't accept more entries + */ + boolean isFull(); + + /** + * Deletes the internal data structure and regenerates it from the values passed in. + * Also resets all stats related to adding. + * @param newValues The keys that should be in the reset structure. + */ + void regenerateStore(T[] newValues); + + /** + * Deletes all keys and resets all stats related to adding. + */ + void clear(); +} diff --git a/server/src/main/java/org/opensearch/common/cache/keystore/package-info.java b/server/src/main/java/org/opensearch/common/cache/keystore/package-info.java new file mode 100644 index 0000000000000..e48e0aa1c03b2 --- /dev/null +++ b/server/src/main/java/org/opensearch/common/cache/keystore/package-info.java @@ -0,0 +1,10 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** Package for keystores. */ +package org.opensearch.common.cache.keystore; From 810096f9b781fae3872ea0b6169af9c658053b38 Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Mon, 4 Mar 2024 10:51:17 -0800 Subject: [PATCH 03/10] Added RBM keystore impl Signed-off-by: Peter Alfonsi --- .../cache/keystore/KeyStoreStats.java | 44 ++ .../cache/keystore/RBMIntKeyLookupStore.java | 381 ++++++++++++++++ .../keystore/RBMIntKeyLookupStoreTests.java | 409 ++++++++++++++++++ 3 files changed, 834 insertions(+) create mode 100644 plugins/cache-ehcache/src/main/java/org/opensearch/cache/keystore/KeyStoreStats.java create mode 100644 plugins/cache-ehcache/src/main/java/org/opensearch/cache/keystore/RBMIntKeyLookupStore.java create mode 100644 plugins/cache-ehcache/src/test/java/org/opensearch/cache/keystore/RBMIntKeyLookupStoreTests.java diff --git a/plugins/cache-ehcache/src/main/java/org/opensearch/cache/keystore/KeyStoreStats.java b/plugins/cache-ehcache/src/main/java/org/opensearch/cache/keystore/KeyStoreStats.java new file mode 100644 index 0000000000000..37fdf47163779 --- /dev/null +++ b/plugins/cache-ehcache/src/main/java/org/opensearch/cache/keystore/KeyStoreStats.java @@ -0,0 +1,44 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cache.keystore; + +import org.opensearch.common.metrics.CounterMetric; + +import java.util.concurrent.atomic.AtomicBoolean; + +/** + * A stats holder for use in KeyLookupStore implementations. + * Getters should be exposed by the KeyLookupStore which uses it. + */ +public class KeyStoreStats { + // Number of entries + protected CounterMetric size; + // Memory cap in bytes + protected long memSizeCapInBytes; + // Number of add attempts + protected CounterMetric numAddAttempts; + // Number of collisions + protected CounterMetric numCollisions; + // True if the store is at capacity + protected AtomicBoolean atCapacity; + // Number of removal attempts + protected CounterMetric numRemovalAttempts; + // Number of successful removal attempts + protected CounterMetric numSuccessfulRemovals; + + protected KeyStoreStats(long memSizeCapInBytes) { + this.size = new CounterMetric(); + this.numAddAttempts = new CounterMetric(); + this.numCollisions = new CounterMetric(); + this.memSizeCapInBytes = memSizeCapInBytes; + this.atCapacity = new AtomicBoolean(false); + this.numRemovalAttempts = new CounterMetric(); + this.numSuccessfulRemovals = new CounterMetric(); + } +} diff --git a/plugins/cache-ehcache/src/main/java/org/opensearch/cache/keystore/RBMIntKeyLookupStore.java b/plugins/cache-ehcache/src/main/java/org/opensearch/cache/keystore/RBMIntKeyLookupStore.java new file mode 100644 index 0000000000000..fe7f64712eda6 --- /dev/null +++ b/plugins/cache-ehcache/src/main/java/org/opensearch/cache/keystore/RBMIntKeyLookupStore.java @@ -0,0 +1,381 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cache.keystore; + +import org.opensearch.common.cache.keystore.KeyLookupStore; +import org.opensearch.common.metrics.CounterMetric; +import org.opensearch.core.common.unit.ByteSizeValue; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantReadWriteLock; + +import org.roaringbitmap.RoaringBitmap; + +/** + * This class implements KeyLookupStore using a roaring bitmap with a modulo applied to values. + * The modulo increases the density of values, which makes RBMs more memory-efficient. The recommended modulo is ~2^28. + * It also maintains a hash set of values which have had collisions. Values which haven't had collisions can be + * safely removed from the store. The fraction of collided values should be low, + * about 0.5% for a store with 10^7 values and a modulo of 2^28. + * The store estimates its memory footprint and will stop adding more values once it reaches its memory cap. + */ +public class RBMIntKeyLookupStore implements KeyLookupStore { + /** + * An enum representing modulo values for use in the keystore + */ + public enum KeystoreModuloValue { + NONE(0), // No modulo applied + TWO_TO_THIRTY_ONE((int) Math.pow(2, 31)), + TWO_TO_TWENTY_NINE((int) Math.pow(2, 29)), // recommended value + TWO_TO_TWENTY_EIGHT((int) Math.pow(2, 28)), + TWO_TO_TWENTY_SIX((int) Math.pow(2, 26)); + + private final int value; + + private KeystoreModuloValue(int value) { + this.value = value; + } + + public int getValue() { + return this.value; + } + } + + // The modulo applied to values before adding into the RBM + protected final int modulo; + private final int modulo_bitmask; + // Since our modulo is always a power of two we can optimize it by ANDing with a particular bitmask + KeyStoreStats stats; + private RoaringBitmap rbm; + private HashMap collidedIntCounters; + private HashMap> removalSets; + private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); + private final Lock readLock = lock.readLock(); + private final Lock writeLock = lock.writeLock(); + private long mostRecentByteEstimate; + + // Refresh size estimate every X new elements. Refreshes use the RBM's internal size estimator, which takes ~0.01 ms, + // so we don't want to do it on every get(), and it doesn't matter much if there are +- 10000 keys in this store + // in terms of storage impact + static final int REFRESH_SIZE_EST_INTERVAL = 10_000; + + // Use this constructor to specify memory cap with default modulo = 2^28, which we found in experiments + // to be the best tradeoff between lower memory usage and risk of collisions + public RBMIntKeyLookupStore(long memSizeCapInBytes) { + this(KeystoreModuloValue.TWO_TO_TWENTY_EIGHT, memSizeCapInBytes); + } + + // Use this constructor to specify memory cap and modulo + public RBMIntKeyLookupStore(KeystoreModuloValue moduloValue, long memSizeCapInBytes) { + this.modulo = moduloValue.getValue(); + if (modulo > 0) { + this.modulo_bitmask = modulo - 1; // keep last log_2(modulo) bits + } else { + this.modulo_bitmask = -1; // -1 in twos complement is all ones -> includes all bits -> same as no modulo + } + this.stats = new KeyStoreStats(memSizeCapInBytes); + this.rbm = new RoaringBitmap(); + this.collidedIntCounters = new HashMap<>(); + this.removalSets = new HashMap<>(); + this.mostRecentByteEstimate = 0L; + } + + private int transform(int value) { + return value & modulo_bitmask; + } + + private void handleCollisions(int transformedValue) { + stats.numCollisions.inc(); + CounterMetric numCollisions = collidedIntCounters.get(transformedValue); + if (numCollisions == null) { // First time the transformedValue has had a collision + numCollisions = new CounterMetric(); + numCollisions.inc(2); // initialize to 2, since the first collision means 2 keys have collided + collidedIntCounters.put(transformedValue, numCollisions); + } else { + numCollisions.inc(); + } + } + + private boolean shouldUpdateByteEstimate() { + return getSize() % REFRESH_SIZE_EST_INTERVAL == 0; + } + + private boolean isAtCapacityLimit() { + return getMemorySizeCapInBytes() > 0 && mostRecentByteEstimate > getMemorySizeCapInBytes(); + } + + @Override + public boolean add(Integer value) { + if (value == null) { + return false; + } + stats.numAddAttempts.inc(); + + if (shouldUpdateByteEstimate()) { + mostRecentByteEstimate = computeMemorySizeInBytes(); + } + if (isAtCapacityLimit()) { + stats.atCapacity.set(true); + return false; + } + int transformedValue = transform(value); + + writeLock.lock(); + try { + if (!rbm.contains(transformedValue)) { + rbm.add(transformedValue); + stats.size.inc(); + return true; + } + // If the value is already pending removal, take it out of the removalList + HashSet removalSet = removalSets.get(transformedValue); + if (removalSet != null) { + removalSet.remove(value); + // Don't increment the counter - this is handled by handleCollisions() later + if (removalSet.isEmpty()) { + removalSets.remove(transformedValue); + } + } + + handleCollisions(transformedValue); + return false; + } finally { + writeLock.unlock(); + } + } + + @Override + public boolean contains(Integer value) { + if (value == null) { + return false; + } + int transformedValue = transform(value); + readLock.lock(); + try { + return rbm.contains(transformedValue); + } finally { + readLock.unlock(); + } + } + + public Integer getInternalRepresentation(Integer value) { + if (value == null) { + return 0; + } + return Integer.valueOf(transform(value)); + } + + /** + * Attempts to remove a value from the keystore. WARNING: Removing keys which have not been added to the keystore + * may cause undefined behavior, including future false negatives!! + * @param value The value to attempt to remove. + * @return true if the value was removed, false otherwise + */ + @Override + public boolean remove(Integer value) { + if (value == null) { + return false; + } + int transformedValue = transform(value); + readLock.lock(); + try { + if (!rbm.contains(transformedValue)) { // saves additional transform() call + return false; + } + stats.numRemovalAttempts.inc(); + } finally { + readLock.unlock(); + } + writeLock.lock(); + try { + CounterMetric numCollisions = collidedIntCounters.get(transformedValue); + if (numCollisions != null) { + // This transformed value has had a collision before + HashSet removalSet = removalSets.get(transformedValue); + if (removalSet == null) { + // First time a removal has been attempted for this transformed value + HashSet newRemovalSet = new HashSet<>(); + newRemovalSet.add(value); // Add the key value, not the transformed value, to the list of attempted removals for this + // transformedValue + removalSets.put(transformedValue, newRemovalSet); + numCollisions.dec(); + } else { + if (removalSet.contains(value)) { + return false; // We have already attempted to remove this value. Do nothing + } + removalSet.add(value); + numCollisions.dec(); + // If numCollisions has reached zero, we can safely remove all values in removalList + if (numCollisions.count() == 0) { + removeFromRBM(transformedValue); + collidedIntCounters.remove(transformedValue); + removalSets.remove(transformedValue); + return true; + } + } + return false; + } + // Otherwise, there's not been a collision for this transformedValue, so we can safely remove + removeFromRBM(transformedValue); + return true; + } finally { + writeLock.unlock(); + } + } + + // Helper fn for remove() + private void removeFromRBM(int transformedValue) { + if (!lock.isWriteLockedByCurrentThread()) { + throw new IllegalStateException("Write Lock must be held when calling this method"); + } + rbm.remove(transformedValue); + stats.size.dec(); + stats.numSuccessfulRemovals.inc(); + } + + @Override + public int getSize() { + readLock.lock(); + try { + return (int) stats.size.count(); + } finally { + readLock.unlock(); + } + } + + public int getAddAttempts() { + return (int) stats.numAddAttempts.count(); + } + + public int getCollisions() { + return (int) stats.numCollisions.count(); + } + + public boolean isCollision(Integer value1, Integer value2) { + if (value1 == null || value2 == null) { + return false; + } + return transform(value1) == transform(value2); + } + + /* + The built-in RBM size estimator is known to work very badly for randomly-distributed data, like the hashes we will be using. + See https://github.com/RoaringBitmap/RoaringBitmap/issues/257. + We ran tests to determine what multiplier you need to get true size from reported size, as a function of log10(# entries / modulo), + and found this piecewise linear function was a good approximation across different modulos. + */ + static double getRBMSizeMultiplier(int numEntries, int modulo) { + double effectiveModulo = (double) modulo / 2; + /* This model was created when we used % operator to calculate modulo. This has range (-modulo, modulo). + Now we have optimized to use a bitmask, which has range [0, modulo). So the number of possible values stored + is halved. */ + if (modulo == 0) { + effectiveModulo = Math.pow(2, 32); + } + double x = Math.log10((double) numEntries / effectiveModulo); + if (x < -5) { + return 7.0; + } + if (x < -2.75) { + return -2.5 * x - 5.5; + } + if (x <= 0) { + return -3.0 / 22.0 * x + 1; + } + return 1; + } + + /** + * Return the most recent memory size estimate, without updating it. + * @return the size estimate (bytes) + */ + @Override + public long getMemorySizeInBytes() { + return mostRecentByteEstimate; + } + + /** + * Calculate a new memory size estimate. This is somewhat expensive, so we don't call this every time we run get(). + * @return a new size estimate (bytes) + */ + private long computeMemorySizeInBytes() { + double multiplier = getRBMSizeMultiplier((int) stats.size.count(), modulo); + return (long) (rbm.getSizeInBytes() * multiplier); + } + + @Override + public long getMemorySizeCapInBytes() { + return stats.memSizeCapInBytes; + } + + @Override + public boolean isFull() { + return stats.atCapacity.get(); + } + + @Override + public void regenerateStore(Integer[] newValues) { + rbm.clear(); + collidedIntCounters = new HashMap<>(); + removalSets = new HashMap<>(); + stats.size = new CounterMetric(); + stats.numAddAttempts = new CounterMetric(); + stats.numCollisions = new CounterMetric(); + stats.numRemovalAttempts = new CounterMetric(); + stats.numSuccessfulRemovals = new CounterMetric(); + for (int i = 0; i < newValues.length; i++) { + if (newValues[i] != null) { + add(newValues[i]); + } + } + } + + @Override + public void clear() { + regenerateStore(new Integer[] {}); + } + + public int getNumRemovalAttempts() { + return (int) stats.numRemovalAttempts.count(); + } + + public int getNumSuccessfulRemovals() { + return (int) stats.numSuccessfulRemovals.count(); + } + + public boolean valueHasHadCollision(Integer value) { + if (value == null) { + return false; + } + return collidedIntCounters.containsKey(transform(value)); + } + + CounterMetric getNumCollisionsForValue(int value) { // package private for testing + return collidedIntCounters.get(transform(value)); + } + + HashSet getRemovalSetForValue(int value) { + return removalSets.get(transform(value)); + } + + /** + * Function to set a new memory size cap. + * TODO: Integrate this with the tiered caching cluster settings PR once this is raised. + * @param newMemSizeCap The new cap size. + */ + protected void setMemSizeCap(ByteSizeValue newMemSizeCap) { + stats.memSizeCapInBytes = newMemSizeCap.getBytes(); + mostRecentByteEstimate = getMemorySizeInBytes(); + if (mostRecentByteEstimate > getMemorySizeCapInBytes()) { + stats.atCapacity.set(true); + } + } +} diff --git a/plugins/cache-ehcache/src/test/java/org/opensearch/cache/keystore/RBMIntKeyLookupStoreTests.java b/plugins/cache-ehcache/src/test/java/org/opensearch/cache/keystore/RBMIntKeyLookupStoreTests.java new file mode 100644 index 0000000000000..f1a65cfafc3e7 --- /dev/null +++ b/plugins/cache-ehcache/src/test/java/org/opensearch/cache/keystore/RBMIntKeyLookupStoreTests.java @@ -0,0 +1,409 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cache.keystore; + +import org.opensearch.common.Randomness; +import org.opensearch.common.metrics.CounterMetric; +import org.opensearch.core.common.unit.ByteSizeUnit; +import org.opensearch.core.common.unit.ByteSizeValue; +import org.opensearch.test.OpenSearchTestCase; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.Random; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.ThreadPoolExecutor; + +import org.roaringbitmap.RoaringBitmap; + +public class RBMIntKeyLookupStoreTests extends OpenSearchTestCase { + + final int BYTES_IN_MB = 1048576; + + public void testInit() { + long memCap = 100 * BYTES_IN_MB; + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(memCap); + assertEquals(0, kls.getSize()); + assertEquals(RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_EIGHT.getValue(), kls.modulo); + assertEquals(memCap, kls.getMemorySizeCapInBytes()); + } + + public void testTransformationLogic() throws Exception { + int modulo = (int) Math.pow(2, 29); + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_NINE, 0L); + int offset = 3; + for (int i = 0; i < 4; i++) { // after this we run into max value, but thats not a flaw with the class design + int posValue = i * modulo + offset; + kls.add(posValue); + assertEquals(offset, (int) kls.getInternalRepresentation(posValue)); + int negValue = -(i * modulo + offset); + kls.add(negValue); + assertEquals(modulo - offset, (int) kls.getInternalRepresentation(negValue)); + } + assertEquals(2, kls.getSize()); + int[] testVals = new int[] { 0, 1, -1, -23495, 23058, modulo, -modulo, Integer.MAX_VALUE, Integer.MIN_VALUE }; + for (int value : testVals) { + assertTrue(kls.getInternalRepresentation(value) < modulo); + assertTrue(kls.getInternalRepresentation(value) >= 0); + } + RBMIntKeyLookupStore no_modulo_kls = new RBMIntKeyLookupStore(RBMIntKeyLookupStore.KeystoreModuloValue.NONE, 0L); + Random rand = Randomness.get(); + for (int i = 0; i < 100; i++) { + int val = rand.nextInt(); + assertEquals(val, (int) no_modulo_kls.getInternalRepresentation(val)); + } + } + + public void testContains() throws Exception { + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_NINE, 0L); + RBMIntKeyLookupStore noModuloKls = new RBMIntKeyLookupStore(RBMIntKeyLookupStore.KeystoreModuloValue.NONE, 0L); + for (int i = 0; i < RBMIntKeyLookupStore.REFRESH_SIZE_EST_INTERVAL + 1000; i++) { + // set upper bound > number of elements to trigger a size check, ensuring we test that too + kls.add(i); + assertTrue(kls.contains(i)); + noModuloKls.add(i); + assertTrue(noModuloKls.contains(i)); + } + } + + public void testAddingStatsGetters() throws Exception { + RBMIntKeyLookupStore.KeystoreModuloValue moduloValue = RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_SIX; + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(moduloValue, 0L); + kls.add(15); + kls.add(-15); + assertEquals(2, kls.getAddAttempts()); + assertEquals(0, kls.getCollisions()); + + int offset = 1; + for (int i = 0; i < 10; i++) { + kls.add(i * moduloValue.getValue() + offset); + } + assertEquals(12, kls.getAddAttempts()); + assertEquals(9, kls.getCollisions()); + } + + public void testRegenerateStore() throws Exception { + int numToAdd = 10000000; + Random rand = Randomness.get(); + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_NINE, 0L); + for (int i = 0; i < numToAdd; i++) { + kls.add(i); + } + assertEquals(numToAdd, kls.getSize()); + Integer[] newVals = new Integer[1000]; // margin accounts for collisions + for (int j = 0; j < newVals.length; j++) { + newVals[j] = rand.nextInt(); + } + kls.regenerateStore(newVals); + assertTrue(Math.abs(kls.getSize() - newVals.length) < 3); // inexact due to collisions + + // test clear() + kls.clear(); + assertEquals(0, kls.getSize()); + } + + public void testAddingDuplicates() throws Exception { + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(0L); + int numToAdd = 4820411; + for (int i = 0; i < numToAdd; i++) { + kls.add(i); + kls.add(i); + } + for (int j = 0; j < 1000; j++) { + kls.add(577); + } + assertEquals(numToAdd, kls.getSize()); + } + + public void testMemoryCapBlocksAdd() throws Exception { + // Now that we're using a modified version of rbm.getSizeInBytes(), which doesn't provide an inverse function, + // we have to test filling just an RBM with random test values first so that we can get the resulting memory cap limit + // to use with our modified size estimate. + // This is much noisier so the precision is lower. + + // It is necessary to use randomly distributed integers for both parts of this test, as we would do with hashes in the cache, + // as that's what our size estimator is designed for. + // If we add a run of integers, our size estimator is not valid, especially for small RBMs. + + int[] maxEntriesArr = new int[] { 1342000, 100000, 3000000 }; + long[] rbmReportedSizes = new long[4]; + Random rand = Randomness.get(); + for (int j = 0; j < maxEntriesArr.length; j++) { + RoaringBitmap rbm = new RoaringBitmap(); + for (int i = 0; i < maxEntriesArr[j]; i++) { + rbm.add(rand.nextInt()); + } + rbmReportedSizes[j] = rbm.getSizeInBytes(); + } + RBMIntKeyLookupStore.KeystoreModuloValue moduloValue = RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_NINE; + for (int i = 0; i < maxEntriesArr.length; i++) { + double multiplier = RBMIntKeyLookupStore.getRBMSizeMultiplier(maxEntriesArr[i], moduloValue.getValue()); + long memSizeCapInBytes = (long) (rbmReportedSizes[i] * multiplier); + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(moduloValue, memSizeCapInBytes); + for (int j = 0; j < maxEntriesArr[i] + 5000; j++) { + kls.add(rand.nextInt()); + } + assertTrue(Math.abs(maxEntriesArr[i] - kls.getSize()) < (double) maxEntriesArr[i] / 10); + } + } + + public void testConcurrency() throws Exception { + Random rand = Randomness.get(); + for (int j = 0; j < 5; j++) { // test with different numbers of threads + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_NINE, 0L); + int numThreads = rand.nextInt(50) + 1; + ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(numThreads); + // In this test we want to add the first 200K numbers and check they're all correctly there. + // We do some duplicates too to ensure those aren't incorrectly added. + int amountToAdd = 200000; + ArrayList> wasAdded = new ArrayList<>(amountToAdd); + ArrayList> duplicatesWasAdded = new ArrayList<>(); + for (int i = 0; i < amountToAdd; i++) { + wasAdded.add(null); + } + for (int i = 0; i < amountToAdd; i++) { + final int val = i; + Future fut = executor.submit(() -> { + boolean didAdd; + try { + didAdd = kls.add(val); + } catch (Exception e) { + throw new RuntimeException(e); + } + return didAdd; + }); + wasAdded.set(val, fut); + if (val % 1000 == 0) { + // do a duplicate add + Future duplicateFut = executor.submit(() -> { + boolean didAdd; + try { + didAdd = kls.add(val); + } catch (Exception e) { + throw new RuntimeException(e); + } + return didAdd; + }); + duplicatesWasAdded.add(duplicateFut); + } + } + int originalAdds = 0; + int duplicateAdds = 0; + for (Future fut : wasAdded) { + if (fut.get()) { + originalAdds++; + } + } + for (Future duplicateFut : duplicatesWasAdded) { + if (duplicateFut.get()) { + duplicateAdds++; + } + } + for (int i = 0; i < amountToAdd; i++) { + assertTrue(kls.contains(i)); + } + assertEquals(amountToAdd, originalAdds + duplicateAdds); + assertEquals(amountToAdd, kls.getSize()); + assertEquals(amountToAdd / 1000, kls.getCollisions()); + executor.shutdown(); + } + } + + public void testRemoveNoCollisions() throws Exception { + long memCap = 100L * BYTES_IN_MB; + int numToAdd = 195000; + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(RBMIntKeyLookupStore.KeystoreModuloValue.NONE, memCap); + // there should be no collisions for sequential positive numbers up to modulo + for (int i = 0; i < numToAdd; i++) { + kls.add(i); + } + for (int i = 0; i < 1000; i++) { + assertTrue(kls.remove(i)); + assertFalse(kls.contains(i)); + assertFalse(kls.valueHasHadCollision(i)); + } + assertEquals(numToAdd - 1000, kls.getSize()); + } + + public void testRemoveWithCollisions() throws Exception { + int modulo = (int) Math.pow(2, 26); + long memCap = 100L * BYTES_IN_MB; + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_SIX, memCap); + for (int i = 0; i < 10; i++) { + kls.add(i); + if (i % 2 == 1) { + kls.add(-i); + assertFalse(kls.valueHasHadCollision(i)); + kls.add(i + modulo); + assertTrue(kls.valueHasHadCollision(i)); + } else { + assertFalse(kls.valueHasHadCollision(i)); + } + } + assertEquals(15, kls.getSize()); + for (int i = 0; i < 10; i++) { + boolean didRemove = kls.remove(i); + if (i % 2 == 1) { + // we expect a collision with i + modulo, so we can't remove + assertFalse(didRemove); + assertTrue(kls.contains(i)); + // but we should be able to remove -i + boolean didRemoveNegative = kls.remove(-i); + assertTrue(didRemoveNegative); + assertFalse(kls.contains(-i)); + } else { + // we expect no collision + assertTrue(didRemove); + assertFalse(kls.contains(i)); + assertFalse(kls.valueHasHadCollision(i)); + } + } + assertEquals(5, kls.getSize()); + int offset = 12; + kls.add(offset); + for (int j = 1; j < 5; j++) { + kls.add(offset + j * modulo); + } + assertEquals(6, kls.getSize()); + assertFalse(kls.remove(offset + modulo)); + assertTrue(kls.valueHasHadCollision(offset + 15 * modulo)); + assertTrue(kls.contains(offset + 17 * modulo)); + } + + public void testNullInputs() throws Exception { + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_NINE, 0L); + assertFalse(kls.add(null)); + assertFalse(kls.contains(null)); + assertEquals(0, (int) kls.getInternalRepresentation(null)); + assertFalse(kls.remove(null)); + assertFalse(kls.isCollision(null, null)); + assertEquals(0, kls.getAddAttempts()); + Integer[] newVals = new Integer[] { 1, 17, -2, null, -4, null }; + kls.regenerateStore(newVals); + assertEquals(4, kls.getSize()); + } + + public void testRemovalLogic() throws Exception { + RBMIntKeyLookupStore.KeystoreModuloValue moduloValue = RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_SIX; + int modulo = moduloValue.getValue(); + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(moduloValue, 0L); + + // Test standard sequence: add K1, K2, K3 which all transform to C, then: + // Remove K3 + // Remove K2, re-add it, re-remove it twice (duplicate should do nothing) + // Remove K1, which should finally actually remove everything + int c = -42; + int k1 = c + modulo; + int k2 = c + 2 * modulo; + int k3 = c + 3 * modulo; + kls.add(k1); + assertTrue(kls.contains(k1)); + assertTrue(kls.contains(k3)); + kls.add(k2); + CounterMetric numCollisions = kls.getNumCollisionsForValue(k2); + assertNotNull(numCollisions); + assertEquals(2, numCollisions.count()); + kls.add(k3); + assertEquals(3, numCollisions.count()); + assertEquals(1, kls.getSize()); + + boolean removed = kls.remove(k3); + assertFalse(removed); + HashSet removalSet = kls.getRemovalSetForValue(k3); + assertEquals(1, removalSet.size()); + assertTrue(removalSet.contains(k3)); + assertEquals(2, numCollisions.count()); + assertEquals(1, kls.getSize()); + + removed = kls.remove(k2); + assertFalse(removed); + assertEquals(2, removalSet.size()); + assertTrue(removalSet.contains(k2)); + assertEquals(1, numCollisions.count()); + assertEquals(1, kls.getSize()); + + kls.add(k2); + assertEquals(1, removalSet.size()); + assertFalse(removalSet.contains(k2)); + assertEquals(2, numCollisions.count()); + assertEquals(1, kls.getSize()); + + removed = kls.remove(k2); + assertFalse(removed); + assertEquals(2, removalSet.size()); + assertTrue(removalSet.contains(k2)); + assertEquals(1, numCollisions.count()); + assertEquals(1, kls.getSize()); + + removed = kls.remove(k2); + assertFalse(removed); + assertEquals(2, removalSet.size()); + assertTrue(removalSet.contains(k2)); + assertEquals(1, numCollisions.count()); + assertEquals(1, kls.getSize()); + + removed = kls.remove(k1); + assertTrue(removed); + assertNull(kls.getRemovalSetForValue(k1)); + assertNull(kls.getNumCollisionsForValue(k1)); + assertEquals(0, kls.getSize()); + } + + public void testRemovalLogicWithHashCollision() throws Exception { + RBMIntKeyLookupStore.KeystoreModuloValue moduloValue = RBMIntKeyLookupStore.KeystoreModuloValue.TWO_TO_TWENTY_SIX; + int modulo = moduloValue.getValue(); + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(moduloValue, 0L); + + // Test adding K1 twice (maybe two keys hash to K1), then removing it twice. + // We expect it to be unable to remove the last one, but there should be no false negatives. + int c = 77; + int k1 = c + modulo; + int k2 = c + 2 * modulo; + kls.add(k1); + kls.add(k2); + CounterMetric numCollisions = kls.getNumCollisionsForValue(k1); + assertEquals(2, numCollisions.count()); + kls.add(k1); + assertEquals(3, numCollisions.count()); + + boolean removed = kls.remove(k1); + assertFalse(removed); + HashSet removalSet = kls.getRemovalSetForValue(k1); + assertTrue(removalSet.contains(k1)); + assertEquals(2, numCollisions.count()); + + removed = kls.remove(k2); + assertFalse(removed); + assertTrue(removalSet.contains(k2)); + assertEquals(1, numCollisions.count()); + + removed = kls.remove(k1); + assertFalse(removed); + assertTrue(removalSet.contains(k1)); + assertEquals(1, numCollisions.count()); + assertTrue(kls.contains(k1)); + assertTrue(kls.contains(k2)); + } + + public void testSetMemSizeCap() throws Exception { + RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(0L); // no memory cap + Random rand = Randomness.get(); + for (int i = 0; i < RBMIntKeyLookupStore.REFRESH_SIZE_EST_INTERVAL * 3; i++) { + kls.add(rand.nextInt()); + } + long memSize = kls.getMemorySizeInBytes(); + assertEquals(0, kls.getMemorySizeCapInBytes()); + kls.setMemSizeCap(new ByteSizeValue(memSize / 2, ByteSizeUnit.BYTES)); + // check the keystore is now full and has its lower cap + assertTrue(kls.isFull()); + assertEquals(memSize / 2, kls.getMemorySizeCapInBytes()); + assertFalse(kls.add(rand.nextInt())); + } +} From 775bd9bfdbaab43a07a0811acf3b6ee11ed58c3a Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Mon, 4 Mar 2024 11:22:25 -0800 Subject: [PATCH 04/10] Integrated with ehcache Signed-off-by: Peter Alfonsi --- .../cache/EhcacheDiskCacheSettings.java | 57 ++++++++++++------- .../cache/store/disk/EhcacheDiskCache.java | 31 ++++++++-- 2 files changed, 64 insertions(+), 24 deletions(-) diff --git a/plugins/cache-ehcache/src/main/java/org/opensearch/cache/EhcacheDiskCacheSettings.java b/plugins/cache-ehcache/src/main/java/org/opensearch/cache/EhcacheDiskCacheSettings.java index 837fd6b268ce6..d1d3aaad79b49 100644 --- a/plugins/cache-ehcache/src/main/java/org/opensearch/cache/EhcacheDiskCacheSettings.java +++ b/plugins/cache-ehcache/src/main/java/org/opensearch/cache/EhcacheDiskCacheSettings.java @@ -12,10 +12,12 @@ import org.opensearch.common.cache.CacheType; import org.opensearch.common.settings.Setting; import org.opensearch.common.unit.TimeValue; +import org.opensearch.core.common.unit.ByteSizeValue; import java.util.HashMap; import java.util.Map; +import static org.opensearch.common.settings.Setting.Property.Dynamic; import static org.opensearch.common.settings.Setting.Property.NodeScope; /** @@ -110,6 +112,22 @@ public class EhcacheDiskCacheSettings { (key) -> Setting.boolSetting(key, false, NodeScope) ); + /** + * Defines whether to use an in-memory keystore to check for probable presence of keys before having to go to disk. + */ + public static final Setting.AffixSetting USE_RBM_KEYSTORE_SETTING = Setting.suffixKeySetting( + EhcacheDiskCache.EhcacheDiskCacheFactory.EHCACHE_DISK_CACHE_NAME + "use_keystore", + (key) -> Setting.boolSetting(key, true, NodeScope) + ); + + /** + * Defines the max size of the RBM keystore if used (as a percentage of heap memory) + */ + public static final Setting.AffixSetting RBM_KEYSTORE_SIZE_SETTING = Setting.suffixKeySetting( + EhcacheDiskCache.EhcacheDiskCacheFactory.EHCACHE_DISK_CACHE_NAME + "keystore_size", + (key) -> Setting.memorySizeSetting(key, "0.05%", NodeScope) + ); + /** * Key for disk segment. */ @@ -150,29 +168,30 @@ public class EhcacheDiskCacheSettings { * Key for listener mode */ public static final String DISK_LISTENER_MODE_SYNC_KEY = "disk_listener_mode"; + /** + * Key for whether to use RBM keystore + */ + public static final String USE_RBM_KEYSTORE_KEY = "use_keystore"; + /** + * Key for the keystore size in bytes + */ + public static final String RBM_KEYSTORE_SIZE_KEY = "keystore_size"; /** * Map of key to setting. */ - private static final Map> KEY_SETTING_MAP = Map.of( - DISK_SEGMENT_KEY, - DISK_SEGMENTS_SETTING, - DISK_CACHE_EXPIRE_AFTER_ACCESS_KEY, - DISK_CACHE_EXPIRE_AFTER_ACCESS_SETTING, - DISK_CACHE_ALIAS_KEY, - DISK_CACHE_ALIAS_SETTING, - DISK_WRITE_CONCURRENCY_KEY, - DISK_WRITE_CONCURRENCY_SETTING, - DISK_WRITE_MAXIMUM_THREADS_KEY, - DISK_WRITE_MAXIMUM_THREADS_SETTING, - DISK_WRITE_MIN_THREADS_KEY, - DISK_WRITE_MINIMUM_THREADS_SETTING, - DISK_STORAGE_PATH_KEY, - DISK_STORAGE_PATH_SETTING, - DISK_MAX_SIZE_IN_BYTES_KEY, - DISK_CACHE_MAX_SIZE_IN_BYTES_SETTING, - DISK_LISTENER_MODE_SYNC_KEY, - DISK_CACHE_LISTENER_MODE_SYNC_SETTING + private static final Map> KEY_SETTING_MAP = Map.ofEntries( + Map.entry(DISK_SEGMENT_KEY, DISK_SEGMENTS_SETTING), + Map.entry(DISK_CACHE_EXPIRE_AFTER_ACCESS_KEY, DISK_CACHE_EXPIRE_AFTER_ACCESS_SETTING), + Map.entry(DISK_CACHE_ALIAS_KEY, DISK_CACHE_ALIAS_SETTING), + Map.entry(DISK_WRITE_CONCURRENCY_KEY, DISK_WRITE_CONCURRENCY_SETTING), + Map.entry(DISK_WRITE_MAXIMUM_THREADS_KEY, DISK_WRITE_MAXIMUM_THREADS_SETTING), + Map.entry(DISK_WRITE_MIN_THREADS_KEY, DISK_WRITE_MINIMUM_THREADS_SETTING), + Map.entry(DISK_STORAGE_PATH_KEY, DISK_STORAGE_PATH_SETTING), + Map.entry(DISK_MAX_SIZE_IN_BYTES_KEY, DISK_CACHE_MAX_SIZE_IN_BYTES_SETTING), + Map.entry(DISK_LISTENER_MODE_SYNC_KEY, DISK_CACHE_LISTENER_MODE_SYNC_SETTING), + Map.entry(USE_RBM_KEYSTORE_KEY, USE_RBM_KEYSTORE_SETTING), + Map.entry(RBM_KEYSTORE_SIZE_KEY, RBM_KEYSTORE_SIZE_SETTING) ); /** diff --git a/plugins/cache-ehcache/src/main/java/org/opensearch/cache/store/disk/EhcacheDiskCache.java b/plugins/cache-ehcache/src/main/java/org/opensearch/cache/store/disk/EhcacheDiskCache.java index ddfd5b838e927..134899546c9f3 100644 --- a/plugins/cache-ehcache/src/main/java/org/opensearch/cache/store/disk/EhcacheDiskCache.java +++ b/plugins/cache-ehcache/src/main/java/org/opensearch/cache/store/disk/EhcacheDiskCache.java @@ -12,6 +12,7 @@ import org.apache.logging.log4j.Logger; import org.opensearch.OpenSearchException; import org.opensearch.cache.EhcacheDiskCacheSettings; +import org.opensearch.cache.keystore.RBMIntKeyLookupStore; import org.opensearch.common.SuppressForbidden; import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.common.cache.CacheType; @@ -20,6 +21,7 @@ import org.opensearch.common.cache.RemovalListener; import org.opensearch.common.cache.RemovalNotification; import org.opensearch.common.cache.RemovalReason; +import org.opensearch.common.cache.keystore.KeyLookupStore; import org.opensearch.common.cache.store.builders.ICacheBuilder; import org.opensearch.common.cache.store.config.CacheConfig; import org.opensearch.common.collect.Tuple; @@ -57,6 +59,7 @@ import org.ehcache.impl.config.store.disk.OffHeapDiskStoreConfiguration; import org.ehcache.spi.loaderwriter.CacheLoadingException; import org.ehcache.spi.loaderwriter.CacheWritingException; +import org.opensearch.core.common.unit.ByteSizeValue; import static org.opensearch.cache.EhcacheDiskCacheSettings.DISK_CACHE_ALIAS_KEY; import static org.opensearch.cache.EhcacheDiskCacheSettings.DISK_CACHE_EXPIRE_AFTER_ACCESS_KEY; @@ -67,6 +70,8 @@ import static org.opensearch.cache.EhcacheDiskCacheSettings.DISK_WRITE_CONCURRENCY_KEY; import static org.opensearch.cache.EhcacheDiskCacheSettings.DISK_WRITE_MAXIMUM_THREADS_KEY; import static org.opensearch.cache.EhcacheDiskCacheSettings.DISK_WRITE_MIN_THREADS_KEY; +import static org.opensearch.cache.EhcacheDiskCacheSettings.RBM_KEYSTORE_SIZE_KEY; +import static org.opensearch.cache.EhcacheDiskCacheSettings.USE_RBM_KEYSTORE_KEY; /** * This variant of disk cache uses Ehcache underneath. @@ -111,6 +116,8 @@ public class EhcacheDiskCache implements ICache { */ Map>> completableFutureMap = new ConcurrentHashMap<>(); + KeyLookupStore keystore = null; + private EhcacheDiskCache(Builder builder) { this.keyType = Objects.requireNonNull(builder.keyType, "Key type shouldn't be null"); this.valueType = Objects.requireNonNull(builder.valueType, "Value type shouldn't be null"); @@ -140,6 +147,15 @@ private EhcacheDiskCache(Builder builder) { this.removalListener = builder.getRemovalListener(); this.ehCacheEventListener = new EhCacheEventListener(builder.getRemovalListener()); this.cache = buildCache(Duration.ofMillis(expireAfterAccess.getMillis()), builder); + boolean useRBMKeystore = (Boolean) EhcacheDiskCacheSettings.getSettingListForCacheType(cacheType) + .get(USE_RBM_KEYSTORE_KEY) + .get(settings); + if (useRBMKeystore) { + long keystoreSize = ((ByteSizeValue) EhcacheDiskCacheSettings.getSettingListForCacheType(cacheType) + .get(RBM_KEYSTORE_SIZE_KEY) + .get(settings)).getBytes(); + this.keystore = new RBMIntKeyLookupStore(keystoreSize); + } } private Cache buildCache(Duration expireAfterAccess, Builder builder) { @@ -236,11 +252,13 @@ public V get(K key) { if (key == null) { throw new IllegalArgumentException("Key passed to ehcache disk cache was null."); } - V value; - try { - value = cache.get(key); - } catch (CacheLoadingException ex) { - throw new OpenSearchException("Exception occurred while trying to fetch item from ehcache disk cache"); + V value = null; + if (keystore == null || keystore.contains(key.hashCode()) || keystore.isFull()) { + try { + value = cache.get(key); + } catch (CacheLoadingException ex) { + throw new OpenSearchException("Exception occurred while trying to fetch item from ehcache disk cache"); + } } return value; } @@ -254,6 +272,9 @@ public V get(K key) { public void put(K key, V value) { try { cache.put(key, value); + if (keystore != null) { + keystore.add(key.hashCode()); + } } catch (CacheWritingException ex) { throw new OpenSearchException("Exception occurred while put item to ehcache disk cache"); } From 2e01c312e871a046c66c427dc9fa2aa6c3dcde89 Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Mon, 4 Mar 2024 11:24:33 -0800 Subject: [PATCH 05/10] spotlessApply Signed-off-by: Peter Alfonsi --- .../java/org/opensearch/cache/EhcacheDiskCacheSettings.java | 1 - .../java/org/opensearch/cache/store/disk/EhcacheDiskCache.java | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/plugins/cache-ehcache/src/main/java/org/opensearch/cache/EhcacheDiskCacheSettings.java b/plugins/cache-ehcache/src/main/java/org/opensearch/cache/EhcacheDiskCacheSettings.java index d1d3aaad79b49..bdcc03d2e4ab8 100644 --- a/plugins/cache-ehcache/src/main/java/org/opensearch/cache/EhcacheDiskCacheSettings.java +++ b/plugins/cache-ehcache/src/main/java/org/opensearch/cache/EhcacheDiskCacheSettings.java @@ -17,7 +17,6 @@ import java.util.HashMap; import java.util.Map; -import static org.opensearch.common.settings.Setting.Property.Dynamic; import static org.opensearch.common.settings.Setting.Property.NodeScope; /** diff --git a/plugins/cache-ehcache/src/main/java/org/opensearch/cache/store/disk/EhcacheDiskCache.java b/plugins/cache-ehcache/src/main/java/org/opensearch/cache/store/disk/EhcacheDiskCache.java index 134899546c9f3..a71ebb397a1d2 100644 --- a/plugins/cache-ehcache/src/main/java/org/opensearch/cache/store/disk/EhcacheDiskCache.java +++ b/plugins/cache-ehcache/src/main/java/org/opensearch/cache/store/disk/EhcacheDiskCache.java @@ -29,6 +29,7 @@ import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; +import org.opensearch.core.common.unit.ByteSizeValue; import java.io.File; import java.time.Duration; @@ -59,7 +60,6 @@ import org.ehcache.impl.config.store.disk.OffHeapDiskStoreConfiguration; import org.ehcache.spi.loaderwriter.CacheLoadingException; import org.ehcache.spi.loaderwriter.CacheWritingException; -import org.opensearch.core.common.unit.ByteSizeValue; import static org.opensearch.cache.EhcacheDiskCacheSettings.DISK_CACHE_ALIAS_KEY; import static org.opensearch.cache.EhcacheDiskCacheSettings.DISK_CACHE_EXPIRE_AFTER_ACCESS_KEY; From b4c8d87d57877d7f0f95ba5184d5eae9513e3a6d Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Mon, 4 Mar 2024 14:00:10 -0800 Subject: [PATCH 06/10] Fixed missing javadoc Signed-off-by: Peter Alfonsi --- .../cache/keystore/KeyStoreStats.java | 15 +++++---- .../cache/keystore/RBMIntKeyLookupStore.java | 32 +++++++++++++------ 2 files changed, 30 insertions(+), 17 deletions(-) diff --git a/plugins/cache-ehcache/src/main/java/org/opensearch/cache/keystore/KeyStoreStats.java b/plugins/cache-ehcache/src/main/java/org/opensearch/cache/keystore/KeyStoreStats.java index 37fdf47163779..15002ebd5a018 100644 --- a/plugins/cache-ehcache/src/main/java/org/opensearch/cache/keystore/KeyStoreStats.java +++ b/plugins/cache-ehcache/src/main/java/org/opensearch/cache/keystore/KeyStoreStats.java @@ -17,21 +17,22 @@ * Getters should be exposed by the KeyLookupStore which uses it. */ public class KeyStoreStats { - // Number of entries + /** Number of entries */ protected CounterMetric size; - // Memory cap in bytes + /** Memory cap in bytes */ protected long memSizeCapInBytes; - // Number of add attempts + /** Number of add attempts */ protected CounterMetric numAddAttempts; - // Number of collisions + /** Number of collisions */ protected CounterMetric numCollisions; - // True if the store is at capacity + /** True if the store is at capacity */ protected AtomicBoolean atCapacity; - // Number of removal attempts + /** Number of removal attempts */ protected CounterMetric numRemovalAttempts; - // Number of successful removal attempts + /** Number of successful removal attempts */ protected CounterMetric numSuccessfulRemovals; + /** Constructor using memory size cap. */ protected KeyStoreStats(long memSizeCapInBytes) { this.size = new CounterMetric(); this.numAddAttempts = new CounterMetric(); diff --git a/plugins/cache-ehcache/src/main/java/org/opensearch/cache/keystore/RBMIntKeyLookupStore.java b/plugins/cache-ehcache/src/main/java/org/opensearch/cache/keystore/RBMIntKeyLookupStore.java index fe7f64712eda6..ac3abb36e94ba 100644 --- a/plugins/cache-ehcache/src/main/java/org/opensearch/cache/keystore/RBMIntKeyLookupStore.java +++ b/plugins/cache-ehcache/src/main/java/org/opensearch/cache/keystore/RBMIntKeyLookupStore.java @@ -32,24 +32,30 @@ public class RBMIntKeyLookupStore implements KeyLookupStore { * An enum representing modulo values for use in the keystore */ public enum KeystoreModuloValue { - NONE(0), // No modulo applied + /** No modulo applied */ + NONE(0), + /** 2^31 */ TWO_TO_THIRTY_ONE((int) Math.pow(2, 31)), - TWO_TO_TWENTY_NINE((int) Math.pow(2, 29)), // recommended value + /** 2^29 */ + TWO_TO_TWENTY_NINE((int) Math.pow(2, 29)), + /** 2^28, recommended value */ TWO_TO_TWENTY_EIGHT((int) Math.pow(2, 28)), + /** 2^26 */ TWO_TO_TWENTY_SIX((int) Math.pow(2, 26)); private final int value; - private KeystoreModuloValue(int value) { + KeystoreModuloValue(int value) { this.value = value; } + /** Get the numerical value. */ public int getValue() { return this.value; } } - // The modulo applied to values before adding into the RBM + /** The modulo applied to values before adding into the RBM */ protected final int modulo; private final int modulo_bitmask; // Since our modulo is always a power of two we can optimize it by ANDing with a particular bitmask @@ -67,13 +73,15 @@ public int getValue() { // in terms of storage impact static final int REFRESH_SIZE_EST_INTERVAL = 10_000; - // Use this constructor to specify memory cap with default modulo = 2^28, which we found in experiments - // to be the best tradeoff between lower memory usage and risk of collisions + /** + Use this constructor to specify memory cap with default modulo = 2^28, which we found in experiments + to be the best tradeoff between lower memory usage and risk of collisions + */ public RBMIntKeyLookupStore(long memSizeCapInBytes) { this(KeystoreModuloValue.TWO_TO_TWENTY_EIGHT, memSizeCapInBytes); } - // Use this constructor to specify memory cap and modulo + /** Use this constructor to specify memory cap and modulo */ public RBMIntKeyLookupStore(KeystoreModuloValue moduloValue, long memSizeCapInBytes) { this.modulo = moduloValue.getValue(); if (modulo > 0) { @@ -166,7 +174,7 @@ public boolean contains(Integer value) { } } - public Integer getInternalRepresentation(Integer value) { + Integer getInternalRepresentation(Integer value) { if (value == null) { return 0; } @@ -251,14 +259,17 @@ public int getSize() { } } + /** Get the number of add attempts */ public int getAddAttempts() { return (int) stats.numAddAttempts.count(); } + /** Get the number of collisions that have happened */ public int getCollisions() { return (int) stats.numCollisions.count(); } + /** Returns true if the two values would collide */ public boolean isCollision(Integer value1, Integer value2) { if (value1 == null || value2 == null) { return false; @@ -343,15 +354,17 @@ public void clear() { regenerateStore(new Integer[] {}); } + /** Get the number of removal attempts so far */ public int getNumRemovalAttempts() { return (int) stats.numRemovalAttempts.count(); } + /** Get the number of successful removal attempts os far */ public int getNumSuccessfulRemovals() { return (int) stats.numSuccessfulRemovals.count(); } - public boolean valueHasHadCollision(Integer value) { + boolean valueHasHadCollision(Integer value) { if (value == null) { return false; } @@ -368,7 +381,6 @@ HashSet getRemovalSetForValue(int value) { /** * Function to set a new memory size cap. - * TODO: Integrate this with the tiered caching cluster settings PR once this is raised. * @param newMemSizeCap The new cap size. */ protected void setMemSizeCap(ByteSizeValue newMemSizeCap) { From 928da04ecd6565716d79fd5cfd341ee9e843cd20 Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Mon, 4 Mar 2024 15:21:33 -0800 Subject: [PATCH 07/10] more javadocs Signed-off-by: Peter Alfonsi --- .../cache/keystore/KeyStoreStats.java | 5 ++++- .../cache/keystore/RBMIntKeyLookupStore.java | 18 ++++++++++++++---- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/plugins/cache-ehcache/src/main/java/org/opensearch/cache/keystore/KeyStoreStats.java b/plugins/cache-ehcache/src/main/java/org/opensearch/cache/keystore/KeyStoreStats.java index 15002ebd5a018..5ed13d857e8a3 100644 --- a/plugins/cache-ehcache/src/main/java/org/opensearch/cache/keystore/KeyStoreStats.java +++ b/plugins/cache-ehcache/src/main/java/org/opensearch/cache/keystore/KeyStoreStats.java @@ -32,7 +32,10 @@ public class KeyStoreStats { /** Number of successful removal attempts */ protected CounterMetric numSuccessfulRemovals; - /** Constructor using memory size cap. */ + /** + * Constructor using memory cap. + * @param memSizeCapInBytes The memory cap in bytes. + */ protected KeyStoreStats(long memSizeCapInBytes) { this.size = new CounterMetric(); this.numAddAttempts = new CounterMetric(); diff --git a/plugins/cache-ehcache/src/main/java/org/opensearch/cache/keystore/RBMIntKeyLookupStore.java b/plugins/cache-ehcache/src/main/java/org/opensearch/cache/keystore/RBMIntKeyLookupStore.java index ac3abb36e94ba..bf3b7e83fdc17 100644 --- a/plugins/cache-ehcache/src/main/java/org/opensearch/cache/keystore/RBMIntKeyLookupStore.java +++ b/plugins/cache-ehcache/src/main/java/org/opensearch/cache/keystore/RBMIntKeyLookupStore.java @@ -74,14 +74,19 @@ public int getValue() { static final int REFRESH_SIZE_EST_INTERVAL = 10_000; /** - Use this constructor to specify memory cap with default modulo = 2^28, which we found in experiments - to be the best tradeoff between lower memory usage and risk of collisions + * Use this constructor to specify memory cap with default modulo = 2^28, which we found in experiments + * to be the best tradeoff between lower memory usage and risk of collisions + * @param memSizeCapInBytes The memory size cap in bytes. */ public RBMIntKeyLookupStore(long memSizeCapInBytes) { this(KeystoreModuloValue.TWO_TO_TWENTY_EIGHT, memSizeCapInBytes); } - /** Use this constructor to specify memory cap and modulo */ + /** + * Use this constructor to specify memory cap and modulo + * @param moduloValue The modulo value. + * @param memSizeCapInBytes The memory size cap in bytes. + */ public RBMIntKeyLookupStore(KeystoreModuloValue moduloValue, long memSizeCapInBytes) { this.modulo = moduloValue.getValue(); if (modulo > 0) { @@ -269,7 +274,12 @@ public int getCollisions() { return (int) stats.numCollisions.count(); } - /** Returns true if the two values would collide */ + /** + * Returns true if the two values would collide + * @param value1 value 1 + * @param value2 value 2 + * @return whether there is a collision + */ public boolean isCollision(Integer value1, Integer value2) { if (value1 == null || value2 == null) { return false; From 5b26d35fa61e6ea3e75ec8d6ac72feed5423ad6a Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Mon, 4 Mar 2024 15:45:30 -0800 Subject: [PATCH 08/10] Even more javadocs Signed-off-by: Peter Alfonsi --- .../java/org/opensearch/cache/keystore/package-info.java | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 plugins/cache-ehcache/src/main/java/org/opensearch/cache/keystore/package-info.java diff --git a/plugins/cache-ehcache/src/main/java/org/opensearch/cache/keystore/package-info.java b/plugins/cache-ehcache/src/main/java/org/opensearch/cache/keystore/package-info.java new file mode 100644 index 0000000000000..f9d1b43d08481 --- /dev/null +++ b/plugins/cache-ehcache/src/main/java/org/opensearch/cache/keystore/package-info.java @@ -0,0 +1,9 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +/** Package for keystores. */ +package org.opensearch.cache.keystore; From af35c6df4b473ff7ea88fd0f04d568d5208aba10 Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Mon, 4 Mar 2024 15:55:40 -0800 Subject: [PATCH 09/10] another spotlessapply Signed-off-by: Peter Alfonsi --- .../main/java/org/opensearch/cache/keystore/package-info.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/cache-ehcache/src/main/java/org/opensearch/cache/keystore/package-info.java b/plugins/cache-ehcache/src/main/java/org/opensearch/cache/keystore/package-info.java index f9d1b43d08481..5829d9afaba06 100644 --- a/plugins/cache-ehcache/src/main/java/org/opensearch/cache/keystore/package-info.java +++ b/plugins/cache-ehcache/src/main/java/org/opensearch/cache/keystore/package-info.java @@ -5,5 +5,5 @@ * this file be licensed under the Apache-2.0 license or a * compatible open source license. */ -/** Package for keystores. */ +/** Package for keystores. */ package org.opensearch.cache.keystore; From 1f7f460c4ddef381e653f2c77612a2a7816a05af Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Fri, 8 Mar 2024 14:12:38 -0800 Subject: [PATCH 10/10] Addressed Sagar's comment Signed-off-by: Peter Alfonsi --- .../cache/EhcacheDiskCacheSettings.java | 19 ++-- .../cache/keystore/DummyKeystore.java | 57 +++++++++++ .../cache/keystore/RBMIntKeyLookupStore.java | 3 + .../cache/store/disk/EhcacheDiskCache.java | 29 +++--- .../store/disk/EhCacheDiskCacheTests.java | 94 +++++++++++++++++++ 5 files changed, 182 insertions(+), 20 deletions(-) create mode 100644 plugins/cache-ehcache/src/main/java/org/opensearch/cache/keystore/DummyKeystore.java diff --git a/plugins/cache-ehcache/src/main/java/org/opensearch/cache/EhcacheDiskCacheSettings.java b/plugins/cache-ehcache/src/main/java/org/opensearch/cache/EhcacheDiskCacheSettings.java index bdcc03d2e4ab8..5eef831047e43 100644 --- a/plugins/cache-ehcache/src/main/java/org/opensearch/cache/EhcacheDiskCacheSettings.java +++ b/plugins/cache-ehcache/src/main/java/org/opensearch/cache/EhcacheDiskCacheSettings.java @@ -8,6 +8,7 @@ package org.opensearch.cache; +import org.opensearch.cache.keystore.RBMIntKeyLookupStore; import org.opensearch.cache.store.disk.EhcacheDiskCache; import org.opensearch.common.cache.CacheType; import org.opensearch.common.settings.Setting; @@ -114,16 +115,16 @@ public class EhcacheDiskCacheSettings { /** * Defines whether to use an in-memory keystore to check for probable presence of keys before having to go to disk. */ - public static final Setting.AffixSetting USE_RBM_KEYSTORE_SETTING = Setting.suffixKeySetting( - EhcacheDiskCache.EhcacheDiskCacheFactory.EHCACHE_DISK_CACHE_NAME + "use_keystore", - (key) -> Setting.boolSetting(key, true, NodeScope) + public static final Setting.AffixSetting USE_KEYSTORE_SETTING = Setting.suffixKeySetting( + EhcacheDiskCache.EhcacheDiskCacheFactory.EHCACHE_DISK_CACHE_NAME + ".keystore", + (key) -> Setting.simpleString(key, RBMIntKeyLookupStore.KEYSTORE_NAME, NodeScope) ); /** * Defines the max size of the RBM keystore if used (as a percentage of heap memory) */ - public static final Setting.AffixSetting RBM_KEYSTORE_SIZE_SETTING = Setting.suffixKeySetting( - EhcacheDiskCache.EhcacheDiskCacheFactory.EHCACHE_DISK_CACHE_NAME + "keystore_size", + public static final Setting.AffixSetting KEYSTORE_SIZE_SETTING = Setting.suffixKeySetting( + EhcacheDiskCache.EhcacheDiskCacheFactory.EHCACHE_DISK_CACHE_NAME + ".keystore_size", (key) -> Setting.memorySizeSetting(key, "0.05%", NodeScope) ); @@ -170,11 +171,11 @@ public class EhcacheDiskCacheSettings { /** * Key for whether to use RBM keystore */ - public static final String USE_RBM_KEYSTORE_KEY = "use_keystore"; + public static final String USE_KEYSTORE_KEY = "use_keystore"; /** * Key for the keystore size in bytes */ - public static final String RBM_KEYSTORE_SIZE_KEY = "keystore_size"; + public static final String KEYSTORE_SIZE_KEY = "keystore_size"; /** * Map of key to setting. @@ -189,8 +190,8 @@ public class EhcacheDiskCacheSettings { Map.entry(DISK_STORAGE_PATH_KEY, DISK_STORAGE_PATH_SETTING), Map.entry(DISK_MAX_SIZE_IN_BYTES_KEY, DISK_CACHE_MAX_SIZE_IN_BYTES_SETTING), Map.entry(DISK_LISTENER_MODE_SYNC_KEY, DISK_CACHE_LISTENER_MODE_SYNC_SETTING), - Map.entry(USE_RBM_KEYSTORE_KEY, USE_RBM_KEYSTORE_SETTING), - Map.entry(RBM_KEYSTORE_SIZE_KEY, RBM_KEYSTORE_SIZE_SETTING) + Map.entry(USE_KEYSTORE_KEY, USE_KEYSTORE_SETTING), + Map.entry(KEYSTORE_SIZE_KEY, KEYSTORE_SIZE_SETTING) ); /** diff --git a/plugins/cache-ehcache/src/main/java/org/opensearch/cache/keystore/DummyKeystore.java b/plugins/cache-ehcache/src/main/java/org/opensearch/cache/keystore/DummyKeystore.java new file mode 100644 index 0000000000000..cb679a009312b --- /dev/null +++ b/plugins/cache-ehcache/src/main/java/org/opensearch/cache/keystore/DummyKeystore.java @@ -0,0 +1,57 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cache.keystore; + +import org.opensearch.common.cache.keystore.KeyLookupStore; + +/** + * A dummy keystore, which will always report that a key is contained in it. + */ +public class DummyKeystore implements KeyLookupStore { + @Override + public boolean add(Integer value) { + return true; + } + + @Override + public boolean contains(Integer value) { + return true; + } + + @Override + public boolean remove(Integer value) { + return true; + } + + @Override + public int getSize() { + return 0; + } + + @Override + public long getMemorySizeInBytes() { + return 0; + } + + @Override + public long getMemorySizeCapInBytes() { + return 0; + } + + @Override + public boolean isFull() { + return false; + } + + @Override + public void regenerateStore(Integer[] newValues) {} + + @Override + public void clear() {} +} diff --git a/plugins/cache-ehcache/src/main/java/org/opensearch/cache/keystore/RBMIntKeyLookupStore.java b/plugins/cache-ehcache/src/main/java/org/opensearch/cache/keystore/RBMIntKeyLookupStore.java index bf3b7e83fdc17..617390fb3921f 100644 --- a/plugins/cache-ehcache/src/main/java/org/opensearch/cache/keystore/RBMIntKeyLookupStore.java +++ b/plugins/cache-ehcache/src/main/java/org/opensearch/cache/keystore/RBMIntKeyLookupStore.java @@ -28,6 +28,9 @@ * The store estimates its memory footprint and will stop adding more values once it reaches its memory cap. */ public class RBMIntKeyLookupStore implements KeyLookupStore { + /** Used in settings to distinguish between keystore types. */ + public static final String KEYSTORE_NAME = "rbm"; + /** * An enum representing modulo values for use in the keystore */ diff --git a/plugins/cache-ehcache/src/main/java/org/opensearch/cache/store/disk/EhcacheDiskCache.java b/plugins/cache-ehcache/src/main/java/org/opensearch/cache/store/disk/EhcacheDiskCache.java index a71ebb397a1d2..da089a5ddd87b 100644 --- a/plugins/cache-ehcache/src/main/java/org/opensearch/cache/store/disk/EhcacheDiskCache.java +++ b/plugins/cache-ehcache/src/main/java/org/opensearch/cache/store/disk/EhcacheDiskCache.java @@ -12,6 +12,7 @@ import org.apache.logging.log4j.Logger; import org.opensearch.OpenSearchException; import org.opensearch.cache.EhcacheDiskCacheSettings; +import org.opensearch.cache.keystore.DummyKeystore; import org.opensearch.cache.keystore.RBMIntKeyLookupStore; import org.opensearch.common.SuppressForbidden; import org.opensearch.common.annotation.ExperimentalApi; @@ -70,8 +71,8 @@ import static org.opensearch.cache.EhcacheDiskCacheSettings.DISK_WRITE_CONCURRENCY_KEY; import static org.opensearch.cache.EhcacheDiskCacheSettings.DISK_WRITE_MAXIMUM_THREADS_KEY; import static org.opensearch.cache.EhcacheDiskCacheSettings.DISK_WRITE_MIN_THREADS_KEY; -import static org.opensearch.cache.EhcacheDiskCacheSettings.RBM_KEYSTORE_SIZE_KEY; -import static org.opensearch.cache.EhcacheDiskCacheSettings.USE_RBM_KEYSTORE_KEY; +import static org.opensearch.cache.EhcacheDiskCacheSettings.KEYSTORE_SIZE_KEY; +import static org.opensearch.cache.EhcacheDiskCacheSettings.USE_KEYSTORE_KEY; /** * This variant of disk cache uses Ehcache underneath. @@ -147,14 +148,14 @@ private EhcacheDiskCache(Builder builder) { this.removalListener = builder.getRemovalListener(); this.ehCacheEventListener = new EhCacheEventListener(builder.getRemovalListener()); this.cache = buildCache(Duration.ofMillis(expireAfterAccess.getMillis()), builder); - boolean useRBMKeystore = (Boolean) EhcacheDiskCacheSettings.getSettingListForCacheType(cacheType) - .get(USE_RBM_KEYSTORE_KEY) - .get(settings); - if (useRBMKeystore) { + String keystoreType = (String) EhcacheDiskCacheSettings.getSettingListForCacheType(cacheType).get(USE_KEYSTORE_KEY).get(settings); + if (keystoreType.equals(RBMIntKeyLookupStore.KEYSTORE_NAME)) { long keystoreSize = ((ByteSizeValue) EhcacheDiskCacheSettings.getSettingListForCacheType(cacheType) - .get(RBM_KEYSTORE_SIZE_KEY) + .get(KEYSTORE_SIZE_KEY) .get(settings)).getBytes(); this.keystore = new RBMIntKeyLookupStore(keystoreSize); + } else { + this.keystore = new DummyKeystore(); } } @@ -253,7 +254,7 @@ public V get(K key) { throw new IllegalArgumentException("Key passed to ehcache disk cache was null."); } V value = null; - if (keystore == null || keystore.contains(key.hashCode()) || keystore.isFull()) { + if (keystore.contains(key.hashCode()) || keystore.isFull()) { try { value = cache.get(key); } catch (CacheLoadingException ex) { @@ -272,9 +273,7 @@ public V get(K key) { public void put(K key, V value) { try { cache.put(key, value); - if (keystore != null) { - keystore.add(key.hashCode()); - } + keystore.add(key.hashCode()); } catch (CacheWritingException ex) { throw new OpenSearchException("Exception occurred while put item to ehcache disk cache"); } @@ -404,6 +403,14 @@ public void close() { } } + /** + * Returns keystore, for testing. + * @return keystore + */ + KeyLookupStore getKeystore() { + return keystore; + } + /** * This iterator wraps ehCache iterator and only iterates over its keys. * @param Type of key diff --git a/plugins/cache-ehcache/src/test/java/org/opensearch/cache/store/disk/EhCacheDiskCacheTests.java b/plugins/cache-ehcache/src/test/java/org/opensearch/cache/store/disk/EhCacheDiskCacheTests.java index d5f5fbb9293bc..bd151e1ff9a93 100644 --- a/plugins/cache-ehcache/src/test/java/org/opensearch/cache/store/disk/EhCacheDiskCacheTests.java +++ b/plugins/cache-ehcache/src/test/java/org/opensearch/cache/store/disk/EhCacheDiskCacheTests.java @@ -9,6 +9,8 @@ package org.opensearch.cache.store.disk; import org.opensearch.cache.EhcacheDiskCacheSettings; +import org.opensearch.cache.keystore.DummyKeystore; +import org.opensearch.cache.keystore.RBMIntKeyLookupStore; import org.opensearch.common.cache.CacheType; import org.opensearch.common.cache.ICache; import org.opensearch.common.cache.LoadAwareCacheLoader; @@ -35,6 +37,7 @@ import static org.opensearch.cache.EhcacheDiskCacheSettings.DISK_LISTENER_MODE_SYNC_KEY; import static org.opensearch.cache.EhcacheDiskCacheSettings.DISK_MAX_SIZE_IN_BYTES_KEY; import static org.opensearch.cache.EhcacheDiskCacheSettings.DISK_STORAGE_PATH_KEY; +import static org.opensearch.cache.EhcacheDiskCacheSettings.USE_KEYSTORE_KEY; import static org.hamcrest.CoreMatchers.instanceOf; public class EhCacheDiskCacheTests extends OpenSearchSingleNodeTestCase { @@ -481,6 +484,97 @@ public String load(String key) throws Exception { } } + public void testKeystoreSettings() throws Exception { + Settings useRBMsettings = Settings.builder() + .put( + EhcacheDiskCacheSettings.getSettingListForCacheType(CacheType.INDICES_REQUEST_CACHE).get(USE_KEYSTORE_KEY).getKey(), + RBMIntKeyLookupStore.KEYSTORE_NAME + ) + .build(); + + MockRemovalListener removalListener = new MockRemovalListener<>(); + try (NodeEnvironment env = newNodeEnvironment(Settings.EMPTY)) { + EhcacheDiskCache ehcacheTest = (EhcacheDiskCache) new EhcacheDiskCache.Builder() + .setDiskCacheAlias("test1") + .setThreadPoolAlias("ehcacheTest") + .setIsEventListenerModeSync(true) + .setStoragePath(env.nodePaths()[0].indicesPath.toString() + "/request_cache") + .setKeyType(String.class) + .setValueType(String.class) + .setCacheType(CacheType.INDICES_REQUEST_CACHE) + .setSettings(useRBMsettings) + .setExpireAfterAccess(TimeValue.MAX_VALUE) + .setMaximumWeightInBytes(CACHE_SIZE_IN_BYTES) + .setRemovalListener(removalListener) + .build(); + assertEquals(RBMIntKeyLookupStore.class, ehcacheTest.getKeystore().getClass()); + ehcacheTest.close(); + } + + Settings useDummySettings = Settings.builder() + .put( + EhcacheDiskCacheSettings.getSettingListForCacheType(CacheType.INDICES_REQUEST_CACHE).get(USE_KEYSTORE_KEY).getKey(), + "unrecognized_name" + ) + .build(); + + try (NodeEnvironment env = newNodeEnvironment(Settings.EMPTY)) { + EhcacheDiskCache ehcacheTest = (EhcacheDiskCache) new EhcacheDiskCache.Builder() + .setDiskCacheAlias("test1") + .setThreadPoolAlias("ehcacheTest") + .setIsEventListenerModeSync(true) + .setStoragePath(env.nodePaths()[0].indicesPath.toString() + "/request_cache") + .setKeyType(String.class) + .setValueType(String.class) + .setCacheType(CacheType.INDICES_REQUEST_CACHE) + .setSettings(useDummySettings) + .setExpireAfterAccess(TimeValue.MAX_VALUE) + .setMaximumWeightInBytes(CACHE_SIZE_IN_BYTES) + .setRemovalListener(removalListener) + .build(); + assertEquals(DummyKeystore.class, ehcacheTest.getKeystore().getClass()); + + // Also test the dummy keystore doesn't incorrectly block any gets + int numKeys = 50; + Map keyValueMap = new HashMap<>(); + for (int i = 0; i < numKeys; i++) { + String key = generateRandomString(50); + String value = generateRandomString(50); + keyValueMap.put(key, value); + ehcacheTest.put(key, value); + } + for (String key : keyValueMap.keySet()) { + assertEquals(keyValueMap.get(key), ehcacheTest.get(key)); + } + ehcacheTest.close(); + } + + // Check the factory correctly gives RBM keystore + try (NodeEnvironment env = newNodeEnvironment(Settings.EMPTY)) { + ICache.Factory ehcacheFactory = new EhcacheDiskCache.EhcacheDiskCacheFactory(); + EhcacheDiskCache ehcacheTest = (EhcacheDiskCache) ehcacheFactory.create( + new CacheConfig.Builder().setValueType(String.class) + .setKeyType(String.class) + .setRemovalListener(removalListener) + .setSettings( + Settings.builder() + .put( + EhcacheDiskCacheSettings.getSettingListForCacheType(CacheType.INDICES_REQUEST_CACHE) + .get(DISK_STORAGE_PATH_KEY) + .getKey(), + env.nodePaths()[0].indicesPath.toString() + "/request_cache" + ) + .build() + ) + .build(), + CacheType.INDICES_REQUEST_CACHE, + Map.of() + ); + assertEquals(RBMIntKeyLookupStore.class, ehcacheTest.getKeystore().getClass()); + ehcacheTest.close(); + } + } + private static String generateRandomString(int length) { String characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"; StringBuilder randomString = new StringBuilder(length);