From ce64facbd316b1bfd0fe429a2896d564e46977c3 Mon Sep 17 00:00:00 2001 From: Mohammad Hasnain Mohsin Rajan Date: Wed, 21 Aug 2024 10:46:59 +0530 Subject: [PATCH 1/5] =?UTF-8?q?Adding=20access=20to=20noSubMatches=20and?= =?UTF-8?q?=20noOverlappingMatches=20in=20Hyphenation=E2=80=A6=20(#13895)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Adding access to noSubMatches and noOverlappingMatches in HyphenationCompoundWordTokenFilter Signed-off-by: Evan Kielley * Add Changelog Entry Signed-off-by: Mohammad Hasnain Mohsin Rajan * test: add hyphenation decompounder tests Signed-off-by: Mohammad Hasnain * test: refactor tests Signed-off-by: Mohammad Hasnain * test: reformat test files Signed-off-by: Mohammad Hasnain * chore: add changelog entry for 2.X Signed-off-by: Mohammad Hasnain * chore: remove 3.x changelog Signed-off-by: Mohammad Hasnain * chore: commonify settingsarr Signed-off-by: Mohammad Hasnain * chore: commonify settingsarr Signed-off-by: Mohammad Hasnain * chore: linting Signed-off-by: Mohammad Hasnain --------- Signed-off-by: Evan Kielley Signed-off-by: Mohammad Hasnain Mohsin Rajan Signed-off-by: Mohammad Hasnain Co-authored-by: Evan Kielley --- CHANGELOG.md | 1 + ...enationCompoundWordTokenFilterFactory.java | 9 +- .../common/CompoundAnalysisTests.java | 63 +- .../opensearch/analysis/common/da_UTF8.xml | 1208 +++++++++++++++++ .../org/opensearch/analysis/common/test1.json | 30 +- .../org/opensearch/analysis/common/test1.yml | 18 + 6 files changed, 1313 insertions(+), 16 deletions(-) create mode 100644 modules/analysis-common/src/test/resources/org/opensearch/analysis/common/da_UTF8.xml diff --git a/CHANGELOG.md b/CHANGELOG.md index c49f506f94bf1..9fcbc3833ca8a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - [Workload Management] QueryGroup resource tracking framework changes ([#13897](https://github.com/opensearch-project/OpenSearch/pull/13897)) - Support filtering on a large list encoded by bitmap ([#14774](https://github.com/opensearch-project/OpenSearch/pull/14774)) - Add slice execution listeners to SearchOperationListener interface ([#15153](https://github.com/opensearch-project/OpenSearch/pull/15153)) +- Adding access to noSubMatches and noOverlappingMatches in Hyphenation ([#13895](https://github.com/opensearch-project/OpenSearch/pull/13895)) ### Dependencies - Bump `netty` from 4.1.111.Final to 4.1.112.Final ([#15081](https://github.com/opensearch-project/OpenSearch/pull/15081)) diff --git a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/HyphenationCompoundWordTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/HyphenationCompoundWordTokenFilterFactory.java index 8d29a347caeb8..181ebe5500ee5 100644 --- a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/HyphenationCompoundWordTokenFilterFactory.java +++ b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/HyphenationCompoundWordTokenFilterFactory.java @@ -54,11 +54,16 @@ */ public class HyphenationCompoundWordTokenFilterFactory extends AbstractCompoundWordTokenFilterFactory { + private final boolean noSubMatches; + private final boolean noOverlappingMatches; private final HyphenationTree hyphenationTree; HyphenationCompoundWordTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, env, name, settings); + noSubMatches = settings.getAsBoolean("no_sub_matches", false); + noOverlappingMatches = settings.getAsBoolean("no_overlapping_matches", false); + String hyphenationPatternsPath = settings.get("hyphenation_patterns_path", null); if (hyphenationPatternsPath == null) { throw new IllegalArgumentException("hyphenation_patterns_path is a required setting."); @@ -85,7 +90,9 @@ public TokenStream create(TokenStream tokenStream) { minWordSize, minSubwordSize, maxSubwordSize, - onlyLongestMatch + onlyLongestMatch, + noSubMatches, + noOverlappingMatches ); } } diff --git a/modules/analysis-common/src/test/java/org/opensearch/analysis/common/CompoundAnalysisTests.java b/modules/analysis-common/src/test/java/org/opensearch/analysis/common/CompoundAnalysisTests.java index a681d9a104ecf..bfe30da50c055 100644 --- a/modules/analysis-common/src/test/java/org/opensearch/analysis/common/CompoundAnalysisTests.java +++ b/modules/analysis-common/src/test/java/org/opensearch/analysis/common/CompoundAnalysisTests.java @@ -50,8 +50,12 @@ import org.opensearch.test.IndexSettingsModule; import org.opensearch.test.OpenSearchTestCase; import org.hamcrest.MatcherAssert; +import org.junit.Before; import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.Path; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -63,17 +67,27 @@ import static org.hamcrest.Matchers.instanceOf; public class CompoundAnalysisTests extends OpenSearchTestCase { + + Settings[] settingsArr; + + @Before + public void initialize() throws IOException { + final Path home = createTempDir(); + copyHyphenationPatternsFile(home); + this.settingsArr = new Settings[] { getJsonSettings(home), getYamlSettings(home) }; + } + public void testDefaultsCompoundAnalysis() throws Exception { - Settings settings = getJsonSettings(); - IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("test", settings); - AnalysisModule analysisModule = createAnalysisModule(settings); - TokenFilterFactory filterFactory = analysisModule.getAnalysisRegistry().buildTokenFilterFactories(idxSettings).get("dict_dec"); - MatcherAssert.assertThat(filterFactory, instanceOf(DictionaryCompoundWordTokenFilterFactory.class)); + for (Settings settings : this.settingsArr) { + IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("test", settings); + AnalysisModule analysisModule = createAnalysisModule(settings); + TokenFilterFactory filterFactory = analysisModule.getAnalysisRegistry().buildTokenFilterFactories(idxSettings).get("dict_dec"); + MatcherAssert.assertThat(filterFactory, instanceOf(DictionaryCompoundWordTokenFilterFactory.class)); + } } public void testDictionaryDecompounder() throws Exception { - Settings[] settingsArr = new Settings[] { getJsonSettings(), getYamlSettings() }; - for (Settings settings : settingsArr) { + for (Settings settings : this.settingsArr) { List terms = analyze(settings, "decompoundingAnalyzer", "donaudampfschiff spargelcremesuppe"); MatcherAssert.assertThat(terms.size(), equalTo(8)); MatcherAssert.assertThat( @@ -83,6 +97,26 @@ public void testDictionaryDecompounder() throws Exception { } } + // Hyphenation Decompounder tests mimic the behavior of lucene tests + // lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestHyphenationCompoundWordTokenFilterFactory.java + public void testHyphenationDecompounder() throws Exception { + for (Settings settings : this.settingsArr) { + List terms = analyze(settings, "hyphenationAnalyzer", "min veninde som er lidt af en læsehest"); + MatcherAssert.assertThat(terms.size(), equalTo(10)); + MatcherAssert.assertThat(terms, hasItems("min", "veninde", "som", "er", "lidt", "af", "en", "læsehest", "læse", "hest")); + } + } + + // Hyphenation Decompounder tests mimic the behavior of lucene tests + // lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestHyphenationCompoundWordTokenFilterFactory.java + public void testHyphenationDecompounderNoSubMatches() throws Exception { + for (Settings settings : this.settingsArr) { + List terms = analyze(settings, "hyphenationAnalyzerNoSubMatches", "basketballkurv"); + MatcherAssert.assertThat(terms.size(), equalTo(3)); + MatcherAssert.assertThat(terms, hasItems("basketballkurv", "basketball", "kurv")); + } + } + private List analyze(Settings settings, String analyzerName, String text) throws IOException { IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("test", settings); AnalysisModule analysisModule = createAnalysisModule(settings); @@ -111,21 +145,28 @@ public Map> getTokenFilters() { })); } - private Settings getJsonSettings() throws IOException { + private void copyHyphenationPatternsFile(Path home) throws IOException { + InputStream hyphenation_patterns_path = getClass().getResourceAsStream("da_UTF8.xml"); + Path config = home.resolve("config"); + Files.createDirectory(config); + Files.copy(hyphenation_patterns_path, config.resolve("da_UTF8.xml")); + } + + private Settings getJsonSettings(Path home) throws IOException { String json = "/org/opensearch/analysis/common/test1.json"; return Settings.builder() .loadFromStream(json, getClass().getResourceAsStream(json), false) .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT) - .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) + .put(Environment.PATH_HOME_SETTING.getKey(), home.toString()) .build(); } - private Settings getYamlSettings() throws IOException { + private Settings getYamlSettings(Path home) throws IOException { String yaml = "/org/opensearch/analysis/common/test1.yml"; return Settings.builder() .loadFromStream(yaml, getClass().getResourceAsStream(yaml), false) .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT) - .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) + .put(Environment.PATH_HOME_SETTING.getKey(), home.toString()) .build(); } } diff --git a/modules/analysis-common/src/test/resources/org/opensearch/analysis/common/da_UTF8.xml b/modules/analysis-common/src/test/resources/org/opensearch/analysis/common/da_UTF8.xml new file mode 100644 index 0000000000000..2c8d203be6881 --- /dev/null +++ b/modules/analysis-common/src/test/resources/org/opensearch/analysis/common/da_UTF8.xml @@ -0,0 +1,1208 @@ + + + + + + + + + + +aA +bB +cC +dD +eE +fF +gG +hH +iI +jJ +kK +lL +mM +nN +oO +pP +qQ +rR +sS +tT +uU +vV +wW +xX +yY +zZ +æÆ +øØ +åÅ + + + +.ae3 +.an3k +.an1s +.be5la +.be1t +.bi4tr +.der3i +.diagno5 +.her3 +.hoved3 +.ne4t5 +.om1 +.ove4 +.po1 +.til3 +.yd5r +ab5le +3abst +a3c +ade5la +5adg +a1e +5afg +5a4f1l +af3r +af4ri +5afs +a4gef +a4gi +ag5in +ag5si +3agti +a4gy +a3h +ais5t +a3j +a5ka +a3ke +a5kr +aku5 +a3la +a1le +a1li +al3k +4alkv +a1lo +al5si +a3lu +a1ly +am4pa +3analy +an4k5r +a3nu +3anv +a5o +a5pe +a3pi +a5po +a1ra +ar5af +1arb +a1re +5arg +a1ri +a3ro +a3sa +a3sc +a1si +a3sk +a3so +3a3sp +a3ste +a3sti +a1ta1 +a1te +a1ti +a4t5in +a1to +ato5v +a5tr +a1tu +a5va +a1ve +a5z +1ba +ba4ti +4bd +1be +be1k +be3ro +be5ru +be1s4 +be1tr +1bi +bi5sk +b1j +4b1n +1bo +bo4gr +bo3ra +bo5re +1br4 +4bs +bs5k +b3so +b1st +b5t +3bu +bu4s5tr +b5w +1by +by5s +4c1c +1ce +ce5ro +3ch +4ch. +ci4o +ck3 +5cy +3da +4d3af +d5anta +da4s +d1b +d1d4 +1de +de5d +4de4lem +der5eri +de4rig +de5sk +d1f +d1g +d3h +1di +di1e +di5l +d3j +d1k +d1l +d1m +4d1n +3do +4dop +d5ov +d1p +4drett +5d4reve +3drif +3driv +d5ros +d5ru +ds5an +ds5in +d1ski +d4sm +d4su +dsu5l +ds5vi +d3ta +d1te +dt5o +d5tr +dt5u +1du +dub5 +d1v +3dy +e5ad +e3af +e5ag +e3ak +e1al +ea4la +e3an +e5ap +e3at +e3bl +ebs3 +e1ci +ed5ar +edde4 +eddel5 +e4do +ed5ra +ed3re +ed3rin +ed4str +e3e +3eff +e3fr +3eft +e3gu +e1h +e3in +ei5s +e3je +e4j5el +e1ka +e3ke +e3kl +4e1ko +e5kr +ek5sa +3eksem +3eksp +e3ku +e1kv +e5ky +e3lad +el3ak +el3ar +e1las +e3le +e4lek +3elem +e1li +5elim +e3lo +el5sa +e5lu +e3ly +e4mad +em4p5le +em1s +en5ak +e4nan +4enn +e4no +en3so +e5nu +e5ol +e3op +e1or +e3ov +epi3 +e1pr +e3ra +er3af +e4rag +e4rak +e1re +e4ref +er5ege +5erhv +e1ri +e4rib +er1k +ero5d +er5ov +er3s +er5tr +e3rum +er5un +e5ry +e1ta +e1te +etek4s +e1ti +e3tj +e1to +e3tr +e3tu +e1ty +e3um +e3un +3eur +e1va +e3ve +e4v3erf +e1vi +e5x +1fa +fa4ce +fags3 +f1b +f1d +1fe +fej4 +fejl1 +f1f +f1g +f1h +1fi +f1k +3fl +1fo +for1en +fo4ri +f1p +f1s4 +4ft +f3ta +f1te +f1ti +f5to +f5tvi +1fu +f1v +3fy +1ga +g3art +g1b +g1d +1ge +4g5enden +ger3in +ge3s +g3f +g1g +g1h +1gi +gi4b +gi3st +5gj +g3k +g1l +g1m +3go +4g5om +g5ov +g3p +1gr +gs1a +gsde4len +g4se +gsha4 +g5sla +gs3or +gs1p +g5s4tide +g4str +gs1v +g3ta +g1te +g1ti +g5to +g3tr +gt4s +g3ud +gun5 +g3v +1gy +g5yd +4ha. +heds3 +he5s +4het +hi4e +hi4n5 +hi3s +ho5ko +ho5ve +4h3t +hun4 +hund3 +hvo4 +i1a +i3b +i4ble +i1c +i3dr +ids5k +i1el +i1en +i3er +i3et. +if3r +i3gu +i3h +i5i +i5j +i1ka +i1ke +ik1l +i5ko +ik3re +ik5ri +iks5t +ik4tu +i3ku +ik3v +i3lag +il3eg +il5ej +il5el +i3li +i4l5id +il3k +i1lo +il5u +i3mu +ind3t +5inf +ings1 +in3s +in4sv +inter1 +i3nu +i3od +i3og +i5ok +i3ol +ion4 +ions1 +i5o5r +i3ot +i5pi +i3pli +i5pr +i3re +i3ri +ir5t +i3sc +i3si +i4sm +is3p +i1ster +i3sti +i5sua +i1ta +i1te +i1ti +i3to +i3tr +it5re. +i1tu +i3ty +i1u +i1va +i1ve +i1vi +j3ag +jde4rer +jds1 +jek4to +4j5en. +j5k +j3le +j3li +jlmeld5 +jlmel4di +j3r +jre5 +ju3s +5kap +k5au +5kav +k5b +kel5s +ke3sk +ke5st +ke4t5a +k3h +ki3e +ki3st +k1k +k5lak +k1le +3klu +k4ny +5kod +1kon +ko3ra +3kort +ko3v +1kra +5kry +ks3an +k1si +ks3k +ks1p +k3ste +k5stu +ks5v +k1t +k4tar +k4terh +kti4e +kt5re +kt5s +3kur +1kus +3kut +k4vo +k4vu +5lab +lad3r +5lagd +la4g3r +5lam +1lat +l1b +ldiagnos5 +l3dr +ld3st +1le. +5led +4lele +le4mo +3len +1ler +1les +4leu +l1f +lfin4 +lfind5 +l1go1 +l3h +li4ga +4l5ins +4l3int +li5o +l3j +l1ke +l1ko +l3ky +l1l +l5mu +lo4du +l3op +4l5or +3lov +4l3p +l4ps +l3r +4ls +lses1 +ls5in +l5sj +l1ta +l4taf +l1te +l4t5erf +l3ti +lt3o +l3tr +l3tu +lu5l +l3ve +l3vi +1ma +m1b +m3d +1me +4m5ej +m3f +m1g +m3h +1mi +mi3k +m5ing +mi4o +mi5sty +m3k +m1l +m1m +mmen5 +m1n +3mo +mo4da +4mop +4m5ov +m1pe +m3pi +m3pl +m1po +m3pr +m1r +mse5s +ms5in +m5sk +ms3p +m3ste +ms5v +m3ta +m3te +m3ti +m3tr +m1ud +1mul +mu1li +3my +3na +4nak +1nal +n1b +n1c +4nd +n3dr +nd5si +nd5sk +nd5sp +1ne +ne5a +ne4da +nemen4 +nement5e +neo4 +n3erk +n5erl +ne5sl +ne5st +n1f +n4go +4n1h +1ni +4nim +ni5o +ni3st +n1ke +n1ko +n3kr +n3ku +n5kv +4n1l +n1m +n1n +1no +n3ord +n5p +n3r +4ns +n3si +n1sku +ns3po +n1sta +n5sti +n1ta +nta4le +n1te +n1ti +ntiali4 +n3to +n1tr +nt4s5t +nt4su +n3tu +n3ty +4n1v +3ny +n3z +o3a +o4as +ob3li +o1c +o4din +od5ri +od5s +od5un +o1e +of5r +o4gek +o4gel +o4g5o +og5re +og5sk +o5h +o5in +oi6s5e +o1j +o3ka +o1ke +o3ku +o3la +o3le +o1li +o1lo +o3lu +o5ly +1omr +on3k +ook5 +o3or +o5ov +o3pi +op3l +op3r +op3s +3opta +4or. +or1an +3ordn +ord5s +o3re. +o3reg +o3rek +o3rer +o3re3s +o3ret +o3ri +3orient +or5im +o4r5in +or3k +or5o +or3sl +or3st +o3si +o3so +o3t +o1te +o5un +ov4s +3pa +pa5gh +p5anl +p3d +4pec +3pen +1per +pe1ra +pe5s +pe3u +p3f +4p5h +1pla +p4lan +4ple. +4pler +4ples +p3m +p3n +5pok +4po3re +3pot +4p5p4 +p4ro +1proc +p3sk +p5so +ps4p +p3st +p1t +1pu +pu5b +p5ule +p5v +5py3 +qu4 +4raf +ra5is +4rarb +r1b +r4d5ar +r3dr +rd4s3 +4reks +1rel +re5la +r5enss +5rese +re5spo +4ress +re3st +re5s4u +5rett +r1f +r1gu +r1h +ri1e +ri5la +4rimo +r4ing +ringse4 +ringso4r +4rinp +4rint +r3ka +r1ke +r1ki +rk3so +r3ku +r1l +rmo4 +r5mu +r1n +ro1b +ro3p +r3or +r3p +r1r +rre5s +rro4n5 +r1sa +r1si +r5skr +r4sk5v +rs4n +r3sp +r5stu +r5su +r3sv +r5tal +r1te +r4teli +r1ti +r3to +r4t5or +rt5rat +rt3re +r5tri +r5tro +rt3s +r5ty +r3ud +run4da +5rut +r3va +r1ve +r3vi +ry4s +s3af +1sam +sa4ma +s3ap +s1ar +1sat +4s1b +s1d +sdy4 +1se +s4ed +5s4er +se4se +s1f +4s1g4 +4s3h +si4bl +1sig +s5int +5sis +5sit +5siu +s5ju +4sk. +1skab +1ske +s3kl +sk5s4 +5sky +s1le +s1li +slo3 +5slu +s5ly +s1m +s4my +4snin +s4nit +so5k +5sol +5som. +3somm +s5oms +5somt +3son +4s1op +sp4 +3spec +4sper +3s4pi +s1pl +3sprog. +s5r4 +s1s4 +4st. +5s4tam +1stan +st5as +3stat +1stav +1ste. +1sted +3stel +5stemo +1sten +5step +3ster. +3stes +5stet +5stj +3sto +st5om +1str +s1ud +3sul +s3un +3sur +s3ve +3s4y +1sy1s +5ta. +1tag +tands3 +4tanv +4tb +tede4l +teds5 +3teg +5tekn +teo1 +5term +te5ro +4t1f +6t3g +t1h +tialis5t +3tid +ti4en +ti3st +4t3k +4t1l +tli4s5 +t1m +t1n +to5ra +to1re +to1ri +tor4m +4t3p +t4ra +4tres +tro5v +1try +4ts +t3si +ts4pa +ts5pr +t3st +ts5ul +4t1t +t5uds +5tur +t5ve +1typ +u1a +5udl +ud5r +ud3s +3udv +u1e +ue4t5 +uge4ri +ugs3 +u5gu +u3i +u5kl +uk4ta +uk4tr +u1la +u1le +u5ly +u5pe +up5l +u5q +u3ra +u3re +u4r3eg +u1rer +u3ro +us5a +u3si +u5ska +u5so +us5v +u1te +u1ti +u1to +ut5r +ut5s4 +5u5v +va5d +3varm +1ved +ve4l5e +ve4reg +ve3s +5vet +v5h +vi4l3in +1vis +v5j +v5k +vl4 +v3le +v5li +vls1 +1vo +4v5om +v5p +v5re +v3st +v5su +v5t +3vu +y3a +y5dr +y3e +y3ke +y5ki +yk3li +y3ko +yk4s5 +y3kv +y5li +y5lo +y5mu +yns5 +y5o +y1pe +y3pi +y3re +yr3ek +y3ri +y3si +y3ti +y5t3r +y5ve +zi5o + +.så3 +.ær5i +.øv3r +a3tø +a5væ +brød3 +5bæ +5drøv +dstå4 +3dæ +3dø +e3læ +e3lø +e3rø +er5øn +e5tæ +e5tø +e1væ +e3æ +e5å +3fæ +3fø +fø4r5en +giø4 +g4sø +g5så +3gæ +3gø1 +3gå +i5tæ +i3ø +3kø +3kå +lingeniø4 +l3væ +5løs +m5tå +1mæ +3mø +3må +n3kæ +n5tæ +3næ +4n5æb +5nø +o5læ +or3ø +o5å +5præ +5pæd +på3 +r5kæ +r5tæ +r5tø +r3væ +r5æl +4røn +5rør +3råd +r5år +s4kå +3slå +s4næ +5stø +1stå +1sæ +4s5æn +1sø +s5øk +så4r5 +ti4ø +3træk. +t4sø +t5så +t3væ +u3læ +3værd +1værk +5vå +y5væ +æb3l +æ3c +æ3e +æg5a +æ4gek +æ4g5r +ægs5 +æ5i +æ5kv +ælle4 +æn1dr +æ5o +æ1re +ær4g5r +æ3ri +ær4ma +ær4mo +ær5s +æ5si +æ3so +æ3ste +æ3ve +øde5 +ø3e +ø1je +ø3ke +ø3le +øms5 +øn3st +øn4t3 +ø1re +ø3ri +ørne3 +ør5o +ø1ve +å1d +å1e +å5h +å3l +å3re +års5t +å5sk +å3t + + diff --git a/test/framework/src/main/resources/org/opensearch/analysis/common/test1.json b/test/framework/src/main/resources/org/opensearch/analysis/common/test1.json index e69c2db6ff400..18e30ec6f87d5 100644 --- a/test/framework/src/main/resources/org/opensearch/analysis/common/test1.json +++ b/test/framework/src/main/resources/org/opensearch/analysis/common/test1.json @@ -19,8 +19,22 @@ "type":"myfilter" }, "dict_dec":{ - "type":"dictionary_decompounder", - "word_list":["donau", "dampf", "schiff", "spargel", "creme", "suppe"] + "type":"dictionary_decompounder", + "word_list":["donau", "dampf", "schiff", "spargel", "creme", "suppe"] + }, + "hyphen_dec":{ + "type":"hyphenation_decompounder", + "word_list":["læse", "hest"], + "hyphenation_patterns_path": "da_UTF8.xml" + }, + "hyphen_dec_no_sub_matches":{ + "type":"hyphenation_decompounder", + "word_list":["basketball", "basket", "ball", "kurv"], + "hyphenation_patterns_path": "da_UTF8.xml", + "only_longest_match": false, + "no_sub_matches": true, + "no_overlapping_matches": false + } }, "analyzer":{ @@ -45,8 +59,16 @@ "filter":["lowercase", "stop", "czech_stem"] }, "decompoundingAnalyzer":{ - "tokenizer":"standard", - "filter":["dict_dec"] + "tokenizer":"standard", + "filter":["dict_dec"] + }, + "hyphenationAnalyzer":{ + "tokenizer":"standard", + "filter":["hyphen_dec"] + }, + "hyphenationAnalyzerNoSubMatches":{ + "tokenizer":"standard", + "filter":["hyphen_dec_no_sub_matches"] } } } diff --git a/test/framework/src/main/resources/org/opensearch/analysis/common/test1.yml b/test/framework/src/main/resources/org/opensearch/analysis/common/test1.yml index 82f933296a314..30862a9d0be8e 100644 --- a/test/framework/src/main/resources/org/opensearch/analysis/common/test1.yml +++ b/test/framework/src/main/resources/org/opensearch/analysis/common/test1.yml @@ -15,6 +15,18 @@ index : dict_dec : type : dictionary_decompounder word_list : [donau, dampf, schiff, spargel, creme, suppe] + hyphen_dec : + type : hyphenation_decompounder + word_list : [læse, hest] + hyphenation_patterns_path: "da_UTF8.xml" + hyphen_dec_no_sub_matches : + type : hyphenation_decompounder + word_list : [basketball, basket, ball, kurv] + hyphenation_patterns_path: "da_UTF8.xml" + only_longest_match: False + no_sub_matches: True + no_overlapping_matches: False + analyzer : standard : type : standard @@ -37,3 +49,9 @@ index : decompoundingAnalyzer : tokenizer : standard filter : [dict_dec] + hyphenationAnalyzer : + tokenizer : standard + filter : [hyphen_dec] + hyphenationAnalyzerNoSubMatches : + tokenizer : standard + filter : [hyphen_dec_no_sub_matches] From 01184decb897dcdfc1b1d36980c4fc4eeddb8e54 Mon Sep 17 00:00:00 2001 From: Ruirui Zhang Date: Tue, 20 Aug 2024 22:19:17 -0700 Subject: [PATCH 2/5] Add Settings related to Workload Management feature (#15028) * add QeryGroup Service tests Signed-off-by: Ruirui Zhang * add PR to changelog Signed-off-by: Ruirui Zhang * change the test directory Signed-off-by: Ruirui Zhang * modify comments to be more specific Signed-off-by: Ruirui Zhang * add test coverage Signed-off-by: Ruirui Zhang * remove QUERY_GROUP_RUN_INTERVAL_SETTING as we'll define it in QueryGroupService Signed-off-by: Ruirui Zhang * address comments Signed-off-by: Ruirui Zhang --- CHANGELOG.md | 1 + .../common/settings/ClusterSettings.java | 9 +- .../wlm/WorkloadManagementSettings.java | 256 ++++++++++++++++++ .../wlm/WorkloadManagementSettingsTests.java | 241 +++++++++++++++++ 4 files changed, 506 insertions(+), 1 deletion(-) create mode 100644 server/src/main/java/org/opensearch/wlm/WorkloadManagementSettings.java create mode 100644 server/src/test/java/org/opensearch/wlm/WorkloadManagementSettingsTests.java diff --git a/CHANGELOG.md b/CHANGELOG.md index 9fcbc3833ca8a..d6943e538fdcc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Add `rangeQuery` and `regexpQuery` for `constant_keyword` field type ([#14711](https://github.com/opensearch-project/OpenSearch/pull/14711)) - Add took time to request nodes stats ([#15054](https://github.com/opensearch-project/OpenSearch/pull/15054)) - [Workload Management] Add Get QueryGroup API Logic ([14709](https://github.com/opensearch-project/OpenSearch/pull/14709)) +- [Workload Management] Add Settings for Workload Management feature ([#15028](https://github.com/opensearch-project/OpenSearch/pull/15028)) - [Workload Management] QueryGroup resource tracking framework changes ([#13897](https://github.com/opensearch-project/OpenSearch/pull/13897)) - Support filtering on a large list encoded by bitmap ([#14774](https://github.com/opensearch-project/OpenSearch/pull/14774)) - Add slice execution listeners to SearchOperationListener interface ([#15153](https://github.com/opensearch-project/OpenSearch/pull/15153)) diff --git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java index d5e8e90458390..49ef87838ed2e 100644 --- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java @@ -173,6 +173,7 @@ import org.opensearch.transport.SniffConnectionStrategy; import org.opensearch.transport.TransportSettings; import org.opensearch.watcher.ResourceWatcherService; +import org.opensearch.wlm.WorkloadManagementSettings; import java.util.Arrays; import java.util.Collections; @@ -766,7 +767,13 @@ public void apply(Settings value, Settings current, Settings previous) { // Composite index settings CompositeIndexSettings.STAR_TREE_INDEX_ENABLED_SETTING, - SystemTemplatesService.SETTING_APPLICATION_BASED_CONFIGURATION_TEMPLATES_ENABLED + SystemTemplatesService.SETTING_APPLICATION_BASED_CONFIGURATION_TEMPLATES_ENABLED, + + // WorkloadManagement settings + WorkloadManagementSettings.NODE_LEVEL_CPU_REJECTION_THRESHOLD, + WorkloadManagementSettings.NODE_LEVEL_CPU_CANCELLATION_THRESHOLD, + WorkloadManagementSettings.NODE_LEVEL_MEMORY_REJECTION_THRESHOLD, + WorkloadManagementSettings.NODE_LEVEL_MEMORY_CANCELLATION_THRESHOLD ) ) ); diff --git a/server/src/main/java/org/opensearch/wlm/WorkloadManagementSettings.java b/server/src/main/java/org/opensearch/wlm/WorkloadManagementSettings.java new file mode 100644 index 0000000000000..b104925df77b3 --- /dev/null +++ b/server/src/main/java/org/opensearch/wlm/WorkloadManagementSettings.java @@ -0,0 +1,256 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.wlm; + +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Setting; +import org.opensearch.common.settings.Settings; + +/** + * Main class to declare Workload Management related settings + */ +public class WorkloadManagementSettings { + private static final Double DEFAULT_NODE_LEVEL_MEMORY_REJECTION_THRESHOLD = 0.8; + private static final Double DEFAULT_NODE_LEVEL_MEMORY_CANCELLATION_THRESHOLD = 0.9; + private static final Double DEFAULT_NODE_LEVEL_CPU_REJECTION_THRESHOLD = 0.8; + private static final Double DEFAULT_NODE_LEVEL_CPU_CANCELLATION_THRESHOLD = 0.9; + public static final double NODE_LEVEL_MEMORY_CANCELLATION_THRESHOLD_MAX_VALUE = 0.95; + public static final double NODE_LEVEL_MEMORY_REJECTION_THRESHOLD_MAX_VALUE = 0.9; + public static final double NODE_LEVEL_CPU_CANCELLATION_THRESHOLD_MAX_VALUE = 0.95; + public static final double NODE_LEVEL_CPU_REJECTION_THRESHOLD_MAX_VALUE = 0.9; + + private Double nodeLevelMemoryCancellationThreshold; + private Double nodeLevelMemoryRejectionThreshold; + private Double nodeLevelCpuCancellationThreshold; + private Double nodeLevelCpuRejectionThreshold; + + /** + * Setting name for node level memory based rejection threshold for QueryGroup service + */ + public static final String NODE_MEMORY_REJECTION_THRESHOLD_SETTING_NAME = "wlm.query_group.node.memory_rejection_threshold"; + /** + * Setting to control the memory based rejection threshold + */ + public static final Setting NODE_LEVEL_MEMORY_REJECTION_THRESHOLD = Setting.doubleSetting( + NODE_MEMORY_REJECTION_THRESHOLD_SETTING_NAME, + DEFAULT_NODE_LEVEL_MEMORY_REJECTION_THRESHOLD, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + /** + * Setting name for node level cpu based rejection threshold for QueryGroup service + */ + public static final String NODE_CPU_REJECTION_THRESHOLD_SETTING_NAME = "wlm.query_group.node.cpu_rejection_threshold"; + /** + * Setting to control the cpu based rejection threshold + */ + public static final Setting NODE_LEVEL_CPU_REJECTION_THRESHOLD = Setting.doubleSetting( + NODE_CPU_REJECTION_THRESHOLD_SETTING_NAME, + DEFAULT_NODE_LEVEL_CPU_REJECTION_THRESHOLD, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + /** + * Setting name for node level memory based cancellation threshold for QueryGroup service + */ + public static final String NODE_MEMORY_CANCELLATION_THRESHOLD_SETTING_NAME = "wlm.query_group.node.memory_cancellation_threshold"; + /** + * Setting to control the memory based cancellation threshold + */ + public static final Setting NODE_LEVEL_MEMORY_CANCELLATION_THRESHOLD = Setting.doubleSetting( + NODE_MEMORY_CANCELLATION_THRESHOLD_SETTING_NAME, + DEFAULT_NODE_LEVEL_MEMORY_CANCELLATION_THRESHOLD, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + /** + * Setting name for node level cpu based cancellation threshold for QueryGroup service + */ + public static final String NODE_CPU_CANCELLATION_THRESHOLD_SETTING_NAME = "wlm.query_group.node.cpu_cancellation_threshold"; + /** + * Setting to control the cpu based cancellation threshold + */ + public static final Setting NODE_LEVEL_CPU_CANCELLATION_THRESHOLD = Setting.doubleSetting( + NODE_CPU_CANCELLATION_THRESHOLD_SETTING_NAME, + DEFAULT_NODE_LEVEL_CPU_CANCELLATION_THRESHOLD, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * QueryGroup service settings constructor + * @param settings - QueryGroup service settings + * @param clusterSettings - QueryGroup cluster settings + */ + public WorkloadManagementSettings(Settings settings, ClusterSettings clusterSettings) { + nodeLevelMemoryCancellationThreshold = NODE_LEVEL_MEMORY_CANCELLATION_THRESHOLD.get(settings); + nodeLevelMemoryRejectionThreshold = NODE_LEVEL_MEMORY_REJECTION_THRESHOLD.get(settings); + nodeLevelCpuCancellationThreshold = NODE_LEVEL_CPU_CANCELLATION_THRESHOLD.get(settings); + nodeLevelCpuRejectionThreshold = NODE_LEVEL_CPU_REJECTION_THRESHOLD.get(settings); + + ensureRejectionThresholdIsLessThanCancellation( + nodeLevelMemoryRejectionThreshold, + nodeLevelMemoryCancellationThreshold, + NODE_MEMORY_REJECTION_THRESHOLD_SETTING_NAME, + NODE_MEMORY_CANCELLATION_THRESHOLD_SETTING_NAME + ); + ensureRejectionThresholdIsLessThanCancellation( + nodeLevelCpuRejectionThreshold, + nodeLevelCpuCancellationThreshold, + NODE_CPU_REJECTION_THRESHOLD_SETTING_NAME, + NODE_CPU_CANCELLATION_THRESHOLD_SETTING_NAME + ); + + clusterSettings.addSettingsUpdateConsumer(NODE_LEVEL_MEMORY_CANCELLATION_THRESHOLD, this::setNodeLevelMemoryCancellationThreshold); + clusterSettings.addSettingsUpdateConsumer(NODE_LEVEL_MEMORY_REJECTION_THRESHOLD, this::setNodeLevelMemoryRejectionThreshold); + clusterSettings.addSettingsUpdateConsumer(NODE_LEVEL_CPU_CANCELLATION_THRESHOLD, this::setNodeLevelCpuCancellationThreshold); + clusterSettings.addSettingsUpdateConsumer(NODE_LEVEL_CPU_REJECTION_THRESHOLD, this::setNodeLevelCpuRejectionThreshold); + } + + /** + * Method to get the node level memory based cancellation threshold + * @return current node level memory based cancellation threshold + */ + public Double getNodeLevelMemoryCancellationThreshold() { + return nodeLevelMemoryCancellationThreshold; + } + + /** + * Method to set the node level memory based cancellation threshold + * @param nodeLevelMemoryCancellationThreshold sets the new node level memory based cancellation threshold + * @throws IllegalArgumentException if the value is > 0.95 and cancellation < rejection threshold + */ + public void setNodeLevelMemoryCancellationThreshold(Double nodeLevelMemoryCancellationThreshold) { + if (Double.compare(nodeLevelMemoryCancellationThreshold, NODE_LEVEL_MEMORY_CANCELLATION_THRESHOLD_MAX_VALUE) > 0) { + throw new IllegalArgumentException( + NODE_MEMORY_CANCELLATION_THRESHOLD_SETTING_NAME + " value cannot be greater than 0.95 as it can result in a node drop" + ); + } + + ensureRejectionThresholdIsLessThanCancellation( + nodeLevelMemoryRejectionThreshold, + nodeLevelMemoryCancellationThreshold, + NODE_MEMORY_REJECTION_THRESHOLD_SETTING_NAME, + NODE_MEMORY_CANCELLATION_THRESHOLD_SETTING_NAME + ); + + this.nodeLevelMemoryCancellationThreshold = nodeLevelMemoryCancellationThreshold; + } + + /** + * Method to get the node level cpu based cancellation threshold + * @return current node level cpu based cancellation threshold + */ + public Double getNodeLevelCpuCancellationThreshold() { + return nodeLevelCpuCancellationThreshold; + } + + /** + * Method to set the node level cpu based cancellation threshold + * @param nodeLevelCpuCancellationThreshold sets the new node level cpu based cancellation threshold + * @throws IllegalArgumentException if the value is > 0.95 and cancellation < rejection threshold + */ + public void setNodeLevelCpuCancellationThreshold(Double nodeLevelCpuCancellationThreshold) { + if (Double.compare(nodeLevelCpuCancellationThreshold, NODE_LEVEL_CPU_CANCELLATION_THRESHOLD_MAX_VALUE) > 0) { + throw new IllegalArgumentException( + NODE_CPU_CANCELLATION_THRESHOLD_SETTING_NAME + " value cannot be greater than 0.95 as it can result in a node drop" + ); + } + + ensureRejectionThresholdIsLessThanCancellation( + nodeLevelCpuRejectionThreshold, + nodeLevelCpuCancellationThreshold, + NODE_CPU_REJECTION_THRESHOLD_SETTING_NAME, + NODE_CPU_CANCELLATION_THRESHOLD_SETTING_NAME + ); + + this.nodeLevelCpuCancellationThreshold = nodeLevelCpuCancellationThreshold; + } + + /** + * Method to get the memory based node level rejection threshold + * @return the current memory based node level rejection threshold + */ + public Double getNodeLevelMemoryRejectionThreshold() { + return nodeLevelMemoryRejectionThreshold; + } + + /** + * Method to set the node level memory based rejection threshold + * @param nodeLevelMemoryRejectionThreshold sets the new memory based rejection threshold + * @throws IllegalArgumentException if rejection > 0.90 and rejection < cancellation threshold + */ + public void setNodeLevelMemoryRejectionThreshold(Double nodeLevelMemoryRejectionThreshold) { + if (Double.compare(nodeLevelMemoryRejectionThreshold, NODE_LEVEL_MEMORY_REJECTION_THRESHOLD_MAX_VALUE) > 0) { + throw new IllegalArgumentException( + NODE_MEMORY_REJECTION_THRESHOLD_SETTING_NAME + " value cannot be greater than 0.90 as it can result in a node drop" + ); + } + + ensureRejectionThresholdIsLessThanCancellation( + nodeLevelMemoryRejectionThreshold, + nodeLevelMemoryCancellationThreshold, + NODE_MEMORY_REJECTION_THRESHOLD_SETTING_NAME, + NODE_MEMORY_CANCELLATION_THRESHOLD_SETTING_NAME + ); + + this.nodeLevelMemoryRejectionThreshold = nodeLevelMemoryRejectionThreshold; + } + + /** + * Method to get the cpu based node level rejection threshold + * @return the current cpu based node level rejection threshold + */ + public Double getNodeLevelCpuRejectionThreshold() { + return nodeLevelCpuRejectionThreshold; + } + + /** + * Method to set the node level cpu based rejection threshold + * @param nodeLevelCpuRejectionThreshold sets the new cpu based rejection threshold + * @throws IllegalArgumentException if rejection > 0.90 and rejection < cancellation threshold + */ + public void setNodeLevelCpuRejectionThreshold(Double nodeLevelCpuRejectionThreshold) { + if (Double.compare(nodeLevelCpuRejectionThreshold, NODE_LEVEL_CPU_REJECTION_THRESHOLD_MAX_VALUE) > 0) { + throw new IllegalArgumentException( + NODE_CPU_REJECTION_THRESHOLD_SETTING_NAME + " value cannot be greater than 0.90 as it can result in a node drop" + ); + } + + ensureRejectionThresholdIsLessThanCancellation( + nodeLevelCpuRejectionThreshold, + nodeLevelCpuCancellationThreshold, + NODE_CPU_REJECTION_THRESHOLD_SETTING_NAME, + NODE_CPU_CANCELLATION_THRESHOLD_SETTING_NAME + ); + + this.nodeLevelCpuRejectionThreshold = nodeLevelCpuRejectionThreshold; + } + + /** + * Method to validate that the cancellation threshold is greater than or equal to rejection threshold + * @param nodeLevelRejectionThreshold rejection threshold to be compared + * @param nodeLevelCancellationThreshold cancellation threshold to be compared + * @param rejectionThresholdSettingName name of the rejection threshold setting + * @param cancellationThresholdSettingName name of the cancellation threshold setting + * @throws IllegalArgumentException if cancellation threshold is less than rejection threshold + */ + private void ensureRejectionThresholdIsLessThanCancellation( + Double nodeLevelRejectionThreshold, + Double nodeLevelCancellationThreshold, + String rejectionThresholdSettingName, + String cancellationThresholdSettingName + ) { + if (Double.compare(nodeLevelCancellationThreshold, nodeLevelRejectionThreshold) < 0) { + throw new IllegalArgumentException( + cancellationThresholdSettingName + " value should not be less than " + rejectionThresholdSettingName + ); + } + } +} diff --git a/server/src/test/java/org/opensearch/wlm/WorkloadManagementSettingsTests.java b/server/src/test/java/org/opensearch/wlm/WorkloadManagementSettingsTests.java new file mode 100644 index 0000000000000..0f183555781d3 --- /dev/null +++ b/server/src/test/java/org/opensearch/wlm/WorkloadManagementSettingsTests.java @@ -0,0 +1,241 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.wlm; + +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; +import org.opensearch.test.OpenSearchTestCase; + +import static org.opensearch.wlm.WorkloadManagementSettings.NODE_CPU_CANCELLATION_THRESHOLD_SETTING_NAME; +import static org.opensearch.wlm.WorkloadManagementSettings.NODE_CPU_REJECTION_THRESHOLD_SETTING_NAME; +import static org.opensearch.wlm.WorkloadManagementSettings.NODE_MEMORY_CANCELLATION_THRESHOLD_SETTING_NAME; +import static org.opensearch.wlm.WorkloadManagementSettings.NODE_MEMORY_REJECTION_THRESHOLD_SETTING_NAME; + +public class WorkloadManagementSettingsTests extends OpenSearchTestCase { + + /** + * Tests the invalid value for {@code wlm.query_group.node.memory_rejection_threshold} + * When the value is set more than {@code wlm.query_group.node.memory_cancellation_threshold} accidentally during + * new feature development. This test is to ensure that {@link WorkloadManagementSettings} holds the + * invariant {@code nodeLevelRejectionThreshold < nodeLevelCancellationThreshold} + */ + public void testInvalidMemoryInstantiationOfWorkloadManagementSettings() { + Settings settings = Settings.builder() + .put(NODE_MEMORY_REJECTION_THRESHOLD_SETTING_NAME, 0.8) + .put(NODE_MEMORY_CANCELLATION_THRESHOLD_SETTING_NAME, 0.7) + .build(); + ClusterSettings cs = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + + assertThrows(IllegalArgumentException.class, () -> new WorkloadManagementSettings(settings, cs)); + } + + /** + * Tests the invalid value for {@code wlm.query_group.node.cpu_rejection_threshold} + * When the value is set more than {@code wlm.query_group.node.cpu_cancellation_threshold} accidentally during + * new feature development. This test is to ensure that {@link WorkloadManagementSettings} holds the + * invariant {@code nodeLevelRejectionThreshold < nodeLevelCancellationThreshold} + */ + public void testInvalidCpuInstantiationOfWorkloadManagementSettings() { + Settings settings = Settings.builder() + .put(NODE_CPU_REJECTION_THRESHOLD_SETTING_NAME, 0.8) + .put(NODE_CPU_CANCELLATION_THRESHOLD_SETTING_NAME, 0.7) + .build(); + ClusterSettings cs = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + + assertThrows(IllegalArgumentException.class, () -> new WorkloadManagementSettings(settings, cs)); + } + + /** + * Tests the valid value for {@code wlm.query_group.node.cpu_rejection_threshold} + * Using setNodeLevelCpuRejectionThreshold function + */ + public void testValidNodeLevelCpuRejectionThresholdCase1() { + Settings settings = Settings.builder().put(NODE_CPU_CANCELLATION_THRESHOLD_SETTING_NAME, 0.8).build(); + ClusterSettings cs = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + WorkloadManagementSettings workloadManagementSettings = new WorkloadManagementSettings(settings, cs); + workloadManagementSettings.setNodeLevelCpuRejectionThreshold(0.7); + assertEquals(0.7, workloadManagementSettings.getNodeLevelCpuRejectionThreshold(), 1e-9); + } + + /** + * Tests the valid value for {@code wlm.query_group.node.cpu_rejection_threshold} + */ + public void testValidNodeLevelCpuRejectionThresholdCase2() { + Settings settings = Settings.builder().put(NODE_CPU_REJECTION_THRESHOLD_SETTING_NAME, 0.79).build(); + ClusterSettings cs = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + WorkloadManagementSettings workloadManagementSettings = new WorkloadManagementSettings(settings, cs); + assertEquals(0.79, workloadManagementSettings.getNodeLevelCpuRejectionThreshold(), 1e-9); + } + + /** + * Tests the invalid value for {@code wlm.query_group.node.cpu_rejection_threshold} + * When the value is set more than {@literal 0.9} + */ + public void testInvalidNodeLevelCpuRejectionThresholdCase1() { + Settings settings = Settings.builder().put(NODE_CPU_REJECTION_THRESHOLD_SETTING_NAME, 0.8).build(); + ClusterSettings cs = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + WorkloadManagementSettings workloadManagementSettings = new WorkloadManagementSettings(settings, cs); + assertThrows(IllegalArgumentException.class, () -> workloadManagementSettings.setNodeLevelCpuRejectionThreshold(0.95)); + } + + /** + * Tests the invalid value for {@code wlm.query_group.node.cpu_rejection_threshold} + * When the value is set more than {@code wlm.query_group.node.cpu_cancellation_threshold} + */ + public void testInvalidNodeLevelCpuRejectionThresholdCase2() { + Settings settings = Settings.builder() + .put(NODE_CPU_REJECTION_THRESHOLD_SETTING_NAME, 0.7) + .put(NODE_CPU_CANCELLATION_THRESHOLD_SETTING_NAME, 0.8) + .build(); + ClusterSettings cs = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + WorkloadManagementSettings workloadManagementSettings = new WorkloadManagementSettings(settings, cs); + assertThrows(IllegalArgumentException.class, () -> workloadManagementSettings.setNodeLevelCpuRejectionThreshold(0.85)); + } + + /** + * Tests the valid value for {@code wlm.query_group.node.cpu_cancellation_threshold} + */ + public void testValidNodeLevelCpuCancellationThresholdCase1() { + Settings settings = Settings.builder().put(NODE_CPU_CANCELLATION_THRESHOLD_SETTING_NAME, 0.8).build(); + ClusterSettings cs = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + WorkloadManagementSettings workloadManagementSettings = new WorkloadManagementSettings(settings, cs); + assertEquals(0.8, workloadManagementSettings.getNodeLevelCpuRejectionThreshold(), 1e-9); + } + + /** + * Tests the valid value for {@code wlm.query_group.node.cpu_cancellation_threshold} + * Using setNodeLevelCpuCancellationThreshold function + */ + public void testValidNodeLevelCpuCancellationThresholdCase2() { + Settings settings = Settings.builder().put(NODE_CPU_REJECTION_THRESHOLD_SETTING_NAME, 0.8).build(); + ClusterSettings cs = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + WorkloadManagementSettings workloadManagementSettings = new WorkloadManagementSettings(settings, cs); + workloadManagementSettings.setNodeLevelCpuCancellationThreshold(0.83); + assertEquals(0.83, workloadManagementSettings.getNodeLevelCpuCancellationThreshold(), 1e-9); + } + + /** + * Tests the invalid value for {@code wlm.query_group.node.cpu_cancellation_threshold} + * When the value is set more than {@literal 0.95} + */ + public void testInvalidNodeLevelCpuCancellationThresholdCase1() { + Settings settings = Settings.builder().put(NODE_CPU_CANCELLATION_THRESHOLD_SETTING_NAME, 0.9).build(); + ClusterSettings cs = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + WorkloadManagementSettings workloadManagementSettings = new WorkloadManagementSettings(settings, cs); + assertThrows(IllegalArgumentException.class, () -> workloadManagementSettings.setNodeLevelCpuCancellationThreshold(0.96)); + } + + /** + * Tests the invalid value for {@code wlm.query_group.node.cpu_cancellation_threshold} + * When the value is set less than {@code wlm.query_group.node.cpu_rejection_threshold} + */ + public void testInvalidNodeLevelCpuCancellationThresholdCase2() { + Settings settings = Settings.builder() + .put(NODE_CPU_REJECTION_THRESHOLD_SETTING_NAME, 0.7) + .put(NODE_CPU_CANCELLATION_THRESHOLD_SETTING_NAME, 0.8) + .build(); + ClusterSettings cs = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + WorkloadManagementSettings workloadManagementSettings = new WorkloadManagementSettings(settings, cs); + assertThrows(IllegalArgumentException.class, () -> workloadManagementSettings.setNodeLevelCpuCancellationThreshold(0.65)); + } + + /** + * Tests the valid value for {@code wlm.query_group.node.memory_cancellation_threshold} + */ + public void testValidNodeLevelMemoryCancellationThresholdCase1() { + Settings settings = Settings.builder().put(NODE_MEMORY_CANCELLATION_THRESHOLD_SETTING_NAME, 0.8).build(); + ClusterSettings cs = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + WorkloadManagementSettings workloadManagementSettings = new WorkloadManagementSettings(settings, cs); + assertEquals(0.8, workloadManagementSettings.getNodeLevelMemoryCancellationThreshold(), 1e-9); + } + + /** + * Tests the valid value for {@code wlm.query_group.node.memory_cancellation_threshold} + * Using setNodeLevelMemoryCancellationThreshold function + */ + public void testValidNodeLevelMemoryCancellationThresholdCase2() { + Settings settings = Settings.builder().put(NODE_MEMORY_REJECTION_THRESHOLD_SETTING_NAME, 0.8).build(); + ClusterSettings cs = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + WorkloadManagementSettings workloadManagementSettings = new WorkloadManagementSettings(settings, cs); + workloadManagementSettings.setNodeLevelMemoryCancellationThreshold(0.83); + assertEquals(0.83, workloadManagementSettings.getNodeLevelMemoryCancellationThreshold(), 1e-9); + } + + /** + * Tests the invalid value for {@code wlm.query_group.node.memory_cancellation_threshold} + * When the value is set more than {@literal 0.95} + */ + public void testInvalidNodeLevelMemoryCancellationThresholdCase1() { + Settings settings = Settings.builder().put(NODE_MEMORY_CANCELLATION_THRESHOLD_SETTING_NAME, 0.9).build(); + ClusterSettings cs = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + WorkloadManagementSettings workloadManagementSettings = new WorkloadManagementSettings(settings, cs); + assertThrows(IllegalArgumentException.class, () -> workloadManagementSettings.setNodeLevelMemoryCancellationThreshold(0.96)); + } + + /** + * Tests the invalid value for {@code wlm.query_group.node.memory_cancellation_threshold} + * When the value is set less than {@code wlm.query_group.node.memory_rejection_threshold} + */ + public void testInvalidNodeLevelMemoryCancellationThresholdCase2() { + Settings settings = Settings.builder() + .put(NODE_MEMORY_REJECTION_THRESHOLD_SETTING_NAME, 0.7) + .put(NODE_MEMORY_CANCELLATION_THRESHOLD_SETTING_NAME, 0.8) + .build(); + ClusterSettings cs = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + WorkloadManagementSettings workloadManagementSettings = new WorkloadManagementSettings(settings, cs); + assertThrows(IllegalArgumentException.class, () -> workloadManagementSettings.setNodeLevelMemoryCancellationThreshold(0.65)); + } + + /** + * Tests the valid value for {@code wlm.query_group.node.memory_rejection_threshold} + */ + public void testValidNodeLevelMemoryRejectionThresholdCase1() { + Settings settings = Settings.builder().put(NODE_MEMORY_REJECTION_THRESHOLD_SETTING_NAME, 0.79).build(); + ClusterSettings cs = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + WorkloadManagementSettings workloadManagementSettings = new WorkloadManagementSettings(settings, cs); + assertEquals(0.79, workloadManagementSettings.getNodeLevelMemoryRejectionThreshold(), 1e-9); + } + + /** + * Tests the valid value for {@code wlm.query_group.node.memory_rejection_threshold} + * Using setNodeLevelMemoryRejectionThreshold function + */ + public void testValidNodeLevelMemoryRejectionThresholdCase2() { + Settings settings = Settings.builder().put(NODE_MEMORY_CANCELLATION_THRESHOLD_SETTING_NAME, 0.9).build(); + ClusterSettings cs = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + WorkloadManagementSettings workloadManagementSettings = new WorkloadManagementSettings(settings, cs); + workloadManagementSettings.setNodeLevelMemoryRejectionThreshold(0.86); + assertEquals(0.86, workloadManagementSettings.getNodeLevelMemoryRejectionThreshold(), 1e-9); + } + + /** + * Tests the invalid value for {@code wlm.query_group.node.memory_rejection_threshold} + * When the value is set more than {@literal 0.9} + */ + public void testInvalidNodeLevelMemoryRejectionThresholdCase1() { + Settings settings = Settings.builder().put(NODE_MEMORY_CANCELLATION_THRESHOLD_SETTING_NAME, 0.9).build(); + ClusterSettings cs = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + WorkloadManagementSettings workloadManagementSettings = new WorkloadManagementSettings(settings, cs); + assertThrows(IllegalArgumentException.class, () -> workloadManagementSettings.setNodeLevelMemoryRejectionThreshold(0.92)); + } + + /** + * Tests the invalid value for {@code wlm.query_group.node.memory_rejection_threshold} + * When the value is set more than {@code wlm.query_group.node.memory_cancellation_threshold} + */ + public void testInvalidNodeLevelMemoryRejectionThresholdCase2() { + Settings settings = Settings.builder() + .put(NODE_MEMORY_REJECTION_THRESHOLD_SETTING_NAME, 0.7) + .put(NODE_MEMORY_CANCELLATION_THRESHOLD_SETTING_NAME, 0.8) + .build(); + ClusterSettings cs = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + WorkloadManagementSettings workloadManagementSettings = new WorkloadManagementSettings(settings, cs); + assertThrows(IllegalArgumentException.class, () -> workloadManagementSettings.setNodeLevelMemoryRejectionThreshold(0.85)); + } +} From b830d68c7a66092bb45909b1e8be37e0c1a49f28 Mon Sep 17 00:00:00 2001 From: "Daniel (dB.) Doubrovkine" Date: Wed, 21 Aug 2024 10:46:42 -0400 Subject: [PATCH 3/5] Update affiliation for @nknize. (#15322) Signed-off-by: dblock --- MAINTAINERS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS.md b/MAINTAINERS.md index 296c2c30aca49..4a8aa9305df74 100644 --- a/MAINTAINERS.md +++ b/MAINTAINERS.md @@ -22,7 +22,7 @@ This document contains a list of maintainers in this repo. See [opensearch-proje | Varun Bansal | [linuxpi](https://github.com/linuxpi) | Amazon | | Marc Handalian | [mch2](https://github.com/mch2) | Amazon | | Michael Froh | [msfroh](https://github.com/msfroh) | Amazon | -| Nick Knize | [nknize](https://github.com/nknize) | Amazon | +| Nick Knize | [nknize](https://github.com/nknize) | Lucenia | | Owais Kazi | [owaiskazi19](https://github.com/owaiskazi19) | Amazon | | Peter Nied | [peternied](https://github.com/peternied) | Amazon | | Rishikesh Pasham | [Rishikesh1159](https://github.com/Rishikesh1159) | Amazon | From 0ca4ed0b62ecc699547a8fbedd150f28f406b03a Mon Sep 17 00:00:00 2001 From: Gaurav Bafna <85113518+gbbafna@users.noreply.github.com> Date: Thu, 22 Aug 2024 12:33:52 +0530 Subject: [PATCH 4/5] Add log when download completes with file size (#15224) Signed-off-by: Gaurav Bafna --- .../org/opensearch/index/store/RemoteStoreFileDownloader.java | 1 + 1 file changed, 1 insertion(+) diff --git a/server/src/main/java/org/opensearch/index/store/RemoteStoreFileDownloader.java b/server/src/main/java/org/opensearch/index/store/RemoteStoreFileDownloader.java index 134994c0ae32c..ad42b6d677b41 100644 --- a/server/src/main/java/org/opensearch/index/store/RemoteStoreFileDownloader.java +++ b/server/src/main/java/org/opensearch/index/store/RemoteStoreFileDownloader.java @@ -149,6 +149,7 @@ private void copyOneFile( try { cancellableThreads.executeIO(() -> { destination.copyFrom(source, file, file, IOContext.DEFAULT); + logger.trace("Downloaded file {} of size {}", file, destination.fileLength(file)); onFileCompletion.run(); if (secondDestination != null) { secondDestination.copyFrom(destination, file, file, IOContext.DEFAULT); From abb1041c10c81964c7e0a93ddbaa714a2643bff2 Mon Sep 17 00:00:00 2001 From: Andriy Redko Date: Thu, 22 Aug 2024 12:18:41 -0400 Subject: [PATCH 5/5] Support Filtering on Large List encoded by Bitmap (version update) (#15352) Signed-off-by: Andriy Redko --- .../java/org/opensearch/index/query/TermsQueryBuilder.java | 4 ++-- server/src/main/java/org/opensearch/indices/TermsLookup.java | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/query/TermsQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/TermsQueryBuilder.java index 21ff90f2bf534..4b92d6a1f5460 100644 --- a/server/src/main/java/org/opensearch/index/query/TermsQueryBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/TermsQueryBuilder.java @@ -251,7 +251,7 @@ public TermsQueryBuilder(StreamInput in) throws IOException { termsLookup = in.readOptionalWriteable(TermsLookup::new); values = (List) in.readGenericValue(); this.supplier = null; - if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + if (in.getVersion().onOrAfter(Version.V_2_17_0)) { valueType = in.readEnum(ValueType.class); } } @@ -264,7 +264,7 @@ protected void doWriteTo(StreamOutput out) throws IOException { out.writeString(fieldName); out.writeOptionalWriteable(termsLookup); out.writeGenericValue(values); - if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + if (out.getVersion().onOrAfter(Version.V_2_17_0)) { out.writeEnum(valueType); } } diff --git a/server/src/main/java/org/opensearch/indices/TermsLookup.java b/server/src/main/java/org/opensearch/indices/TermsLookup.java index 6b83b713c05c2..7337ed31ce41e 100644 --- a/server/src/main/java/org/opensearch/indices/TermsLookup.java +++ b/server/src/main/java/org/opensearch/indices/TermsLookup.java @@ -87,7 +87,7 @@ public TermsLookup(StreamInput in) throws IOException { path = in.readString(); index = in.readString(); routing = in.readOptionalString(); - if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + if (in.getVersion().onOrAfter(Version.V_2_17_0)) { store = in.readBoolean(); } } @@ -101,7 +101,7 @@ public void writeTo(StreamOutput out) throws IOException { out.writeString(path); out.writeString(index); out.writeOptionalString(routing); - if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + if (out.getVersion().onOrAfter(Version.V_2_17_0)) { out.writeBoolean(store); } }