eclipse-equinox · ptziegler · Dec 21, 2024
diff --git a/...se.equinox.common.tests/src/org/eclipse/equinox/common/tests/text/StringMatcherTests.java b/...se.equinox.common.tests/src/org/eclipse/equinox/common/tests/text/StringMatcherTests.java
@@ -16,7 +16,7 @@
 
 @RunWith(Suite.class)
 @SuiteClasses({ StringMatcherFindTest.class, StringMatcherPlainTest.class, StringMatcherWildcardTest.class,
-		StringMatcherPrefixTest.class, StringMatcherOtherTest.class })
+		StringMatcherPrefixTest.class, StringMatcherOtherTest.class, TextMatcherTest.class })
 public class StringMatcherTests {
 	// empty
 }
diff --git a/...lipse.equinox.common.tests/src/org/eclipse/equinox/common/tests/text/TextMatcherTest.java b/...lipse.equinox.common.tests/src/org/eclipse/equinox/common/tests/text/TextMatcherTest.java
@@ -0,0 +1,96 @@
+/*******************************************************************************
+ * Copyright (c) 2020, 2024 Thomas Wolf<[email protected]> and others.
+ *
+ * This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License 2.0
+ * which accompanies this distribution, and is available at
+ * https://www.eclipse.org/legal/epl-2.0/
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *******************************************************************************/
+package org.eclipse.equinox.common.tests.text;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import org.eclipse.core.text.StringMatcher;
+import org.junit.Test;
+
+/**
+ * Tests for {@link StringMatcher#matchWords(String)} (word-wise matching).
+ */
+public class TextMatcherTest {
+	// StringMatcher constructor arguments: (pattern, ignoreCase, ignoreWildCards)
+	@Test
+	public void testEmpty() { // an empty pattern matches only the empty text
+		assertTrue(new StringMatcher("", false, false).matchWords(""));
+		assertFalse(new StringMatcher("", false, false).matchWords("foo"));
+		assertFalse(new StringMatcher("", false, false).matchWords("foo bar baz"));
+		assertTrue(new StringMatcher("", false, true).matchWords(""));
+		assertFalse(new StringMatcher("", false, true).matchWords("foo"));
+		assertFalse(new StringMatcher("", false, true).matchWords("foo bar baz"));
+	}
+
+	@Test
+	public void testSuffixes() { // '_' is a word character, so "foobar_123" is a single word
+		assertFalse(new StringMatcher("fo*ar", false, false).matchWords("foobar_123"));
+		assertFalse(new StringMatcher("fo*ar", false, false).matchWords("foobar_baz"));
+	}
+
+	@Test
+	public void testChinese() {
+		assertTrue(new StringMatcher("喜欢", false, false).matchWords("我 喜欢 吃 苹果。"));
+		// This test would work only if word-splitting used the ICU BreakIterator.
+		// "Words" are as shown above.
+		// assertTrue(new StringMatcher("喜欢", false, false).matchWords("我喜欢吃苹果。"));
+	}
+
+	@Test
+	public void testSingleWords() { // a single-word pattern must match one word completely
+		assertTrue(new StringMatcher("huhn", false, false).matchWords("hahn henne hühner küken huhn"));
+		assertTrue(new StringMatcher("h?hner", false, false).matchWords("hahn henne hühner küken huhn"));
+		assertTrue(new StringMatcher("h*hner", false, false).matchWords("hahn henne hühner küken huhn"));
+		assertTrue(new StringMatcher("hühner", false, false).matchWords("hahn henne hühner küken huhn"));
+		// Full pattern must match word fully
+		assertFalse(new StringMatcher("h?hner", false, false).matchWords("hahn henne hühnerhof küken huhn"));
+		assertFalse(new StringMatcher("h*hner", false, false).matchWords("hahn henne hühnerhof küken huhn"));
+		assertFalse(new StringMatcher("hühner", false, false).matchWords("hahn henne hühnerhof küken huhn"));
+		// Same checks with wildcards ignored: '?' and '*' are literal characters
+		assertTrue(new StringMatcher("huhn", false, true).matchWords("hahn henne hühner küken huhn"));
+		assertFalse(new StringMatcher("h?hner", false, true).matchWords("hahn henne hühner küken huhn"));
+		assertFalse(new StringMatcher("h*hner", false, true).matchWords("hahn henne hühner küken huhn"));
+		assertTrue(new StringMatcher("hühner", false, true).matchWords("hahn henne hühner küken huhn"));
+		// Full pattern must match word fully
+		assertFalse(new StringMatcher("h?hner", false, true).matchWords("hahn henne hühnerhof küken huhn"));
+		assertFalse(new StringMatcher("h*hner", false, true).matchWords("hahn henne hühnerhof küken huhn"));
+		assertFalse(new StringMatcher("hühner", false, true).matchWords("hahn henne hühnerhof küken huhn"));
+	}
+
+	@Test
+	public void testMultipleWords() { // each sub-pattern only needs to match a prefix of some word
+		assertTrue(new StringMatcher("huhn h?hner", false, false).matchWords("hahn henne hühner küken huhn"));
+		assertTrue(new StringMatcher("huhn h?hner", false, false).matchWords("hahn henne hühnerhof küken huhn"));
+		assertFalse(new StringMatcher("huhn h?hner", false, true).matchWords("hahn henne hühner küken huhn"));
+		assertFalse(new StringMatcher("huhn h?hner", false, true).matchWords("hahn henne hühnerhof küken huhn"));
+		assertTrue(new StringMatcher("huhn h*hner", false, false).matchWords("hahn henne hühner küken huhn"));
+		assertTrue(new StringMatcher("huhn h*hner", false, false).matchWords("hahn henne hühnerhof küken huhn"));
+		assertFalse(new StringMatcher("huhn h*hner", false, true).matchWords("hahn henne hühner küken huhn"));
+		assertFalse(new StringMatcher("huhn h*hner", false, true).matchWords("hahn henne hühnerhof küken huhn"));
+		assertTrue(new StringMatcher("huhn hühner", false, false).matchWords("hahn henne hühner küken huhn"));
+		assertTrue(new StringMatcher("huhn hühner", false, false).matchWords("hahn henne hühnerhof küken huhn"));
+		assertTrue(new StringMatcher("huhn hühner", false, true).matchWords("hahn henne hühner küken huhn"));
+		assertTrue(new StringMatcher("huhn hühner", false, true).matchWords("hahn henne hühnerhof küken huhn"));
+	}
+
+	@Test
+	public void testCaseInsensitivity() { // patterns differ from the text only in letter case
+		assertTrue(new StringMatcher("Huhn HÜHNER", true, false).matchWords("hahn henne hühner küken huhn"));
+		assertTrue(new StringMatcher("Huhn HÜHNER", true, false).matchWords("hahn henne hühnerhof küken huhn"));
+		assertTrue(new StringMatcher("Huhn HÜHNER", true, true).matchWords("hahn henne hühner küken huhn"));
+		assertTrue(new StringMatcher("Huhn HÜHNER", true, true).matchWords("hahn henne hühnerhof küken huhn"));
+		assertTrue(new StringMatcher("HüHnEr", true, false).matchWords("hahn henne hühner küken huhn"));
+		assertFalse(new StringMatcher("HüHnEr", true, false).matchWords("hahn henne hühnerhof küken huhn"));
+		assertTrue(new StringMatcher("HüHnEr", true, true).matchWords("hahn henne hühner küken huhn"));
+		assertFalse(new StringMatcher("HüHnEr", true, true).matchWords("hahn henne hühnerhof küken huhn"));
+	}
+}
diff --git a/bundles/org.eclipse.equinox.common/META-INF/MANIFEST.MF b/bundles/org.eclipse.equinox.common/META-INF/MANIFEST.MF
@@ -2,7 +2,7 @@ Manifest-Version: 1.0
 Bundle-ManifestVersion: 2
 Bundle-Name: %pluginName
 Bundle-SymbolicName: org.eclipse.equinox.common; singleton:=true
-Bundle-Version: 3.19.200.qualifier
+Bundle-Version: 3.20.0.qualifier
 Bundle-Localization: plugin
 Export-Package: org.eclipse.core.internal.boot;x-friends:="org.eclipse.core.resources,org.eclipse.pde.build",
  org.eclipse.core.internal.runtime;common=split;mandatory:=common;

diff --git a/bundles/org.eclipse.equinox.common/src/org/eclipse/core/text/StringMatcher.java b/bundles/org.eclipse.equinox.common/src/org/eclipse/core/text/StringMatcher.java
@@ -1,5 +1,5 @@
 /*******************************************************************************
- * Copyright (c) 2000, 2020 IBM Corporation and others.
+ * Copyright (c) 2000, 2024 IBM Corporation and others.
  *
  * This program and the accompanying materials
  * are made available under the terms of the Eclipse Public License 2.0
@@ -10,11 +10,12 @@
  *
  * Contributors:
  *     IBM Corporation - initial API and implementation
+ *     Thomas Wolf, Patrick Ziegler - support for matching individual words
  *******************************************************************************/
 package org.eclipse.core.text;
 
-import java.util.ArrayList;
-import java.util.List;
+import java.util.*;
+import java.util.regex.Pattern;
 
 /**
  * A string pattern matcher. Supports '*' and '?' wildcards.
@@ -23,6 +24,10 @@
  */
 public final class StringMatcher {
 
+	private static final Pattern NON_WORD = Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS); //$NON-NLS-1$
+
+	private static final StringMatcher[] NO_MATCHERS = new StringMatcher[0];
+
 	private final String fPattern;
 
 	private final int fLength; // pattern length
@@ -35,6 +40,8 @@ public final class StringMatcher {
 
 	private boolean fHasTrailingStar;
 
+	private StringMatcher fParts[]; // the given pattern is split into space-separated sub-patterns
+
 	private String fSegments[]; // the given pattern is split into * separated segments
 
 	/* Minimum length required for a match: shorter texts cannot possibly match. */
@@ -352,6 +359,105 @@ public boolean match(String text, int start, int end) {
 		}
 		return i == segCount;
 	}
+
+	/**
+	 * Matches the pattern, which may contain the wildcards '?' and '*', against
+	 * the given text in a word-aware fashion. Unlike {@link #match(String)}, this
+	 * method does not only consider the full text but also the individual words
+	 * of the text, and a pattern containing whitespace is additionally split into
+	 * sub-patterns that are matched on their own.
+	 * <p>
+	 * The rules are applied in this order:
+	 * </p>
+	 * <ul>
+	 * <li>If the full pattern matches the full text, the match succeeds.</li>
+	 * <li>If the full pattern matches a single word of the text, the match
+	 * succeeds.</li>
+	 * <li>If every sub-pattern matches a prefix of the whole text or a prefix of
+	 * some word, the match succeeds.</li>
+	 * <li>Otherwise, the match fails.</li>
+	 * </ul>
+	 * <p>
+	 * An empty pattern matches only the empty text.
+	 * </p>
+	 *
+	 * @param text the text to match; split into words via {@link #getWords(String)}
+	 * @return {@code true} if one of the rules above succeeds
+	 * @since 3.20
+	 */
+	public boolean matchWords(String text) {
+		if (match(text)) {
+			return true;
+		}
+		String[] tokens = getWords(text);
+		if (match(this, tokens)) {
+			return true;
+		}
+		// Sub-matchers are created lazily on first use and cached afterwards.
+		StringMatcher[] parts = fParts;
+		if (parts == null) {
+			fParts = parts = splitPattern();
+		}
+		// No sub-patterns to try: the full pattern was the only candidate.
+		if (parts.length == 0) {
+			return false;
+		}
+		return Arrays.stream(parts).allMatch(part -> part.match(text) || match(part, tokens));
+	}
+
+	/**
+	 * Splits the trimmed pattern at whitespace and creates one sub-matcher per piece.
+	 * @return the sub-matchers; an empty array if there are fewer than two pieces
+	 */
+	private StringMatcher[] splitPattern() {
+		String trimmed = fPattern.trim();
+		String[] tokens = trimmed.isEmpty() ? new String[0] : trimmed.split("\\s+"); //$NON-NLS-1$
+		if (tokens.length <= 1) {
+			return NO_MATCHERS;
+		}
+		List<StringMatcher> result = new ArrayList<>(tokens.length);
+		for (String token : tokens) {
+			if (token != null && !token.isEmpty()) {
+				StringMatcher part = new StringMatcher(token, fIgnoreCase, fIgnoreWildCards);
+				part.usePrefixMatch();
+				result.add(part);
+			}
+		}
+		return result.toArray(StringMatcher[]::new);
+	}
+
+	/**
+	 * Tests whether {@code matcher} (this matcher or one of its sub-matchers)
+	 * matches at least one of {@code words}. Neither argument nor any element of
+	 * {@code words} may be {@code null}. Returns {@code false} if no word matches.
+	 */
+	private static boolean match(StringMatcher matcher, String[] words) {
+		for (String candidate : words) {
+			if (matcher.match(candidate, 0, candidate.length())) {
+				return true;
+			}
+		}
+		return false;
+	}
+
+	/**
+	 * Splits a given text into words, i.e. maximal runs of Unicode word characters.
+	 *
+	 * @param text to split; must not be {@code null}
+	 * @return the words of the text; never contains empty strings
+	 * @since 3.20
+	 */
+	public static String[] getWords(String text) {
+		// Earlier implementations used the ICU BreakIterator, but in 2020 the ICU
+		// dependency was dropped due to its size. The JDK BreakIterator splits
+		// differently (https://bugs.eclipse.org/bugs/show_bug.cgi?id=563121). The
+		// NON_WORD regexp works well for programming language text, but may be
+		// sub-optimal for natural languages
+		// (https://bugs.eclipse.org/bugs/show_bug.cgi?id=90579).
+		// Pattern.split reports a leading "" if the text starts with a separator
+		// (and for the empty text); such empty strings are not words, so drop them.
+		return NON_WORD.splitAsStream(text).filter(w -> !w.isEmpty()).toArray(String[]::new);
+	}
 
 	/**
 	 * Returns the single segment for a matcher ignoring wildcards.