Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move TextMatcher from Platform UI to Equinox Common #720

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

/**
 * JUnit suite that groups the test classes listed in {@code @SuiteClasses} so
 * that they can be run together.
 */
@RunWith(Suite.class)
@SuiteClasses({ StringMatcherFindTest.class, StringMatcherPlainTest.class, StringMatcherWildcardTest.class,
StringMatcherPrefixTest.class, StringMatcherOtherTest.class })
StringMatcherPrefixTest.class, StringMatcherOtherTest.class, TextMatcherTest.class })
public class StringMatcherTests {
// intentionally empty: the suite is defined entirely by the annotations above
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
/*******************************************************************************
* Copyright (c) 2020, 2024 Thomas Wolf<[email protected]> and others.
*
* This program and the accompanying materials
* are made available under the terms of the Eclipse Public License 2.0
* which accompanies this distribution, and is available at
* https://www.eclipse.org/legal/epl-2.0/
*
* SPDX-License-Identifier: EPL-2.0
*******************************************************************************/
package org.eclipse.equinox.common.tests.text;

import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import org.eclipse.core.text.StringMatcher;
import org.junit.Test;

/**
 * Tests for {@link StringMatcher#matchWords(String)}, the word-based matching
 * of {@link StringMatcher}.
 * <p>
 * All matchers are created with the three-argument constructor
 * {@code StringMatcher(pattern, ignoreCase, ignoreWildCards)}.
 * </p>
 */
public class TextMatcherTest {

	/** An empty pattern matches the empty text and nothing else. */
	@Test
	public void testEmpty() {
		assertTrue(new StringMatcher("", false, false).matchWords(""));
		assertFalse(new StringMatcher("", false, false).matchWords("foo"));
		assertFalse(new StringMatcher("", false, false).matchWords("foo bar baz"));
		assertTrue(new StringMatcher("", false, true).matchWords(""));
		assertFalse(new StringMatcher("", false, true).matchWords("foo"));
		assertFalse(new StringMatcher("", false, true).matchWords("foo bar baz"));
	}

	/** A single pattern must not match when the word continues after it. */
	@Test
	public void testSuffixes() {
		assertFalse(new StringMatcher("fo*ar", false, false).matchWords("foobar_123"));
		assertFalse(new StringMatcher("fo*ar", false, false).matchWords("foobar_baz"));
	}

	/** Non-Latin words are found when they are separated by whitespace. */
	@Test
	public void testChinese() {
		assertTrue(new StringMatcher("喜欢", false, false).matchWords("我 喜欢 吃 苹果。"));
		// This test would work only if word-splitting used the ICU BreakIterator.
		// "Words" are as shown above.
		// assertTrue(new StringMatcher("喜欢", false, false).matchWords("我喜欢吃苹果。"));
	}

	/** A pattern without whitespace must match one complete word of the text. */
	@Test
	public void testSingleWords() {
		assertTrue(new StringMatcher("huhn", false, false).matchWords("hahn henne hühner küken huhn"));
		assertTrue(new StringMatcher("h?hner", false, false).matchWords("hahn henne hühner küken huhn"));
		assertTrue(new StringMatcher("h*hner", false, false).matchWords("hahn henne hühner küken huhn"));
		assertTrue(new StringMatcher("hühner", false, false).matchWords("hahn henne hühner küken huhn"));
		// Full pattern must match word fully
		assertFalse(new StringMatcher("h?hner", false, false).matchWords("hahn henne hühnerhof küken huhn"));
		assertFalse(new StringMatcher("h*hner", false, false).matchWords("hahn henne hühnerhof küken huhn"));
		assertFalse(new StringMatcher("hühner", false, false).matchWords("hahn henne hühnerhof küken huhn"));

		// Same texts with wildcards disabled: '?' and '*' are literal characters.
		assertTrue(new StringMatcher("huhn", false, true).matchWords("hahn henne hühner küken huhn"));
		assertFalse(new StringMatcher("h?hner", false, true).matchWords("hahn henne hühner küken huhn"));
		assertFalse(new StringMatcher("h*hner", false, true).matchWords("hahn henne hühner küken huhn"));
		assertTrue(new StringMatcher("hühner", false, true).matchWords("hahn henne hühner küken huhn"));
		// Full pattern must match word fully
		assertFalse(new StringMatcher("h?hner", false, true).matchWords("hahn henne hühnerhof küken huhn"));
		assertFalse(new StringMatcher("h*hner", false, true).matchWords("hahn henne hühnerhof küken huhn"));
		// Uses matchWords like every other assertion here, so the word-matching
		// path is exercised rather than plain full-text matching.
		assertFalse(new StringMatcher("hühner", false, true).matchWords("hahn henne hühnerhof küken huhn"));
	}

	/**
	 * A pattern containing whitespace is split into sub-patterns, each of which
	 * only needs to match a prefix of some word.
	 */
	@Test
	public void testMultipleWords() {
		assertTrue(new StringMatcher("huhn h?hner", false, false).matchWords("hahn henne hühner küken huhn"));
		assertTrue(new StringMatcher("huhn h?hner", false, false).matchWords("hahn henne hühnerhof küken huhn"));
		assertFalse(new StringMatcher("huhn h?hner", false, true).matchWords("hahn henne hühner küken huhn"));
		assertFalse(new StringMatcher("huhn h?hner", false, true).matchWords("hahn henne hühnerhof küken huhn"));
		assertTrue(new StringMatcher("huhn h*hner", false, false).matchWords("hahn henne hühner küken huhn"));
		assertTrue(new StringMatcher("huhn h*hner", false, false).matchWords("hahn henne hühnerhof küken huhn"));
		assertFalse(new StringMatcher("huhn h*hner", false, true).matchWords("hahn henne hühner küken huhn"));
		assertFalse(new StringMatcher("huhn h*hner", false, true).matchWords("hahn henne hühnerhof küken huhn"));
		assertTrue(new StringMatcher("huhn hühner", false, false).matchWords("hahn henne hühner küken huhn"));
		assertTrue(new StringMatcher("huhn hühner", false, false).matchWords("hahn henne hühnerhof küken huhn"));
		assertTrue(new StringMatcher("huhn hühner", false, true).matchWords("hahn henne hühner küken huhn"));
		assertTrue(new StringMatcher("huhn hühner", false, true).matchWords("hahn henne hühnerhof küken huhn"));
	}

	/** With case-insensitivity enabled, letter case in the pattern is irrelevant. */
	@Test
	public void testCaseInsensitivity() {
		assertTrue(new StringMatcher("Huhn HÜHNER", true, false).matchWords("hahn henne hühner küken huhn"));
		assertTrue(new StringMatcher("Huhn HÜHNER", true, false).matchWords("hahn henne hühnerhof küken huhn"));
		assertTrue(new StringMatcher("Huhn HÜHNER", true, true).matchWords("hahn henne hühner küken huhn"));
		assertTrue(new StringMatcher("Huhn HÜHNER", true, true).matchWords("hahn henne hühnerhof küken huhn"));
		assertTrue(new StringMatcher("HüHnEr", true, false).matchWords("hahn henne hühner küken huhn"));
		assertFalse(new StringMatcher("HüHnEr", true, false).matchWords("hahn henne hühnerhof küken huhn"));
		assertTrue(new StringMatcher("HüHnEr", true, true).matchWords("hahn henne hühner küken huhn"));
		assertFalse(new StringMatcher("HüHnEr", true, true).matchWords("hahn henne hühnerhof küken huhn"));
	}
}
2 changes: 1 addition & 1 deletion bundles/org.eclipse.equinox.common/META-INF/MANIFEST.MF
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ Manifest-Version: 1.0
Bundle-ManifestVersion: 2
Bundle-Name: %pluginName
Bundle-SymbolicName: org.eclipse.equinox.common; singleton:=true
Bundle-Version: 3.19.200.qualifier
Bundle-Version: 3.20.0.qualifier
Bundle-Localization: plugin
Export-Package: org.eclipse.core.internal.boot;x-friends:="org.eclipse.core.resources,org.eclipse.pde.build",
org.eclipse.core.internal.runtime;common=split;mandatory:=common;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright (c) 2000, 2020 IBM Corporation and others.
* Copyright (c) 2000, 2024 IBM Corporation and others.
*
* This program and the accompanying materials
* are made available under the terms of the Eclipse Public License 2.0
Expand All @@ -10,11 +10,12 @@
*
* Contributors:
* IBM Corporation - initial API and implementation
* Thomas Wolf, Patrick Ziegler - support for matching individual words
*******************************************************************************/
package org.eclipse.core.text;

import java.util.ArrayList;
import java.util.List;
import java.util.*;
import java.util.regex.Pattern;

/**
* A string pattern matcher. Supports '*' and '?' wildcards.
Expand All @@ -23,6 +24,10 @@
*/
public final class StringMatcher {

private static final Pattern NON_WORD = Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS); //$NON-NLS-1$

private static final StringMatcher[] NO_MATCHERS = new StringMatcher[0];

private final String fPattern;

private final int fLength; // pattern length
Expand All @@ -35,6 +40,8 @@ public final class StringMatcher {

private boolean fHasTrailingStar;

private StringMatcher fParts[]; // the given pattern is split into space-separated sub-patterns

private String fSegments[]; // the given pattern is split into * separated segments

/* Minimum length required for a match: shorter texts cannot possibly match. */
Expand Down Expand Up @@ -352,6 +359,105 @@ public boolean match(String text, int start, int end) {
}
return i == segCount;
}

/**
 * Matches this pattern (which may contain the wildcards '?' and '*') against
 * the given text, like {@link #match(String)}, but additionally against the
 * individual words of the text. A pattern containing whitespace is furthermore
 * split into sub-patterns, which are matched as well.
 * <p>
 * The checks are applied in this order:
 * </p>
 * <ol>
 * <li>The full pattern matches the full text: success.</li>
 * <li>The full pattern matches a single word of the text: success.</li>
 * <li>Every sub-pattern matches a prefix of the whole text or a prefix of any
 * word: success.</li>
 * <li>Anything else: failure.</li>
 * </ol>
 * <p>
 * An empty pattern is intended to match only the empty text.
 * </p>
 *
 * @param text the text to match against
 * @return {@code true} if one of the rules above succeeds; {@code false}
 *         otherwise
 * @since 3.20
 */
public boolean matchWords(String text) {
	if (match(text)) {
		return true;
	}
	final String[] tokens = getWords(text);
	if (match(this, tokens)) {
		return true;
	}
	// The sub-matchers are computed on first use and then cached in fParts.
	StringMatcher[] subMatchers = fParts;
	if (subMatchers == null) {
		subMatchers = splitPattern();
		fParts = subMatchers;
	}
	// No sub-patterns: nothing further can match.
	return subMatchers.length > 0
			&& Arrays.stream(subMatchers).allMatch(part -> part.match(text) || match(part, tokens));
}

/**
 * Splits the trimmed pattern at whitespace and creates one matcher per
 * sub-pattern, with the same case and wildcard settings as this matcher and
 * with {@code usePrefixMatch()} applied.
 *
 * @return the sub-matchers, or an empty array if the pattern is blank or
 *         consists of a single sub-pattern only
 */
private StringMatcher[] splitPattern() {
	final String trimmed = fPattern.trim();
	final String[] tokens = trimmed.isEmpty() ? new String[0] : trimmed.split("\\s+"); //$NON-NLS-1$
	if (tokens.length < 2) {
		return NO_MATCHERS;
	}
	return Arrays.stream(tokens) //
			.filter(token -> token != null && !token.isEmpty()) //
			.map(token -> {
				StringMatcher part = new StringMatcher(token, fIgnoreCase, fIgnoreWildCards);
				part.usePrefixMatch();
				return part;
			}) //
			.toArray(StringMatcher[]::new);
}

/**
 * Tells whether the given {@code matcher} fully matches any one of the given
 * {@code words}.
 *
 * @param matcher this matcher or one of its sub-matchers; must not be
 *                {@code null}
 * @param words   the candidate words; neither the array nor any of its
 *                elements may be {@code null}
 * @return {@code true} as soon as one word is matched; {@code false} if none
 *         is
 */
private static boolean match(StringMatcher matcher, String[] words) {
	for (String candidate : words) {
		if (matcher.match(candidate, 0, candidate.length())) {
			return true;
		}
	}
	return false;
}

/**
 * Splits a given text into words. Any run of non-word characters (as defined
 * by {@code \W} with Unicode character classes) separates two words.
 * <p>
 * The result never contains empty strings: a text that is empty or consists of
 * non-word characters only yields an empty array, and leading or trailing
 * non-word characters do not produce empty words.
 * </p>
 *
 * @param text to split; must not be {@code null}
 * @return the words of the text, possibly empty; never {@code null}
 * @since 3.20
 */
public static String[] getWords(String text) {
	// Previous implementations (in the removed StringMatcher) used the ICU
	// BreakIterator to split the text. That worked well, but in 2020 it was decided
	// to drop the dependency to the ICU library due to its size. The JDK
	// BreakIterator splits differently, causing e.g.
	// https://bugs.eclipse.org/bugs/show_bug.cgi?id=563121 . The NON_WORD regexp
	// appears to work well for programming language text, but may give sub-optimal
	// results for natural languages. See also
	// https://bugs.eclipse.org/bugs/show_bug.cgi?id=90579 .
	String[] words = NON_WORD.split(text);
	// Pattern.split() reports an empty first element when the text starts with a
	// separator, and returns { "" } for an empty text. Such an empty "word" would
	// let an empty pattern match e.g. " foo" on the word level. Trailing empty
	// strings are already dropped by split(), and interior ones cannot occur
	// because the separator pattern is greedy, so only index 0 needs checking.
	if (words.length > 0 && words[0].isEmpty()) {
		return Arrays.copyOfRange(words, 1, words.length);
	}
	return words;
}

/**
* Returns the single segment for a matcher ignoring wildcards.
Expand Down
Loading