diff --git a/README.md b/README.md
index f655599..01a44ec 100644
--- a/README.md
+++ b/README.md
@@ -48,6 +48,35 @@ $matches = WholeTextFinder::find($haystack, $needle);
```
+### Multibyte strings
+
+Please note that the `WholeTextFinder::find` function is multibyte-safe: it returns the correct character positions of the matches in the original phrase. For example:
+
+```php
+//..
+use Finder\WholeTextFinder;
+
+$haystack = "La casa è bella bella";
+$needle = "bella";
+
+$matches = WholeTextFinder::find($haystack, $needle, true, true, true);
+
+// $matches is equal to:
+// array (
+// 0 =>
+// array (
+// 0 => 'bella',
+// 1 => 10,
+// ),
+// 1 =>
+// array (
+// 0 => 'bella',
+// 1 => 16,
+// ),
+//)
+
+```
+
## Find and Replace
There is also available a `findAndReplace` method:
@@ -74,14 +103,14 @@ $matches = WholeTextFinder::findAndReplace($haystack, $needle, $replacement);
// [0]=>
// string(6) "και"
// [1]=>
-// int(122)
+// int(66)
// }
// [1]=>
// array(2) {
// [0]=>
// string(6) "και"
// [1]=>
-// int(213)
+// int(123)
// }
// }
// }
diff --git a/src/Helper/Replacer.php b/src/Helper/Replacer.php
index 7255087..165f206 100644
--- a/src/Helper/Replacer.php
+++ b/src/Helper/Replacer.php
@@ -14,9 +14,9 @@ class Replacer
*
* $expected = "Beauty -> test Anti-Akne Gesichtsreiniger Schlankmacher XXX";
*
- * @param $pattern
- * @param $replacement
- * @param $haystack
+ * @param string $pattern
+ * @param string $replacement
+ * @param string $haystack
*
* @return string|string[]
*/
@@ -38,7 +38,7 @@ public static function replace($pattern, $replacement, $haystack)
*
* /(\|\|\|\||<.*?>|%{.*?})(*SKIP)(*FAIL)|ciao/iu
*
- * @param $pattern
+ * @param string $pattern
*
* @return string
*/
diff --git a/src/Helper/Strings.php b/src/Helper/Strings.php
index e2024e8..6cf18af 100644
--- a/src/Helper/Strings.php
+++ b/src/Helper/Strings.php
@@ -50,4 +50,14 @@ public static function token($length = 8)
return $key;
}
+
+    /**
+     * Tell whether a string holds at least one multibyte character.
+     *
+     * The byte length (strlen) exceeds the character length (mb_strlen)
+     * only when some character is encoded on more than one byte.
+     *
+     * @param string $string
+     *
+     * @return bool
+     */
+    public static function isMultibyte($string)
+    {
+        $byteLength = strlen($string);
+        $charLength = mb_strlen($string);
+
+        return $byteLength > $charLength;
+    }
}
\ No newline at end of file
diff --git a/src/WholeTextFinder.php b/src/WholeTextFinder.php
index 034ad8a..64c9ab1 100644
--- a/src/WholeTextFinder.php
+++ b/src/WholeTextFinder.php
@@ -24,9 +24,54 @@ public static function find($haystack, $needle, $skipHtmlEntities = true, $exact
preg_match_all($patternAndHaystack['pattern'], $patternAndHaystack['haystack'], $matches, PREG_OFFSET_CAPTURE);
+ self::mbCorrectMatchPositions($patternAndHaystack['haystack'], $matches);
+
return $matches[0];
}
+    /**
+     * Rewrite the offsets of a preg_match_all result for multibyte haystacks.
+     *
+     * preg_match_all with PREG_OFFSET_CAPTURE reports offsets in bytes. When
+     * Strings::isMultibyte() says $haystack has no multibyte characters, the
+     * offsets are left untouched. Otherwise the offset of every
+     * [matched text, offset] pair in $matches[0] is replaced, in place, with
+     * the value returned by mbFindTheCorrectPosition().
+     *
+     * @param string $haystack the string the matches were found in
+     * @param array  $matches  preg_match_all result; $matches[0] is modified by reference
+     *
+     * @return array the (possibly corrected) $matches[0] list, on every path
+     */
+    private static function mbCorrectMatchPositions($haystack, &$matches)
+    {
+        if (!Strings::isMultibyte($haystack)) {
+            return $matches[0];
+        }
+
+        foreach ($matches[0] as $index => $match) {
+            $word = $match[0];
+            $position = $match[1];
+
+            $matches[0][$index][1] = self::mbFindTheCorrectPosition($haystack, $word, $position);
+        }
+
+        // Return the same shape as the early exit above instead of falling through with null.
+        return $matches[0];
+    }
+
+    /**
+     * Convert the byte offset of a match inside $haystack into a character offset.
+     *
+     * The character offset equals the number of characters contained in the
+     * bytes that precede the match, so it is counted directly. Probing
+     * candidate positions for $word instead can stop on a different occurrence
+     * of the same word (e.g. "èèèèèè bella bella": byte offset 13 of the first
+     * "bella" is also the character offset of the second one).
+     *
+     * @param string $haystack the string the match was found in
+     * @param string $word     matched text; not needed for the computation, kept for the existing signature
+     * @param int    $position byte offset of the match; overwritten by reference with the character offset
+     *
+     * @return int character offset of the match
+     */
+    private static function mbFindTheCorrectPosition($haystack, $word, &$position)
+    {
+        // substr() is byte-wise, mb_strlen() counts characters of that byte prefix.
+        $position = mb_strlen(substr($haystack, 0, $position));
+
+        return $position;
+    }
+
/**
* @param string $haystack
* @param string $needle
diff --git a/tests/StringsTest.php b/tests/StringsTest.php
index 73ab3d1..bee3b7b 100644
--- a/tests/StringsTest.php
+++ b/tests/StringsTest.php
@@ -17,4 +17,16 @@ public function html_entity_decode()
$this->assertEquals(Strings::htmlEntityDecode($input), $output);
}
+
+    /**
+     * Strings::isMultibyte() is false for plain ASCII text and true once
+     * the text contains an accented (multibyte) character.
+     *
+     * @test
+     */
+    public function is_multibyte()
+    {
+        $asciiOnly = "La casa e bella";
+        $withAccent = "La casa è bella";
+
+        $this->assertFalse(Strings::isMultibyte($asciiOnly));
+        $this->assertTrue(Strings::isMultibyte($withAccent));
+    }
}
\ No newline at end of file
diff --git a/tests/WholeTextFinderReplacementTest.php b/tests/WholeTextFinderReplacementTest.php
index 5029028..d27afbf 100644
--- a/tests/WholeTextFinderReplacementTest.php
+++ b/tests/WholeTextFinderReplacementTest.php
@@ -19,8 +19,8 @@ public function find_and_replace_test_on_greek_text()
$expected = [
'replacement' => 'Δύο παράγοντες καθόρισαν την αντίληψή μου για την Τενεσί Ουίλιαμς test τη σκηνική παρουσίαση των κειμένων: η Maria Britneva test η Annette Saddik, αφετέρου.',
'occurrencies' => [
- [$needle, 122],
- [$needle, 213],
+ [$needle, 66],
+ [$needle, 123],
],
];
$matches = WholeTextFinder::findAndReplace($haystack, $needle, $replacement);
@@ -64,7 +64,7 @@ public function find_and_replace_must_skip_matecat_ph_tags()
public function find_and_replace_must_skip_matecat_html_tags()
{
$haystack = "Beauty -> 0 Anti-Akne Gesichtsreiniger Schlankmacher XXX";
- $needle = 0;
+ $needle = "0";
$replacement = "test";
$expected = "Beauty -> test Anti-Akne Gesichtsreiniger Schlankmacher XXX";
diff --git a/tests/WholeTextFinderTest.php b/tests/WholeTextFinderTest.php
index 29feb14..11a2f30 100644
--- a/tests/WholeTextFinderTest.php
+++ b/tests/WholeTextFinderTest.php
@@ -7,6 +7,21 @@
class WholeTextFinderTest extends TestCase
{
+    /**
+     * find() reports character offsets for matches that follow a multibyte character.
+     *
+     * @test
+     */
+    public function can_detect_positions()
+    {
+        $matches = WholeTextFinder::find("La casa è bella bella", "bella", true, true, true);
+
+        $this->assertCount(2, $matches);
+
+        foreach ([10, 16] as $index => $expectedOffset) {
+            $this->assertEquals($expectedOffset, $matches[$index][1]);
+        }
+    }
+
/**
* @test
*/
@@ -138,17 +153,17 @@ public function find_should_return_correct_matches()
$expected = [
[
0 => 'ggio',
- 1 => 23
+ 1 => 22
]
];
$expected2 = [
[
0 => 'ggio',
- 1 => 18
+ 1 => 17
],
[
0 => 'ggio',
- 1 => 23
+ 1 => 22
]
];