Skip to content

Commit

Permalink
Merge pull request #4 from matecat/fix-positions
Browse files Browse the repository at this point in the history
WholeTextFinder::find multi byte safe
  • Loading branch information
mauretto78 authored Jan 5, 2022
2 parents d5ff07d + 47565f7 commit 43a8b2c
Show file tree
Hide file tree
Showing 7 changed files with 123 additions and 12 deletions.
33 changes: 31 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,35 @@ $matches = WholeTextFinder::find($haystack, $needle);

```

### Multibyte strings

Please note that the `WholeTextFinder::find` function is multibyte safe and returns the correct word positions (expressed in characters, not bytes) in the original phrase. Take a look at this example:

```php
//..
use Finder\WholeTextFinder;

$haystack = "La casa è bella bella";
$needle = "bella";

$matches = WholeTextFinder::find($haystack, $needle, true, true, true);

// $matches is equal to:
// array (
// 0 =>
// array (
// 0 => 'bella',
// 1 => 10,
// ),
// 1 =>
// array (
// 0 => 'bella',
// 1 => 16,
// ),
//)

```

## Find and Replace

A `findAndReplace` method is also available:
Expand All @@ -74,14 +103,14 @@ $matches = WholeTextFinder::findAndReplace($haystack, $needle, $replacement);
// [0]=>
// string(6) "και"
// [1]=>
// int(122)
// int(66)
// }
// [1]=>
// array(2) {
// [0]=>
// string(6) "και"
// [1]=>
// int(213)
// int(123)
// }
// }
// }
Expand Down
8 changes: 4 additions & 4 deletions src/Helper/Replacer.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ class Replacer
*
* $expected = "Beauty -> test Anti-Akne Gesichtsreiniger Schlankmacher <g id=\"2\">XXX</g>";
*
* @param $pattern
* @param $replacement
* @param $haystack
* @param string $pattern
* @param string $replacement
* @param string $haystack
*
* @return string|string[]
*/
Expand All @@ -38,7 +38,7 @@ public static function replace($pattern, $replacement, $haystack)
*
* /(\|\|\|\||<.*?>|%{.*?})(*SKIP)(*FAIL)|ciao/iu
*
* @param $pattern
* @param string $pattern
*
* @return string
*/
Expand Down
10 changes: 10 additions & 0 deletions src/Helper/Strings.php
Original file line number Diff line number Diff line change
Expand Up @@ -50,4 +50,14 @@ public static function token($length = 8)

return $key;
}

/**
 * Tell whether a string contains at least one multi byte character.
 *
 * The check compares the byte length (strlen) with the character length
 * (mb_strlen): the string is multi byte as soon as it needs more bytes
 * than it has characters.
 *
 * @param string $string
 *
 * @return bool
 */
public static function isMultibyte($string)
{
    $byteLength = strlen($string);
    $charLength = mb_strlen($string);

    return $byteLength > $charLength;
}
}
45 changes: 45 additions & 0 deletions src/WholeTextFinder.php
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,54 @@ public static function find($haystack, $needle, $skipHtmlEntities = true, $exact

preg_match_all($patternAndHaystack['pattern'], $patternAndHaystack['haystack'], $matches, PREG_OFFSET_CAPTURE);

self::mbCorrectMatchPositions($patternAndHaystack['haystack'], $matches);

return $matches[0];
}

/**
 * Correct the match positions for multi byte strings.
 *
 * The offsets captured with PREG_OFFSET_CAPTURE are rewritten in place
 * (through the $matches reference) for every full-pattern match stored in
 * $matches[0]. Nothing is returned: the caller reads the corrected values
 * straight from $matches.
 *
 * @param string $haystack
 * @param array  $matches  preg_match_all() result, modified by reference
 *
 * @return void
 */
private static function mbCorrectMatchPositions( $haystack, &$matches)
{
    // Nothing to correct when there are no matches, or when every character
    // of the haystack is a single byte long.
    if (empty($matches[0]) || !Strings::isMultibyte($haystack)) {
        return;
    }

    foreach ($matches[0] as $index => $match) {
        $word     = $match[0];
        $position = $match[1];

        $matches[0][$index][1] = self::mbFindTheCorrectPosition($haystack, $word, $position);
    }
}

/**
 * Convert the byte offset of a match into its character offset.
 *
 * The number of characters that precede the match is counted directly.
 * Searching backwards from the byte offset until $word is found is not
 * reliable: when the same word occurs more than once, the search can stop
 * on a later occurrence and report the wrong position.
 *
 * @param string $haystack
 * @param string $word      matched text (kept for signature compatibility, not needed by the conversion)
 * @param int    $position  byte offset of the match; updated by reference to the character offset
 *
 * @return int the character offset of the match
 */
private static function mbFindTheCorrectPosition( $haystack, $word, &$position)
{
    // An offset of zero (or less) has no preceding characters to count.
    if ($position > 0) {
        // substr() is byte based, so this is exactly the text that precedes
        // the match; its character length is the character offset.
        $position = mb_strlen(substr($haystack, 0, $position));
    }

    return $position;
}

/**
* @param string $haystack
* @param string $needle
Expand Down
12 changes: 12 additions & 0 deletions tests/StringsTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,16 @@ public function html_entity_decode()

$this->assertEquals(Strings::htmlEntityDecode($input), $output);
}

/**
 * A plain ASCII phrase must not be reported as multi byte, while the same
 * phrase with an accented letter must be.
 *
 * @test
 */
public function is_multibyte()
{
    $asciiPhrase    = "La casa e bella";
    $accentedPhrase = "La casa è bella";

    $asciiResult    = Strings::isMultibyte($asciiPhrase);
    $accentedResult = Strings::isMultibyte($accentedPhrase);

    $this->assertFalse($asciiResult);
    $this->assertTrue($accentedResult);
}
}
6 changes: 3 additions & 3 deletions tests/WholeTextFinderReplacementTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ public function find_and_replace_test_on_greek_text()
$expected = [
'replacement' => 'Δύο παράγοντες καθόρισαν την αντίληψή μου για την Τενεσί Ουίλιαμς test τη σκηνική παρουσίαση των κειμένων: η Maria Britneva test η Annette Saddik, αφετέρου.',
'occurrencies' => [
[$needle, 122],
[$needle, 213],
[$needle, 66],
[$needle, 123],
],
];
$matches = WholeTextFinder::findAndReplace($haystack, $needle, $replacement);
Expand Down Expand Up @@ -64,7 +64,7 @@ public function find_and_replace_must_skip_matecat_ph_tags()
public function find_and_replace_must_skip_matecat_html_tags()
{
$haystack = "Beauty -> 0 Anti-Akne Gesichtsreiniger Schlankmacher <g id=\"2\">XXX</g>";
$needle = 0;
$needle = "0";
$replacement = "test";

$expected = "Beauty -> test Anti-Akne Gesichtsreiniger Schlankmacher <g id=\"2\">XXX</g>";
Expand Down
21 changes: 18 additions & 3 deletions tests/WholeTextFinderTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,21 @@

class WholeTextFinderTest extends TestCase
{
/**
 * Both occurrences of the needle must be found, and their positions must be
 * expressed in characters (10 and 16) even though the haystack contains an
 * accented letter before them.
 *
 * @test
 */
public function can_detect_positions()
{
    $phrase = "La casa è bella bella";
    $term   = "bella";

    $found = WholeTextFinder::find($phrase, $term, true, true, true);

    $this->assertCount(2, $found);

    list($firstMatch, $secondMatch) = $found;

    $this->assertEquals(10, $firstMatch[1]);
    $this->assertEquals(16, $secondMatch[1]);
}

/**
* @test
*/
Expand Down Expand Up @@ -138,17 +153,17 @@ public function find_should_return_correct_matches()
$expected = [
[
0 => 'ggio',
1 => 23
1 => 22
]
];
$expected2 = [
[
0 => 'ggio',
1 => 18
1 => 17
],
[
0 => 'ggio',
1 => 23
1 => 22
]
];

Expand Down

0 comments on commit 43a8b2c

Please sign in to comment.