Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

EZP-31248: Fixed handling non-printable characters for Search #2904

Merged
merged 10 commits into from
May 6, 2020
100 changes: 90 additions & 10 deletions eZ/Publish/API/Repository/Tests/SearchServiceLocationTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,16 @@
*/
namespace eZ\Publish\API\Repository\Tests;

use eZ\Publish\API\Repository\Exceptions\NotImplementedException;
use EzSystems\EzPlatformSolrSearchEngine\Tests\SetupFactory\LegacySetupFactory as LegacySolrSetupFactory;
use eZ\Publish\API\Repository\Tests\SetupFactory\LegacyElasticsearch;
use eZ\Publish\Core\Repository\Values\Content\Location;
use eZ\Publish\API\Repository\Values\Content\Content;
use eZ\Publish\API\Repository\Values\Content\LocationQuery;
use eZ\Publish\API\Repository\Values\Content\Query\Criterion;
use eZ\Publish\API\Repository\Values\Content\Query\SortClause;
use eZ\Publish\API\Repository\Values\Content\Search\SearchResult;
use eZ\Publish\API\Repository\Values\Content\Search\SearchHit;
use eZ\Publish\API\Repository\Exceptions\NotImplementedException;
use eZ\Publish\API\Repository\Values\Content\Search\SearchResult;
use eZ\Publish\Core\Repository\Values\Content\Location;

/**
* Test case for Location operations in the SearchService.
Expand Down Expand Up @@ -101,6 +103,30 @@ protected function createMultipleCountriesContent()
return $content;
}

protected function createFolderWithNonPrintableUtf8Characters(): Content
{
$repository = $this->getRepository();
$contentTypeService = $repository->getContentTypeService();
$contentService = $repository->getContentService();

$contentType = $contentTypeService->loadContentTypeByIdentifier('folder');
$createStruct = $contentService->newContentCreateStruct($contentType, 'eng-GB');
$createStruct->remoteId = 'non-printable-char-folder-123';
$createStruct->alwaysAvailable = false;
$createStruct->setField(
'name',
utf8_decode("Non\x09Printable\x0EFolder")
);

$locationCreateStruct = $repository->getLocationService()->newLocationCreateStruct(2);
$draft = $contentService->createContent($createStruct, [$locationCreateStruct]);
$content = $contentService->publishVersion($draft->getVersionInfo());

$this->refreshSearch($repository);

return $content;
}

/**
* Test for the findLocations() method.
*
Expand Down Expand Up @@ -157,6 +183,67 @@ public function testFieldCollectionContainsNoMatch()
$this->assertEquals(0, $result->totalCount);
}

/**
* @covers \eZ\Publish\API\Repository\SearchService::findLocations
*/
public function testNonPrintableUtf8Characters(): void
{
$folder = $this->createFolderWithNonPrintableUtf8Characters();
$query = new LocationQuery(
[
'query' => new Criterion\Field(
'name',
Criterion\Operator::EQ,
utf8_decode("Non\x09Printable\x0EFolder")
alongosz marked this conversation as resolved.
Show resolved Hide resolved
),
]
);

$repository = $this->getRepository();
$searchService = $repository->getSearchService();
$result = $searchService->findLocations($query);

$this->assertEquals(1, $result->totalCount);
$this->assertEquals(
$folder->contentInfo->mainLocationId,
$result->searchHits[0]->valueObject->id
);
}

/**
* @covers \eZ\Publish\API\Repository\SearchService::findLocations
* @depends \eZ\Publish\API\Repository\Tests\SearchServiceLocationTest::testNonPrintableUtf8Characters
*/
public function testEscapedNonPrintableUtf8Characters(): void
{
$setupFactory = $this->getSetupFactory();

if (
!$setupFactory instanceof LegacyElasticsearch AND
!$setupFactory instanceof LegacySolrSetupFactory
andrerom marked this conversation as resolved.
Show resolved Hide resolved
) {
$this->markTestIncomplete(
'Field Value mappers are used only with Solr and Elastic search engines'
);
}

$query = new LocationQuery(
[
'query' => new Criterion\Field(
'name',
Criterion\Operator::EQ,
'Non PrintableFolder'
),
]
);

$repository = $this->getRepository();
$searchService = $repository->getSearchService();
$result = $searchService->findLocations($query);

$this->assertEquals(1, $result->totalCount);
}

/**
* @expectedException \eZ\Publish\API\Repository\Exceptions\InvalidArgumentException
*/
Expand Down Expand Up @@ -224,8 +311,6 @@ public function testFindLocationsWithNonSearchableField()
}

/**
* @param \eZ\Publish\API\Repository\Values\Content\Search\SearchResult $result
*
* @return array
*/
protected function mapResultLocationIds(SearchResult $result)
Expand Down Expand Up @@ -1136,7 +1221,6 @@ public function testVisibilityCriterionWithHiddenContent()
/**
* Assert that query result matches the given fixture.
*
* @param LocationQuery $query
* @param string $fixture
* @param null|callable $closure
*/
Expand Down Expand Up @@ -1209,8 +1293,6 @@ protected function assertQueryFixture(LocationQuery $query, $fixture, $closure =
/**
* Show a simplified view of the search result for manual introspection.
*
* @param SearchResult $result
*
* @return string
*/
protected function printResult(SearchResult $result)
Expand All @@ -1228,8 +1310,6 @@ protected function printResult(SearchResult $result)
*
* This leads to saner comparisons of results, since we do not get the full
* content objects every time.
*
* @param SearchResult $result
*/
protected function simplifySearchResult(SearchResult $result)
{
Expand Down
20 changes: 13 additions & 7 deletions eZ/Publish/Core/Search/Common/FieldValueMapper/StringMapper.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,20 @@
namespace eZ\Publish\Core\Search\Common\FieldValueMapper;

use eZ\Publish\Core\Search\Common\FieldValueMapper;
use eZ\Publish\SPI\Search\FieldType;
use eZ\Publish\SPI\Search\Field;
use eZ\Publish\SPI\Search\FieldType;

/**
* Common string field value mapper implementation.
*/
class StringMapper extends FieldValueMapper
{
public const REPLACE_WITH_SPACE_PATTERN = '([\x09\x0B\x0C]+)';
public const REMOVE_PATTERN = '([\x00-\x08\x0E-\x1F]+)';

/**
* Check if field can be mapped.
*
* @param \eZ\Publish\SPI\Search\Field $field
*
* @return bool
*/
public function canMap(Field $field)
Expand All @@ -33,8 +34,6 @@ public function canMap(Field $field)
/**
* Map field value to a proper search engine representation.
*
* @param \eZ\Publish\SPI\Search\Field $field
*
* @return mixed
*/
public function map(Field $field)
Expand All @@ -51,9 +50,16 @@ public function map(Field $field)
*/
protected function convert($value)
{
// Remove non-printable characters
// Replace tab, vertical tab, form-feed chars to single space.
$value = preg_replace(
self::REPLACE_WITH_SPACE_PATTERN,
' ',
(string)$value
);

// Remove non-printable characters.
return preg_replace(
'([\x00-\x09\x0B\x0C\x1E\x1F]+)',
self::REMOVE_PATTERN,
'',
(string)$value
);
Expand Down