Skip to content

Commit

Permalink
EZP-31248: Fixed handling non-printable characters for Search (#2904)
Browse files Browse the repository at this point in the history
* Removed all non-printable characters in String Field Value Mapper

* Implemented integration test (skipped for LSE)

Co-authored-by: Mateusz Bieniek <[email protected]>
  • Loading branch information
hgiesenow and mateuszbieniek authored May 6, 2020
1 parent 85a355c commit e4e7942
Show file tree
Hide file tree
Showing 2 changed files with 103 additions and 17 deletions.
100 changes: 90 additions & 10 deletions eZ/Publish/API/Repository/Tests/SearchServiceLocationTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,16 @@
*/
namespace eZ\Publish\API\Repository\Tests;

use eZ\Publish\API\Repository\Exceptions\NotImplementedException;
use EzSystems\EzPlatformSolrSearchEngine\Tests\SetupFactory\LegacySetupFactory as LegacySolrSetupFactory;
use eZ\Publish\API\Repository\Tests\SetupFactory\LegacyElasticsearch;
use eZ\Publish\Core\Repository\Values\Content\Location;
use eZ\Publish\API\Repository\Values\Content\Content;
use eZ\Publish\API\Repository\Values\Content\LocationQuery;
use eZ\Publish\API\Repository\Values\Content\Query\Criterion;
use eZ\Publish\API\Repository\Values\Content\Query\SortClause;
use eZ\Publish\API\Repository\Values\Content\Search\SearchResult;
use eZ\Publish\API\Repository\Values\Content\Search\SearchHit;
use eZ\Publish\API\Repository\Exceptions\NotImplementedException;
use eZ\Publish\API\Repository\Values\Content\Search\SearchResult;
use eZ\Publish\Core\Repository\Values\Content\Location;

/**
* Test case for Location operations in the SearchService.
Expand Down Expand Up @@ -178,6 +180,30 @@ protected function createMultipleCountriesContent()
return $content;
}

protected function createFolderWithNonPrintableUtf8Characters(): Content
{
$repository = $this->getRepository();
$contentTypeService = $repository->getContentTypeService();
$contentService = $repository->getContentService();

$contentType = $contentTypeService->loadContentTypeByIdentifier('folder');
$createStruct = $contentService->newContentCreateStruct($contentType, 'eng-GB');
$createStruct->remoteId = 'non-printable-char-folder-123';
$createStruct->alwaysAvailable = false;
$createStruct->setField(
'name',
utf8_decode("Non\x09Printable\x0EFolder")
);

$locationCreateStruct = $repository->getLocationService()->newLocationCreateStruct(2);
$draft = $contentService->createContent($createStruct, [$locationCreateStruct]);
$content = $contentService->publishVersion($draft->getVersionInfo());

$this->refreshSearch($repository);

return $content;
}

/**
* Test for the findLocations() method.
*
Expand Down Expand Up @@ -296,6 +322,67 @@ public function testFieldCollectionContainsNoMatch()
$this->assertEquals(0, $result->totalCount);
}

/**
* @covers \eZ\Publish\API\Repository\SearchService::findLocations
*/
public function testNonPrintableUtf8Characters(): void
{
$folder = $this->createFolderWithNonPrintableUtf8Characters();
$query = new LocationQuery(
[
'query' => new Criterion\Field(
'name',
Criterion\Operator::EQ,
utf8_decode("Non\x09Printable\x0EFolder")
),
]
);

$repository = $this->getRepository();
$searchService = $repository->getSearchService();
$result = $searchService->findLocations($query);

$this->assertEquals(1, $result->totalCount);
$this->assertEquals(
$folder->contentInfo->mainLocationId,
$result->searchHits[0]->valueObject->id
);
}

/**
* @covers \eZ\Publish\API\Repository\SearchService::findLocations
* @depends \eZ\Publish\API\Repository\Tests\SearchServiceLocationTest::testNonPrintableUtf8Characters
*/
public function testEscapedNonPrintableUtf8Characters(): void
{
$setupFactory = $this->getSetupFactory();

if (
!$setupFactory instanceof LegacyElasticsearch &&
!$setupFactory instanceof LegacySolrSetupFactory
) {
$this->markTestIncomplete(
'Field Value mappers are used only with Solr and Elastic search engines'
);
}

$query = new LocationQuery(
[
'query' => new Criterion\Field(
'name',
Criterion\Operator::EQ,
'Non PrintableFolder'
),
]
);

$repository = $this->getRepository();
$searchService = $repository->getSearchService();
$result = $searchService->findLocations($query);

$this->assertEquals(1, $result->totalCount);
}

/**
* @expectedException \eZ\Publish\API\Repository\Exceptions\InvalidArgumentException
*/
Expand Down Expand Up @@ -363,8 +450,6 @@ public function testFindLocationsWithNonSearchableField()
}

/**
* @param \eZ\Publish\API\Repository\Values\Content\Search\SearchResult $result
*
* @return array
*/
protected function mapResultLocationIds(SearchResult $result)
Expand Down Expand Up @@ -1275,7 +1360,6 @@ public function testVisibilityCriterionWithHiddenContent()
/**
* Assert that query result matches the given fixture.
*
* @param LocationQuery $query
* @param string $fixture
* @param callable|null $closure
*/
Expand Down Expand Up @@ -1348,8 +1432,6 @@ protected function assertQueryFixture(LocationQuery $query, $fixture, $closure =
/**
* Show a simplified view of the search result for manual introspection.
*
* @param SearchResult $result
*
* @return string
*/
protected function printResult(SearchResult $result)
Expand All @@ -1367,8 +1449,6 @@ protected function printResult(SearchResult $result)
*
* This leads to saner comparisons of results, since we do not get the full
* content objects every time.
*
* @param SearchResult $result
*/
protected function simplifySearchResult(SearchResult $result)
{
Expand Down
20 changes: 13 additions & 7 deletions eZ/Publish/Core/Search/Common/FieldValueMapper/StringMapper.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,20 @@
namespace eZ\Publish\Core\Search\Common\FieldValueMapper;

use eZ\Publish\Core\Search\Common\FieldValueMapper;
use eZ\Publish\SPI\Search\FieldType;
use eZ\Publish\SPI\Search\Field;
use eZ\Publish\SPI\Search\FieldType;

/**
* Common string field value mapper implementation.
*/
class StringMapper extends FieldValueMapper
{
public const REPLACE_WITH_SPACE_PATTERN = '([\x09\x0B\x0C]+)';
public const REMOVE_PATTERN = '([\x00-\x08\x0E-\x1F]+)';

/**
* Check if field can be mapped.
*
* @param \eZ\Publish\SPI\Search\Field $field
*
* @return bool
*/
public function canMap(Field $field)
Expand All @@ -31,8 +32,6 @@ public function canMap(Field $field)
/**
* Map field value to a proper search engine representation.
*
* @param \eZ\Publish\SPI\Search\Field $field
*
* @return mixed
*/
public function map(Field $field)
Expand All @@ -49,9 +48,16 @@ public function map(Field $field)
*/
protected function convert($value)
{
// Remove non-printable characters
// Replace tab, vertical tab, form-feed chars to single space.
$value = preg_replace(
self::REPLACE_WITH_SPACE_PATTERN,
' ',
(string)$value
);

// Remove non-printable characters.
return preg_replace(
'([\x00-\x09\x0B\x0C\x1E\x1F]+)',
self::REMOVE_PATTERN,
'',
(string)$value
);
Expand Down

0 comments on commit e4e7942

Please sign in to comment.