-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
IBX-7987: Added extension point to skip nodes while extracting text
- Loading branch information
Showing
10 changed files
with
250 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
14 changes: 14 additions & 0 deletions
14
src/contracts/RichText/TextExtractor/NodeFilterFactoryInterface.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
<?php | ||
|
||
/** | ||
* @copyright Copyright (C) Ibexa AS. All rights reserved. | ||
* @license For full copyright and license information view LICENSE file distributed with this source code. | ||
*/ | ||
declare(strict_types=1); | ||
|
||
namespace Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor; | ||
|
||
/**
 * Factory for node filters used while extracting text.
 */
interface NodeFilterFactoryInterface
{
    /**
     * Creates a filter matching nodes by a path of node names.
     *
     * The bundled NodePathFilter implementation treats the last name as the
     * name of the inspected node and the preceding names as the names of its
     * successive ancestors (closest ancestor last-but-one).
     *
     * @param string ...$path node names, outermost ancestor first
     */
    public function createPathFilter(string ...$path): NodeFilterInterface;
}
22 changes: 22 additions & 0 deletions
22
src/contracts/RichText/TextExtractor/NodeFilterInterface.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
<?php | ||
|
||
/** | ||
* @copyright Copyright (C) Ibexa AS. All rights reserved. | ||
* @license For full copyright and license information view LICENSE file distributed with this source code. | ||
*/ | ||
declare(strict_types=1); | ||
|
||
namespace Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor; | ||
|
||
use DOMNode; | ||
|
||
/**
 * Decides, node by node, what is skipped during text extraction.
 */
interface NodeFilterInterface
{
    /**
     * Tells whether the given node should be removed.
     *
     * @return bool true to remove the node, false to preserve it
     */
    public function filter(DOMNode $node): bool;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
37 changes: 37 additions & 0 deletions
37
src/lib/RichText/TextExtractor/NodeFilter/AggregateFilter.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
<?php | ||
|
||
/** | ||
* @copyright Copyright (C) Ibexa AS. All rights reserved. | ||
* @license For full copyright and license information view LICENSE file distributed with this source code. | ||
*/ | ||
declare(strict_types=1); | ||
|
||
namespace Ibexa\FieldTypeRichText\RichText\TextExtractor\NodeFilter; | ||
|
||
use DOMNode; | ||
use Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor\NodeFilterInterface; | ||
|
||
/**
 * Combines several node filters into one: a node is removed as soon as any
 * of the wrapped filters asks for its removal.
 */
final class AggregateFilter implements NodeFilterInterface
{
    /** @var iterable<\Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor\NodeFilterInterface> */
    private iterable $filters;

    /**
     * @param iterable<\Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor\NodeFilterInterface> $filters
     *        filters consulted in iteration order
     */
    public function __construct(iterable $filters)
    {
        $this->filters = $filters;
    }

    /**
     * Returns true when at least one wrapped filter returns true for the node.
     *
     * Filters following the first one that returns true are not consulted.
     * With no filters at all the node is preserved (false).
     */
    public function filter(DOMNode $node): bool
    {
        foreach ($this->filters as $candidate) {
            if (!$candidate->filter($node)) {
                continue;
            }

            return true;
        }

        return false;
    }
}
20 changes: 20 additions & 0 deletions
20
src/lib/RichText/TextExtractor/NodeFilter/NodeFilterFactory.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
<?php | ||
|
||
/** | ||
* @copyright Copyright (C) Ibexa AS. All rights reserved. | ||
* @license For full copyright and license information view LICENSE file distributed with this source code. | ||
*/ | ||
declare(strict_types=1); | ||
|
||
namespace Ibexa\FieldTypeRichText\RichText\TextExtractor\NodeFilter; | ||
|
||
use Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor\NodeFilterFactoryInterface; | ||
use Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor\NodeFilterInterface; | ||
|
||
/**
 * Default node filter factory producing the filters shipped in this namespace.
 */
final class NodeFilterFactory implements NodeFilterFactoryInterface
{
    /**
     * Builds a NodePathFilter for the given node names.
     *
     * @param string ...$path node names, forwarded unchanged to NodePathFilter
     */
    public function createPathFilter(string ...$path): NodeFilterInterface
    {
        $pathFilter = new NodePathFilter(...$path);

        return $pathFilter;
    }
}
40 changes: 40 additions & 0 deletions
40
src/lib/RichText/TextExtractor/NodeFilter/NodePathFilter.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
<?php | ||
|
||
/** | ||
* @copyright Copyright (C) Ibexa AS. All rights reserved. | ||
* @license For full copyright and license information view LICENSE file distributed with this source code. | ||
*/ | ||
declare(strict_types=1); | ||
|
||
namespace Ibexa\FieldTypeRichText\RichText\TextExtractor\NodeFilter; | ||
|
||
use DOMNode; | ||
use Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor\NodeFilterInterface; | ||
|
||
/**
 * Matches a node whose own name and ancestor names end with the configured path.
 *
 * For the path ('b', 'c') a node named "c" whose parent is named "b" matches,
 * regardless of what lies above "b".
 */
final class NodePathFilter implements NodeFilterInterface
{
    /**
     * Configured node names stored innermost-first, i.e. reversed relative to
     * the constructor arguments.
     *
     * @var string[]
     */
    private array $path;

    /**
     * @param string ...$path node names, outermost ancestor first and the
     *        inspected node's own name last
     */
    public function __construct(string ...$path)
    {
        $this->path = array_reverse($path);
    }

    /**
     * Walks from the node towards the document root, comparing node names
     * against the configured path.
     *
     * Note: with an empty path the loop body never runs, so every node matches.
     *
     * @return bool true when every configured name matches, false on the first
     *         mismatch or when the ancestor chain ends before the path does
     */
    public function filter(DOMNode $node): bool
    {
        // Separate cursor so the inspected node itself is never reassigned.
        $current = $node;

        foreach ($this->path as $expectedName) {
            $matches = $current !== null && $current->nodeName === $expectedName;
            if (!$matches) {
                return false;
            }

            $current = $current->parentNode;
        }

        return true;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
33 changes: 33 additions & 0 deletions
33
tests/lib/RichText/TextExtractor/NodeFilter/AggregateFilterTest.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
<?php | ||
|
||
/** | ||
* @copyright Copyright (C) Ibexa AS. All rights reserved. | ||
* @license For full copyright and license information view LICENSE file distributed with this source code. | ||
*/ | ||
declare(strict_types=1); | ||
|
||
namespace Ibexa\Tests\FieldTypeRichText\RichText\TextExtractor\NodeFilter; | ||
|
||
use DOMNode; | ||
use Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor\NodeFilterInterface; | ||
use Ibexa\FieldTypeRichText\RichText\TextExtractor\NodeFilter\AggregateFilter; | ||
use PHPUnit\Framework\TestCase; | ||
|
||
final class AggregateFilterTest extends TestCase
{
    /**
     * The aggregate reports true once a wrapped filter does, and must not
     * consult any filter placed after that one.
     */
    public function testFilter(): void
    {
        $node = $this->createMock(DOMNode::class);

        // First filter preserves the node, so evaluation has to continue.
        $preserving = $this->createMock(NodeFilterInterface::class);
        $preserving
            ->expects(self::once())
            ->method('filter')
            ->with($node)
            ->willReturn(false);

        // Second filter removes the node, which settles the result.
        $removing = $this->createMock(NodeFilterInterface::class);
        $removing
            ->expects(self::once())
            ->method('filter')
            ->with($node)
            ->willReturn(true);

        // Third filter must never be reached.
        $unreached = $this->createMock(NodeFilterInterface::class);
        $unreached
            ->expects(self::never())
            ->method('filter');

        $result = (new AggregateFilter([$preserving, $removing, $unreached]))->filter($node);

        self::assertTrue($result);
    }
}
47 changes: 47 additions & 0 deletions
47
tests/lib/RichText/TextExtractor/NodeFilter/NodePathFilterTest.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
<?php | ||
|
||
/** | ||
* @copyright Copyright (C) Ibexa AS. All rights reserved. | ||
* @license For full copyright and license information view LICENSE file distributed with this source code. | ||
*/ | ||
declare(strict_types=1); | ||
|
||
namespace Ibexa\Tests\FieldTypeRichText\RichText\TextExtractor\NodeFilter; | ||
|
||
use DOMDocument; | ||
use DOMNode; | ||
use DOMNodeList; | ||
use DOMXPath; | ||
use Ibexa\FieldTypeRichText\RichText\TextExtractor\NodeFilter\NodePathFilter; | ||
use PHPUnit\Framework\TestCase; | ||
use RuntimeException; | ||
|
||
final class NodePathFilterTest extends TestCase
{
    /**
     * Checks path matching against the document <a><b><c/></b></a>:
     * the path must end at the inspected node and may not be longer than
     * the node's ancestor chain.
     */
    public function testFilter(): void
    {
        $document = new DOMDocument();
        $document->loadXML('<a><b><c></c></b></a>');

        $nodeA = $this->getNode($document, '//a');
        $nodeB = $this->getNode($document, '//b');
        $nodeC = $this->getNode($document, '//c');

        // Path ends with "c", but the inspected node is "b".
        self::assertFalse((new NodePathFilter('b', 'c'))->filter($nodeB));
        // "c" with parent "b" matches the path exactly.
        self::assertTrue((new NodePathFilter('b', 'c'))->filter($nodeC));
        // The innermost name "d" does not match node "a".
        self::assertFalse((new NodePathFilter('a', 'b', 'c', 'd'))->filter($nodeA));
    }

    /**
     * Returns the first node matched by the XPath expression.
     *
     * @throws \RuntimeException when the query fails or matches no node
     */
    private function getNode(DOMDocument $document, string $expression): DOMNode
    {
        $results = (new DOMXPath($document))->query($expression);

        // DOMNodeList::item() yields null for an empty list; without this check
        // a non-matching expression would surface as a TypeError on the return
        // type instead of the descriptive exception below.
        $node = $results instanceof DOMNodeList ? $results->item(0) : null;
        if ($node === null) {
            throw new RuntimeException("Expression '$expression' did not return a node.");
        }

        return $node;
    }
}