Skip to content

Commit

Permalink
IBX-7987: Added extension point to skip nodes while extracting text
Browse files Browse the repository at this point in the history
  • Loading branch information
adamwojs committed Mar 27, 2024
1 parent 6c49bc4 commit f9aaba8
Show file tree
Hide file tree
Showing 10 changed files with 250 additions and 2 deletions.
19 changes: 19 additions & 0 deletions src/bundle/Resources/config/settings/fieldtype_services.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -70,3 +70,22 @@ services:

Ibexa\FieldTypeRichText\RichText\TextExtractor\FullTextExtractor: ~

# The contract interface resolves to the aggregate filter, so a consumer type-hinting
# NodeFilterInterface gets a single filter that consults every tagged filter.
Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor\NodeFilterInterface:
alias: Ibexa\FieldTypeRichText\RichText\TextExtractor\NodeFilter\AggregateFilter

# The factory contract resolves to the bundled factory implementation.
Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor\NodeFilterFactoryInterface:
alias: Ibexa\FieldTypeRichText\RichText\TextExtractor\NodeFilter\NodeFilterFactory

Ibexa\FieldTypeRichText\RichText\TextExtractor\NodeFilter\NodeFilterFactory: ~

# Extension point: every service tagged with
# "ibexa.field_type.richtext.text_extractor.node_filter" is injected into the aggregate.
Ibexa\FieldTypeRichText\RichText\TextExtractor\NodeFilter\AggregateFilter:
arguments:
$filters: !tagged ibexa.field_type.richtext.text_extractor.node_filter

# Built-in filter created through the factory's createPathFilter() with the path
# eztemplate -> ezconfig, i.e. it matches "ezconfig" nodes whose parent is "eztemplate".
ibexa.field_type.richtext.text_extractor.node_filter.template:
class: Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor\NodeFilterInterface
factory: ['@Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor\NodeFilterFactoryInterface', 'createPathFilter']
arguments: ['eztemplate', 'ezconfig']
tags:
- { name: ibexa.field_type.richtext.text_extractor.node_filter }

Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<?php

/**
* @copyright Copyright (C) Ibexa AS. All rights reserved.
* @license For full copyright and license information view LICENSE file distributed with this source code.
*/
declare(strict_types=1);

namespace Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor;

/**
 * Factory for node filters used while extracting text.
 */
interface NodeFilterFactoryInterface
{
/**
 * Creates a filter from a sequence of node names.
 *
 * The bundled factory returns a filter that matches a node when the last name equals
 * the node's own name and each preceding name equals the name of the next ancestor,
 * e.g. createPathFilter('eztemplate', 'ezconfig') matches "ezconfig" nodes whose
 * parent is "eztemplate".
 *
 * @param string ...$path node names, outermost ancestor first
 */
public function createPathFilter(string ...$path): NodeFilterInterface;
}
22 changes: 22 additions & 0 deletions src/contracts/RichText/TextExtractor/NodeFilterInterface.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
<?php

/**
* @copyright Copyright (C) Ibexa AS. All rights reserved.
* @license For full copyright and license information view LICENSE file distributed with this source code.
*/
declare(strict_types=1);

namespace Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor;

use DOMNode;

/**
 * Filters nodes for text extraction.
 *
 * Implementations decide, node by node, what is left out of the extracted text.
 */
interface NodeFilterInterface
{
/**
 * Return false to preserve the node, true to remove it.
 *
 * The bundled FullTextExtractor asks the filter before descending into a node, so a
 * removed node contributes no text at all, including the text of its descendants.
 *
 * @param \DOMNode $node node currently visited by the extractor
 */
public function filter(DOMNode $node): bool;
}
14 changes: 13 additions & 1 deletion src/lib/RichText/TextExtractor/FullTextExtractor.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

use DOMDocument;
use DOMNode;
use Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor\NodeFilterInterface;
use Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractorInterface;

/**
Expand All @@ -19,6 +20,13 @@
*/
final class FullTextExtractor implements TextExtractorInterface
{
private NodeFilterInterface $filter;

public function __construct(NodeFilterInterface $filter)
{
$this->filter = $filter;
}

public function extractText(DOMDocument $document): string
{
return null !== $document->documentElement
Expand All @@ -28,8 +36,12 @@ public function extractText(DOMDocument $document): string

private function extractTextFromNode(DOMNode $node): string
{
$text = '';
if ($this->filter->filter($node) === true) {
// Node is excluded
return '';
}

$text = '';
if ($node->childNodes !== null && $node->childNodes->count() > 0) {
foreach ($node->childNodes as $child) {
$text .= $this->extractTextFromNode($child);
Expand Down
37 changes: 37 additions & 0 deletions src/lib/RichText/TextExtractor/NodeFilter/AggregateFilter.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
<?php

/**
* @copyright Copyright (C) Ibexa AS. All rights reserved.
* @license For full copyright and license information view LICENSE file distributed with this source code.
*/
declare(strict_types=1);

namespace Ibexa\FieldTypeRichText\RichText\TextExtractor\NodeFilter;

use DOMNode;
use Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor\NodeFilterInterface;

/**
 * Composite node filter: a node is removed as soon as any wrapped filter removes it.
 */
final class AggregateFilter implements NodeFilterInterface
{
    /** @var iterable<\Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor\NodeFilterInterface> */
    private iterable $filters;

    /**
     * @param iterable<\Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor\NodeFilterInterface> $filters
     *        filters consulted in iteration order
     */
    public function __construct(iterable $filters)
    {
        $this->filters = $filters;
    }

    /**
     * Consults the wrapped filters one by one and stops at the first that removes the node.
     *
     * @return bool true when at least one wrapped filter returned true; false otherwise,
     *              which includes the case of no wrapped filters at all
     */
    public function filter(DOMNode $node): bool
    {
        foreach ($this->filters as $candidate) {
            if (!$candidate->filter($node)) {
                continue;
            }

            // Short-circuit: remaining filters are not consulted.
            return true;
        }

        return false;
    }
}
20 changes: 20 additions & 0 deletions src/lib/RichText/TextExtractor/NodeFilter/NodeFilterFactory.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
<?php

/**
* @copyright Copyright (C) Ibexa AS. All rights reserved.
* @license For full copyright and license information view LICENSE file distributed with this source code.
*/
declare(strict_types=1);

namespace Ibexa\FieldTypeRichText\RichText\TextExtractor\NodeFilter;

use Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor\NodeFilterFactoryInterface;
use Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor\NodeFilterInterface;

/**
 * Default node filter factory producing NodePathFilter instances.
 */
final class NodeFilterFactory implements NodeFilterFactoryInterface
{
    /**
     * Builds a path filter from the given node names.
     *
     * @param string ...$path node names, forwarded unchanged to NodePathFilter
     */
    public function createPathFilter(string ...$path): NodeFilterInterface
    {
        return new NodePathFilter(...$path);
    }
}
40 changes: 40 additions & 0 deletions src/lib/RichText/TextExtractor/NodeFilter/NodePathFilter.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
<?php

/**
* @copyright Copyright (C) Ibexa AS. All rights reserved.
* @license For full copyright and license information view LICENSE file distributed with this source code.
*/
declare(strict_types=1);

namespace Ibexa\FieldTypeRichText\RichText\TextExtractor\NodeFilter;

use DOMNode;
use Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor\NodeFilterInterface;

/**
 * Matches a node by its own name and the names of its ancestors.
 *
 * For the path ('a', 'b', 'c') a node matches when it is named "c", its parent is
 * named "b" and its grandparent is named "a". An empty path matches every node.
 */
final class NodePathFilter implements NodeFilterInterface
{
    /**
     * Expected node names, innermost first (the constructor arguments reversed),
     * so matching can start at the node and walk up through parentNode.
     *
     * @var string[]
     */
    private array $path;

    /**
     * @param string ...$path node names, outermost ancestor first
     */
    public function __construct(string ...$path)
    {
        $this->path = array_reverse($path);
    }

    /**
     * @return bool true when the node and its ancestors match the whole path, false as soon
     *              as a name differs or the ancestor chain ends before the path does
     */
    public function filter(DOMNode $node): bool
    {
        $current = $node;

        foreach ($this->path as $expectedName) {
            if ($current instanceof DOMNode && $current->nodeName === $expectedName) {
                // Name matches at this level; continue with the parent (null above the root).
                $current = $current->parentNode;

                continue;
            }

            return false;
        }

        return true;
    }
}
6 changes: 5 additions & 1 deletion tests/lib/RichText/TextExtractor/FullTextExtractorTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,17 @@

namespace Ibexa\Tests\FieldTypeRichText\RichText\TextExtractor;

use Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor\NodeFilterInterface;
use Ibexa\FieldTypeRichText\RichText\TextExtractor\FullTextExtractor;

final class FullTextExtractorTest extends BaseTest
{
protected function setUp(): void
{
$this->textExtractor = new FullTextExtractor();
$filter = $this->createMock(NodeFilterInterface::class);
$filter->method('filter')->willReturn(false);

$this->textExtractor = new FullTextExtractor($filter);
}

public function providerForTestExtractText(): array
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
<?php

/**
* @copyright Copyright (C) Ibexa AS. All rights reserved.
* @license For full copyright and license information view LICENSE file distributed with this source code.
*/
declare(strict_types=1);

namespace Ibexa\Tests\FieldTypeRichText\RichText\TextExtractor\NodeFilter;

use DOMNode;
use Ibexa\Contracts\FieldTypeRichText\RichText\TextExtractor\NodeFilterInterface;
use Ibexa\FieldTypeRichText\RichText\TextExtractor\NodeFilter\AggregateFilter;
use PHPUnit\Framework\TestCase;

final class AggregateFilterTest extends TestCase
{
    /**
     * The first two filters are each called exactly once with the node; the second one
     * returns true, so the aggregate returns true and the third filter is never called.
     */
    public function testFilter(): void
    {
        $node = $this->createMock(DOMNode::class);

        $preservingFilter = $this->createMock(NodeFilterInterface::class);
        $preservingFilter
            ->expects(self::once())
            ->method('filter')
            ->with($node)
            ->willReturn(false);

        $removingFilter = $this->createMock(NodeFilterInterface::class);
        $removingFilter
            ->expects(self::once())
            ->method('filter')
            ->with($node)
            ->willReturn(true);

        $unreachedFilter = $this->createMock(NodeFilterInterface::class);
        $unreachedFilter
            ->expects(self::never())
            ->method('filter');

        $aggregateFilter = new AggregateFilter([$preservingFilter, $removingFilter, $unreachedFilter]);
        $isRemoved = $aggregateFilter->filter($node);

        self::assertTrue($isRemoved);
    }
}
47 changes: 47 additions & 0 deletions tests/lib/RichText/TextExtractor/NodeFilter/NodePathFilterTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
<?php

/**
* @copyright Copyright (C) Ibexa AS. All rights reserved.
* @license For full copyright and license information view LICENSE file distributed with this source code.
*/
declare(strict_types=1);

namespace Ibexa\Tests\FieldTypeRichText\RichText\TextExtractor\NodeFilter;

use DOMDocument;
use DOMNode;
use DOMNodeList;
use DOMXPath;
use Ibexa\FieldTypeRichText\RichText\TextExtractor\NodeFilter\NodePathFilter;
use PHPUnit\Framework\TestCase;
use RuntimeException;

final class NodePathFilterTest extends TestCase
{
    /**
     * Checks path matching against the document <a><b><c/></b></a>:
     * a node whose own name differs from the last path element does not match,
     * a node whose name and parent match the path does, and a path whose last
     * element differs from the node's name does not.
     */
    public function testFilter(): void
    {
        $document = new DOMDocument();
        $document->loadXML('<a><b><c></c></b></a>');

        $nodeA = $this->getNode($document, '//a');
        $nodeB = $this->getNode($document, '//b');
        $nodeC = $this->getNode($document, '//c');

        self::assertFalse((new NodePathFilter('b', 'c'))->filter($nodeB));
        self::assertTrue((new NodePathFilter('b', 'c'))->filter($nodeC));
        self::assertFalse((new NodePathFilter('a', 'b', 'c', 'd'))->filter($nodeA));
    }

    /**
     * Returns the first node matched by the XPath expression.
     *
     * @throws \RuntimeException when the query fails or matches no node
     */
    private function getNode(DOMDocument $document, string $expression): DOMNode
    {
        $xpath = new DOMXPath($document);

        $results = $xpath->query($expression);
        // item(0) yields null for an empty result list; handle it together with a failed
        // query so the caller gets the descriptive exception instead of a TypeError
        // from the DOMNode return type.
        $node = $results instanceof DOMNodeList ? $results->item(0) : null;
        if (!$node instanceof DOMNode) {
            throw new RuntimeException("Expression '$expression' did not return a node.");
        }

        return $node;
    }
}

0 comments on commit f9aaba8

Please sign in to comment.