Skip to content

Commit

Permalink
[FEATURE] Introduce mandatory tag attributes (#74)
Browse files Browse the repository at this point in the history
* [FEATURE] Introduce mandatory tag attributes (Related: #71)
* [TASK] Add iframe example scenario (Related: #70)
* [TASK] Add tests for Attr flag assignment
* [TASK] Add example for Attr::MANDATORY to README.md
* [TASK] Resolve todos in ScenarioTest
  • Loading branch information
ohader authored Oct 6, 2022
1 parent 60bfdc7 commit 688533f
Show file tree
Hide file tree
Showing 6 changed files with 199 additions and 19 deletions.
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ $behavior = (new Behavior())
(new Behavior\Tag('div', Behavior\Tag::ALLOW_CHILDREN))
->addAttrs(...$commonAttrs),
(new Behavior\Tag('a', Behavior\Tag::ALLOW_CHILDREN))
->addAttrs($hrefAttr, ...$commonAttrs),
->addAttrs(...$commonAttrs)
->addAttrs($hrefAttr->withFlags(Behavior\Attr::MANDATORY)),
(new Behavior\Tag('br'))
);

Expand All @@ -66,6 +67,7 @@ $sanitizer = new Sanitizer(...$visitors);

$html = <<< EOH
<div id="main">
<a class="no-href">invalidated, due to missing mandatory `href` attr</a>
<a href="https://typo3.org/" data-type="url" wrong-attr="is-removed">TYPO3</a><br>
(the <span>SPAN, SPAN, SPAN</span> tag shall be encoded to HTML entities)
</div>
Expand All @@ -78,6 +80,7 @@ will result in the following sanitized output

```html
<div id="main">
&lt;a class="no-href"&gt;invalidated, due to missing mandatory `href` attr&lt;/a&gt;
<a href="https://typo3.org/" data-type="url">TYPO3</a><br>
(the &lt;span&gt;SPAN, SPAN, SPAN&lt;/span&gt; tag shall be encoded to HTML entities)
</div>
Expand Down
25 changes: 25 additions & 0 deletions src/Behavior/Attr.php
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,11 @@ class Attr
*/
public const MATCH_ALL_VALUES = 4;

/**
* whether the current attribute is mandatory for the tag
*/
public const MANDATORY = 8;

/**
* either specific attribute name (`class`) or a prefix
* (`data-`) in case corresponding NAME_PREFIX flag is set
Expand All @@ -69,6 +74,16 @@ public function __construct(string $name, int $flags = 0)
$this->flags = $flags;
}

public function withFlags(int $flags): self
{
if ($flags === $this->flags) {
return $this;
}
$target = clone $this;
$target->flags = $flags;
return $target;
}

/**
* Adds value items directly to the current `Attr` instance.
*
Expand Down Expand Up @@ -103,6 +118,11 @@ public function getName(): string
return $this->name;
}

public function getFlags(): int
{
return $this->flags;
}

/**
* @return AttrValueInterface[]
*/
Expand All @@ -129,6 +149,11 @@ public function shallMatchAllValues(): bool
return ($this->flags & self::MATCH_ALL_VALUES) === self::MATCH_ALL_VALUES;
}

public function isMandatory(): bool
{
return ($this->flags & self::MANDATORY) === self::MANDATORY;
}

public function matchesName(string $givenName): bool
{
$givenName = strtolower($givenName);
Expand Down
60 changes: 60 additions & 0 deletions src/Behavior/MultiTokenAttrValue.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
<?php

declare(strict_types=1);

/*
* This file is part of the TYPO3 project.
*
* It is free software; you can redistribute it and/or modify it under the terms
* of the MIT License (MIT). For the full copyright and license information,
* please read the LICENSE file that was distributed with this source code.
*
* The TYPO3 project - inspiring people to share!
*/

namespace TYPO3\HtmlSanitizer\Behavior;

use LogicException;

class MultiTokenAttrValue implements AttrValueInterface
{
/**
* @var string
*/
protected $delimiter;

/**
* @var list<string>
*/
protected $tokens;

public function __construct(string $delimiter, string ...$tokens)
{
if ($delimiter === '') {
throw new LogicException('Delimiter cannot be empty', 1642111976);
}
$tokens = array_filter($tokens, [$this, 'keepNonEmpty']);
if ($tokens === []) {
throw new LogicException('Tokens cannot be empty or only empty strings', 1642111637);
}
$this->delimiter = $delimiter;
$this->tokens = $tokens;
}

public function matches(string $value): bool
{
$tokens = explode($this->delimiter, $value);
$tokens = array_filter($tokens, [$this, 'keepNonEmpty']);
// in case there is no token, the result implicit is `true`
// @todo has to be changed, in case mandatory tokes would be implemented
if (empty($tokens)) {
return true;
}
return array_diff($tokens, $this->tokens) === [];
}

protected function keepNonEmpty(string $item): bool
{
return $item !== '';
}
}
46 changes: 35 additions & 11 deletions src/Visitor/CommonVisitor.php
Original file line number Diff line number Diff line change
Expand Up @@ -72,17 +72,15 @@ public function enterNode(DOMNode $node): ?DOMNode
'behavior' => $this->behavior->getName(),
'nodeName' => $node->nodeName,
]);
if ($this->behavior->shallEncodeInvalidTag()) {
return $this->convertToText($node);
}
return null;
return $this->handleInvalidTag($node);
}
$node = $this->processAttributes($node, $tag);
$node = $this->processChildren($node, $tag);
// completely remove node, in case it is expected to exist with attributes only
if ($node !== null && $node->attributes->length === 0 && $tag->shallPurgeWithoutAttrs()) {
if ($node instanceof DOMElement && $node->attributes->length === 0 && $tag->shallPurgeWithoutAttrs()) {
return null;
}
$node = $this->handleMandatoryAttributes($node, $tag);
return $node;
}

Expand All @@ -107,10 +105,10 @@ public function leaveNode(DOMNode $node): ?DOMNode
return $node;
}

protected function processAttributes(?DOMElement $node, Behavior\Tag $tag): ?DOMElement
protected function processAttributes(?DOMNode $node, Behavior\Tag $tag): ?DOMNode
{
if ($node === null) {
return null;
if (!$node instanceof DOMElement) {
return $node;
}
// reverse processing of attributes,
// allowing to directly remove attribute nodes
Expand All @@ -126,10 +124,10 @@ protected function processAttributes(?DOMElement $node, Behavior\Tag $tag): ?DOM
return $node;
}

protected function processChildren(?DOMElement $node, Behavior\Tag $tag): ?DOMElement
protected function processChildren(?DOMNode $node, Behavior\Tag $tag): ?DOMNode
{
if ($node === null) {
return null;
if (!$node instanceof DOMElement) {
return $node;
}
if (!$tag->shallAllowChildren()
&& $node->childNodes->length > 0
Expand Down Expand Up @@ -170,6 +168,32 @@ protected function processAttribute(DOMElement $node, Behavior\Tag $tag, DOMAttr
}
}

protected function handleMandatoryAttributes(?DOMNode $node, Behavior\Tag $tag): ?DOMNode
{
if (!$node instanceof DOMElement) {
return $node;
}
foreach ($tag->getAttrs() as $attr) {
if ($attr->isMandatory() && !$node->hasAttribute($attr->getName())) {
$this->log('Missing mandatory attribute {nodeName}.{attrName}', [
'behavior' => $this->behavior->getName(),
'nodeName' => $node->nodeName,
'attrName' => $attr->getName(),
]);
return $this->handleInvalidTag($node);
}
}
return $node;
}

protected function handleInvalidTag(DOMNode $node): ?DOMNode
{
if ($this->behavior->shallEncodeInvalidTag()) {
return $this->convertToText($node);
}
return null;
}

/**
* @param DOMNode $node
* @param string $name
Expand Down
16 changes: 14 additions & 2 deletions tests/Behavior/AttrTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,18 @@

class AttrTest extends TestCase
{
/**
* @test
*/
public function withFlagsClonesInstance(): void
{
$attr = new Attr('test', Attr::BLUNT);
$modifiedAttr = $attr->withFlags(Attr::MANDATORY);
self::assertNotSame($attr, $modifiedAttr);
self::assertEquals(Attr::BLUNT, $attr->getFlags());
self::assertEquals(Attr::MANDATORY, $modifiedAttr->getFlags());
}

/**
* @test
*/
Expand All @@ -36,7 +48,7 @@ public function addValuesKeepsInstance(): void
/**
* @test
*/
public function withValuesKeepsInstance(): void
public function withValuesKeepsInstanceWhenNotModified(): void
{
$valueA = new DatasetAttrValue('a1', 'a2');
$valueB = new DatasetAttrValue('b1', 'b2');
Expand All @@ -51,7 +63,7 @@ public function withValuesKeepsInstance(): void
/**
* @test
*/
public function withValuesClonesInstance(): void
public function withValuesClonesInstanceWhenModified(): void
{
$valueA = new DatasetAttrValue('a1', 'a2');
$valueB = new DatasetAttrValue('b1', 'b2');
Expand Down
66 changes: 61 additions & 5 deletions tests/ScenarioTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

use PHPUnit\Framework\TestCase;
use TYPO3\HtmlSanitizer\Behavior;
use TYPO3\HtmlSanitizer\Behavior\Attr\UriAttrValueBuilder;
use TYPO3\HtmlSanitizer\Sanitizer;
use TYPO3\HtmlSanitizer\Visitor\CommonVisitor;

Expand Down Expand Up @@ -114,9 +115,9 @@ public function isJsonLdScriptAllowed(): void
'2:<script type="application/javascript">alert(2)</script>',
// `type` attr will be removed -> no attrs -> tag will be removed due to `PURGE_WITHOUT_ATTRS`
'3:<script type="application/ecmascript">alert(3)</script>',
// @todo not sanitized by `PURGE_WITHOUT_ATTRS` -> `type` attr value needs to be mandatory
// tag will be encoded due to incompleteness, mandatory `type` attr is missing
'4:<script id="identifier">alert(1)</script>',
// @todo not sanitized by `PURGE_WITHOUT_ATTRS` -> `type` attr value needs to be mandatory
// tag will be encoded due to incompleteness, mandatory `type` attr mismatches
'5:<script id="identifier" type="application/javascript">alert(2)</script>',
// tag will be removed due to `PURGE_WITHOUT_CHILDREN`
'6:<script type="application/ld+json"></script>',
Expand All @@ -128,8 +129,8 @@ public function isJsonLdScriptAllowed(): void
'1:',
'2:',
'3:',
'4:<script id="identifier">alert(1)</script>',
'5:<script id="identifier">alert(2)</script>',
'4:&lt;script id="identifier"&gt;alert(1)&lt;/script&gt;',
'5:&lt;script id="identifier"&gt;alert(2)&lt;/script&gt;',
'6:',
'7:<script type="application/ld+json">alert(4)</script>',
'8:<script type="application/ld+json">{"@id": "https://github.com/TYPO3/html-sanitizer"}</script>',
Expand All @@ -144,7 +145,7 @@ public function isJsonLdScriptAllowed(): void
Behavior\Tag::PURGE_WITHOUT_ATTRS + Behavior\Tag::PURGE_WITHOUT_CHILDREN + Behavior\Tag::ALLOW_CHILDREN
))->addAttrs(
(new Behavior\Attr('id')),
(new Behavior\Attr('type'))
(new Behavior\Attr('type', Behavior\Attr::MANDATORY))
->addValues(new Behavior\DatasetAttrValue('application/ld+json'))
)
);
Expand All @@ -154,4 +155,59 @@ public function isJsonLdScriptAllowed(): void
);
self::assertSame($expectation, $sanitizer->sanitize($payload));
}

/**
* @test
*/
public function iframeSandboxIsAllowed(): void
{
$payload = implode("\n" , [
'1:<iframe src="https://example.org/"></iframe>',
'2:<iframe src="https://example.org/" sandbox></iframe>',
// `sandbox` will be removed, since token is not valid
'3:<iframe src="https://example.org/" sandbox="allow-non-existing-property"></iframe>',
'4:<iframe src="https://example.org/" allow="fullscreen" sandbox="allow-downloads allow-modals"></iframe>',
'5:<iframe src="https://example.org/" sandbox="allow-downloads allow-modals allow-orientation-lock allow-pointer-lock allow-popups allow-scripts"></iframe>',
]);
$expectation = implode("\n" , [
'1:&lt;iframe src="https://example.org/"&gt;&lt;/iframe&gt;',
'2:<iframe src="https://example.org/" sandbox></iframe>',
'3:&lt;iframe src="https://example.org/"&gt;&lt;/iframe&gt;',
'4:<iframe src="https://example.org/" allow="fullscreen" sandbox="allow-downloads allow-modals"></iframe>',
'5:<iframe src="https://example.org/" sandbox="allow-downloads allow-modals allow-orientation-lock allow-pointer-lock allow-popups allow-scripts"></iframe>',
]);

$behavior = (new Behavior())
->withFlags(Behavior::ENCODE_INVALID_TAG + Behavior::REMOVE_UNEXPECTED_CHILDREN)
->withName('scenario-test')
->withTags(
(new Behavior\Tag('iframe'))->addAttrs(
(new Behavior\Attr('id')),
// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/iframe#attr-allow
(new Behavior\Attr('allow'))->withValues(
new Behavior\MultiTokenAttrValue(' ', 'fullscreen')
),
// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/iframe#attr-sandbox
(new Behavior\Attr('sandbox', Behavior\Attr::MANDATORY))->withValues(
new Behavior\MultiTokenAttrValue(
' ',
'allow-downloads',
'allow-modals',
'allow-orientation-lock',
'allow-pointer-lock',
'allow-popups',
'allow-scripts'
)
),
(new Behavior\Attr('src'))->withValues(
...(new UriAttrValueBuilder())->allowSchemes('http', 'https')->getValues()
)
)
);

$sanitizer = new Sanitizer(
new CommonVisitor($behavior)
);
self::assertSame($expectation, $sanitizer->sanitize($payload));
}
}

0 comments on commit 688533f

Please sign in to comment.