From a4bb1de26a936764f1b513bf41e073200ffbf781 Mon Sep 17 00:00:00 2001 From: Nolan Lawson Date: Sat, 5 Oct 2024 20:57:01 -1000 Subject: [PATCH] fix: fix `<noframes>` parsing (#1277) * fix: fix `<noframes>` parsing Fixes #972 --- packages/parse5/lib/parser/index.test.ts | 34 ++++++++++++++++++++++++ packages/parse5/lib/parser/index.ts | 13 ++++----- 2 files changed, 41 insertions(+), 6 deletions(-) diff --git a/packages/parse5/lib/parser/index.test.ts b/packages/parse5/lib/parser/index.test.ts index 890594e0..249d00c7 100644 --- a/packages/parse5/lib/parser/index.test.ts +++ b/packages/parse5/lib/parser/index.test.ts @@ -3,6 +3,7 @@ import { parseFragment, parse } from 'parse5'; import { jest } from '@jest/globals'; import { generateParsingTests } from 'parse5-test-utils/utils/generate-parsing-tests.js'; import { treeAdapters } from 'parse5-test-utils/utils/common.js'; +import type { Element, TextNode } from '../tree-adapters/default.js'; generateParsingTests( 'parser', @@ -110,4 +111,37 @@ describe('parser', () => { expect(onItemPop).toHaveBeenLastCalledWith(bodyElement.childNodes[0], bodyElement); }); }); + + describe('rawtext parsing', () => { + it.each([ + ['iframe'], + ['noembed'], + ['noframes'], + ['noscript'], + ['script'], + ['style'], + ['textarea'], + ['title'], + ['xmp'], + ])('<%s>', (tagName) => { + const html = `<r><${tagName}><math id="</${tagName}><b>should be outside</b>">`; + const fragment = parseFragment(html); + + expect(fragment.childNodes.length).toBe(1); + const r = fragment.childNodes[0] as Element; + expect(r.nodeName).toBe('r'); + expect(r.childNodes).toHaveLength(3); + expect(r.childNodes.map((_) => _.nodeName)).toEqual([tagName, 'b', '#text']); + + const target = r.childNodes[0] as Element; + expect(target.childNodes).toHaveLength(1); + expect(target.childNodes[0].nodeName).toBe('#text'); + expect((target.childNodes[0] as TextNode).value).toBe('<math id="'); + + const b = r.childNodes[1] as Element; + expect(b.childNodes).toHaveLength(1); + 
expect(b.childNodes[0].nodeName).toBe('#text'); + expect((b.childNodes[0] as TextNode).value).toBe('should be outside'); + }); + }); }); diff --git a/packages/parse5/lib/parser/index.ts b/packages/parse5/lib/parser/index.ts index 5dd908ca..8a0fcb34 100644 --- a/packages/parse5/lib/parser/index.ts +++ b/packages/parse5/lib/parser/index.ts @@ -2195,9 +2195,9 @@ function iframeStartTagInBody<T extends TreeAdapterTypeMap>(p: Parser<T>, token: p._switchToTextParsing(token, TokenizerMode.RAWTEXT); } -//NOTE: here we assume that we always act as an user agent with enabled plugins, so we parse -//<noembed> as rawtext. -function noembedStartTagInBody<T extends TreeAdapterTypeMap>(p: Parser<T>, token: TagToken): void { +//NOTE: here we assume that we always act as a user agent with enabled plugins/frames, so we parse +//<noembed>/<noframes> as rawtext. +function rawTextStartTagInBody<T extends TreeAdapterTypeMap>(p: Parser<T>, token: TagToken): void { p._switchToTextParsing(token, TokenizerMode.RAWTEXT); } @@ -2449,8 +2449,9 @@ function startTagInBody<T extends TreeAdapterTypeMap>(p: Parser<T>, token: TagTo optgroupStartTagInBody(p, token); break; } - case $.NOEMBED: { - noembedStartTagInBody(p, token); + case $.NOEMBED: + case $.NOFRAMES: { + rawTextStartTagInBody(p, token); break; } case $.FRAMESET: { @@ -2463,7 +2464,7 @@ function startTagInBody<T extends TreeAdapterTypeMap>(p: Parser<T>, token: TagTo } case $.NOSCRIPT: { if (p.options.scriptingEnabled) { - noembedStartTagInBody(p, token); + rawTextStartTagInBody(p, token); } else { genericStartTagInBody(p, token); }