From 20dedba69946ccbe47cfea873efd4f0ef8eac977 Mon Sep 17 00:00:00 2001 From: scripthunter7 <57285466+scripthunter7@users.noreply.github.com> Date: Sat, 18 Feb 2023 22:38:19 +0100 Subject: [PATCH] Improve `contains` This heuristic hopefully covers all cases, but the syntax is quite permissive --- src/syntax/index.js | 80 ++++- test/syntax/contains.test.js | 670 ++++++++++++++++++++++++++++++++++- 2 files changed, 714 insertions(+), 36 deletions(-) diff --git a/src/syntax/index.js b/src/syntax/index.js index 8bf520a..480e3fc 100644 --- a/src/syntax/index.js +++ b/src/syntax/index.js @@ -9,7 +9,6 @@ import { fork as originalFork, tokenize as originalTokenize, tokenTypes } from "css-tree"; import { CLOSING_PARENTHESIS, DOUBLE_QUOTE, ESCAPE, OPENING_PARENTHESIS, SPACE } from "../utils/constants.js"; -import { findNextUnescapedCharacter } from "../utils/string.js"; const selector = { parse() { @@ -106,23 +105,69 @@ const extCssContains = { * fix the token stream here to avoid this error. */ parse() { - // Empty pseudo-class - if (this.tokenType === tokenTypes.RightParenthesis) { - this.error('No parameter specified for "contains()" pseudo-class'); + // Get the current token stream + const tokens = this.dump(); + + // Note: CSSTree removes the whitespace token after the function name before calling this function + // So if we have :contains( something), our tokenIndex here points to "something" and not to the + // whitespace token. 
+ + // :contains() case, but not :contains( something) case, so we check if the previous token is not a whitespace + if (this.tokenType === tokenTypes.RightParenthesis && tokens[this.tokenIndex - 1].type !== "whitespace-token") { + this.error('Empty parameter specified for "contains()" pseudo-class'); } - // Create a list for children - const children = this.createList(); + // Find the "real" start position of the contains() function's argument + let startPosition = -1; // Save the current position within the token stream (we will need to restore it later) - const prevTokenIndex = this.tokenIndex; + let prevTokenIndex = this.tokenIndex; + + for (let i = this.tokenIndex; i >= 0; i--) { + // Check token name to avoid :contains(join('')) case, where join( is also a function token + if ( + tokens[i].type === "function-token" && + ["contains(", "-abp-contains(", "has-text("].includes(tokens[i].chunk) + ) { + // Token after the function name is the first token of the argument + startPosition = this.getTokenStart(i + 1); + prevTokenIndex = i + 1; + break; + } + } + + // Theoretically, this should never happen, but we check it anyway + if (startPosition === -1) { + this.error("Cannot find the start position of the contains() function's argument"); + } + + // Create a list for children + const children = this.createList(); // Find the real end index of the contains() function's argument const sourceCode = this.source; - // Find the next unescaped closing parenthesis. Don't forget to set the start position. 
- const startPosition = this.getTokenStart(this.tokenIndex); - const endPosition = findNextUnescapedCharacter(sourceCode, CLOSING_PARENTHESIS, startPosition); + let endPosition = -1; + + // Parenthesis balance + let balance = 0; + + // Contains can contain any character, such as parentheses, quotes, etc, + // so a bit tricky to find the end position of the pseudo-class + for (let i = startPosition; i < sourceCode.length; i++) { + const char = sourceCode[i]; + + if (char === OPENING_PARENTHESIS && sourceCode[i - 1] !== ESCAPE) { + balance++; + } else if (char === CLOSING_PARENTHESIS && sourceCode[i - 1] !== ESCAPE) { + balance--; + + if (balance === -1) { + endPosition = i; + break; + } + } + } // If we cannot find the closing parenthesis, we cannot fix the token stream, so we // just return the children list as is. In this case, the parser will fail with an @@ -131,6 +176,11 @@ const extCssContains = { return children; } + // Empty parameter + if (endPosition === startPosition) { + this.error('No parameter specified for "contains()" pseudo-class'); + } + // Push content to children list children.push({ type: "Raw", @@ -159,10 +209,7 @@ const extCssContains = { this.next(); } - // But at this point we are just at the beginning of the contains() function's argument, - // so we need to skip the dummy spaces that we added to the source code before, which - // means +1 whitespace token that should be skipped: - this.next(); + // CSSTree will skip inserted whitespace // Return the children list which contains the contains() function's argument as a Raw node return children; @@ -260,10 +307,7 @@ const xpath = { this.next(); } - // But at this point we are just at the beginning of the xpath() function's argument, - // so we need to skip the dummy spaces that we added to the source code before, which - // means +1 whitespace token that should be skipped: - this.next(); + // CSSTree will skip inserted whitespace // Return the children list which contains the xpath()
function's argument as a Raw node return children; diff --git a/test/syntax/contains.test.js b/test/syntax/contains.test.js index fe8ff49..e823969 100644 --- a/test/syntax/contains.test.js +++ b/test/syntax/contains.test.js @@ -1,3 +1,5 @@ +// Tests for :contains(), :-abp-contains() and :has-text() pseudo-classes + import { parse, generate, toPlainObject } from "../../src/syntax"; const parserConfig = { @@ -5,19 +7,523 @@ const parserConfig = { positions: true, }; -describe(":contains()", () => { - test("throws on invalid input", () => { - expect(() => parse(`:contains()`, parserConfig)).toThrow(); - expect(() => parse(`:contains( )`, parserConfig)).toThrow(); - expect(() => parse(`:contains( )`, parserConfig)).toThrow(); +describe(":contains()", () => { + test("throws on invalid input", () => { + expect(() => parse(`:contains()`, parserConfig)).toThrow( + 'Empty parameter specified for "contains()" pseudo-class' + ); + + expect(() => parse(`:contains(a`, parserConfig)).toThrow(); + expect(() => parse(`:contains(a'`, parserConfig)).toThrow(); + }); + + test("parses valid input properly", () => { + // One whitespace + expect(toPlainObject(parse(`:contains( )`, parserConfig))).toMatchObject({ + type: "Selector", + loc: { + source: "", + start: { + offset: 0, + line: 1, + column: 1, + }, + end: { + offset: 12, + line: 1, + column: 13, + }, + }, + children: [ + { + type: "PseudoClassSelector", + loc: { + source: "", + start: { + offset: 0, + line: 1, + column: 1, + }, + end: { + offset: 12, + line: 1, + column: 13, + }, + }, + name: "contains", + children: [ + { + type: "Raw", + loc: { + source: "", + start: { + offset: 10, + line: 1, + column: 11, + }, + end: { + offset: 11, + line: 1, + column: 12, + }, + }, + value: " ", + }, + ], + }, + ], + }); + + // Two whitespaces + expect(toPlainObject(parse(`:contains( )`, parserConfig))).toMatchObject({ + type: "Selector", + loc: { + source: "", + start: { + offset: 0, + line: 1, + column: 1, + }, + end: { + offset: 13, + 
line: 1, + column: 14, + }, + }, + children: [ + { + type: "PseudoClassSelector", + loc: { + source: "", + start: { + offset: 0, + line: 1, + column: 1, + }, + end: { + offset: 13, + line: 1, + column: 14, + }, + }, + name: "contains", + children: [ + { + type: "Raw", + loc: { + source: "", + start: { + offset: 10, + line: 1, + column: 11, + }, + end: { + offset: 12, + line: 1, + column: 13, + }, + }, + value: " ", + }, + ], + }, + ], + }); + + // Very simple input + expect(toPlainObject(parse(`:contains(aaa)`, parserConfig))).toMatchObject({ + type: "Selector", + loc: { + source: "", + start: { + offset: 0, + line: 1, + column: 1, + }, + end: { + offset: 14, + line: 1, + column: 15, + }, + }, + children: [ + { + type: "PseudoClassSelector", + loc: { + source: "", + start: { + offset: 0, + line: 1, + column: 1, + }, + end: { + offset: 14, + line: 1, + column: 15, + }, + }, + name: "contains", + children: [ + { + type: "Raw", + loc: { + source: "", + start: { + offset: 10, + line: 1, + column: 11, + }, + end: { + offset: 13, + line: 1, + column: 14, + }, + }, + value: "aaa", + }, + ], + }, + ], + }); + + // Space before input + expect(toPlainObject(parse(`:contains( aaa)`, parserConfig))).toMatchObject({ + type: "Selector", + loc: { + source: "", + start: { + offset: 0, + line: 1, + column: 1, + }, + end: { + offset: 15, + line: 1, + column: 16, + }, + }, + children: [ + { + type: "PseudoClassSelector", + loc: { + source: "", + start: { + offset: 0, + line: 1, + column: 1, + }, + end: { + offset: 15, + line: 1, + column: 16, + }, + }, + name: "contains", + children: [ + { + type: "Raw", + loc: { + source: "", + start: { + offset: 10, + line: 1, + column: 11, + }, + end: { + offset: 14, + line: 1, + column: 15, + }, + }, + value: " aaa", + }, + ], + }, + ], + }); + + // Space after input + expect(toPlainObject(parse(`:contains(aaa )`, parserConfig))).toMatchObject({ + type: "Selector", + loc: { + source: "", + start: { + offset: 0, + line: 1, + column: 1, + }, + end: 
{ + offset: 15, + line: 1, + column: 16, + }, + }, + children: [ + { + type: "PseudoClassSelector", + loc: { + source: "", + start: { + offset: 0, + line: 1, + column: 1, + }, + end: { + offset: 15, + line: 1, + column: 16, + }, + }, + name: "contains", + children: [ + { + type: "Raw", + loc: { + source: "", + start: { + offset: 10, + line: 1, + column: 11, + }, + end: { + offset: 14, + line: 1, + column: 15, + }, + }, + value: "aaa ", + }, + ], + }, + ], + }); + + // Space before and after input + expect(toPlainObject(parse(`:contains( aaa )`, parserConfig))).toMatchObject({ + type: "Selector", + loc: { + source: "", + start: { + offset: 0, + line: 1, + column: 1, + }, + end: { + offset: 16, + line: 1, + column: 17, + }, + }, + children: [ + { + type: "PseudoClassSelector", + loc: { + source: "", + start: { + offset: 0, + line: 1, + column: 1, + }, + end: { + offset: 16, + line: 1, + column: 17, + }, + }, + name: "contains", + children: [ + { + type: "Raw", + loc: { + source: "", + start: { + offset: 10, + line: 1, + column: 11, + }, + end: { + offset: 15, + line: 1, + column: 16, + }, + }, + value: " aaa ", + }, + ], + }, + ], + }); + + // Space before and after input, with space in input + expect(toPlainObject(parse(`:contains( aaa bbb )`, parserConfig))).toMatchObject({ + type: "Selector", + loc: { + source: "", + start: { + offset: 0, + line: 1, + column: 1, + }, + end: { + offset: 23, + line: 1, + column: 24, + }, + }, + children: [ + { + type: "PseudoClassSelector", + loc: { + source: "", + start: { + offset: 0, + line: 1, + column: 1, + }, + end: { + offset: 23, + line: 1, + column: 24, + }, + }, + name: "contains", + children: [ + { + type: "Raw", + loc: { + source: "", + start: { + offset: 10, + line: 1, + column: 11, + }, + end: { + offset: 22, + line: 1, + column: 23, + }, + }, + value: " aaa bbb ", + }, + ], + }, + ], + }); + + // Space in input + expect(toPlainObject(parse(`:contains(aaa bbb ccc)`, parserConfig))).toMatchObject({ + type: "Selector", + 
loc: { + source: "", + start: { + offset: 0, + line: 1, + column: 1, + }, + end: { + offset: 22, + line: 1, + column: 23, + }, + }, + children: [ + { + type: "PseudoClassSelector", + loc: { + source: "", + start: { + offset: 0, + line: 1, + column: 1, + }, + end: { + offset: 22, + line: 1, + column: 23, + }, + }, + name: "contains", + children: [ + { + type: "Raw", + loc: { + source: "", + start: { + offset: 10, + line: 1, + column: 11, + }, + end: { + offset: 21, + line: 1, + column: 22, + }, + }, + value: "aaa bbb ccc", + }, + ], + }, + ], + }); - expect(() => parse(`:contains(a`, parserConfig)).toThrow(); - expect(() => parse(`:contains(a'`, parserConfig)).toThrow(); - }); + // Parenthesis in input + expect(toPlainObject(parse(`:contains((aaa))`, parserConfig))).toMatchObject({ + type: "Selector", + loc: { + source: "", + start: { + offset: 0, + line: 1, + column: 1, + }, + end: { + offset: 16, + line: 1, + column: 17, + }, + }, + children: [ + { + type: "PseudoClassSelector", + loc: { + source: "", + start: { + offset: 0, + line: 1, + column: 1, + }, + end: { + offset: 16, + line: 1, + column: 17, + }, + }, + name: "contains", + children: [ + { + type: "Raw", + loc: { + source: "", + start: { + offset: 10, + line: 1, + column: 11, + }, + end: { + offset: 15, + line: 1, + column: 16, + }, + }, + value: "(aaa)", + }, + ], + }, + ], + }); - test("parses valid input properly", () => { - // Very simple input - expect(toPlainObject(parse(`:contains(aaa)`, parserConfig))).toMatchObject({ + // Parenthesis in input, but a bit more complex + expect(toPlainObject(parse(`:contains((aaa)(bbb)\\)\\()`, parserConfig))).toMatchObject({ type: "Selector", loc: { source: "", @@ -27,9 +533,9 @@ describe(":contains()", () => { column: 1, }, end: { - offset: 14, + offset: 25, line: 1, - column: 15, + column: 26, }, }, children: [ @@ -43,9 +549,9 @@ describe(":contains()", () => { column: 1, }, end: { - offset: 14, + offset: 25, line: 1, - column: 15, + column: 26, }, }, name: 
"contains", @@ -60,12 +566,12 @@ describe(":contains()", () => { column: 11, }, end: { - offset: 13, + offset: 24, line: 1, - column: 14, + column: 25, }, }, - value: "aaa", + value: "(aaa)(bbb)\\)\\(", }, ], }, @@ -184,6 +690,62 @@ describe(":contains()", () => { ], }); + // Regular expression with parentheses + expect(toPlainObject(parse(`:contains(/^(a|b){3,}$/)`, parserConfig))).toMatchObject({ + type: "Selector", + loc: { + source: "", + start: { + offset: 0, + line: 1, + column: 1, + }, + end: { + offset: 24, + line: 1, + column: 25, + }, + }, + children: [ + { + type: "PseudoClassSelector", + loc: { + source: "", + start: { + offset: 0, + line: 1, + column: 1, + }, + end: { + offset: 24, + line: 1, + column: 25, + }, + }, + name: "contains", + children: [ + { + type: "Raw", + loc: { + source: "", + start: { + offset: 10, + line: 1, + column: 11, + }, + end: { + offset: 23, + line: 1, + column: 24, + }, + }, + value: "/^(a|b){3,}$/", + }, + ], + }, + ], + }); + // Regular expression with escaped parentheses expect(toPlainObject(parse(`:contains(/aaa\\(\\)/i)`, parserConfig))).toMatchObject({ type: "Selector", @@ -351,13 +913,85 @@ describe(":contains()", () => { }, ], }); + + // Functions + expect(toPlainObject(parse(`:contains(function(another('')))`, parserConfig))).toMatchObject({ + type: "Selector", + loc: { + source: "", + start: { + offset: 0, + line: 1, + column: 1, + }, + end: { + offset: 32, + line: 1, + column: 33, + }, + }, + children: [ + { + type: "PseudoClassSelector", + loc: { + source: "", + start: { + offset: 0, + line: 1, + column: 1, + }, + end: { + offset: 32, + line: 1, + column: 33, + }, + }, + name: "contains", + children: [ + { + type: "Raw", + loc: { + source: "", + start: { + offset: 10, + line: 1, + column: 11, + }, + end: { + offset: 31, + line: 1, + column: 32, + }, + }, + value: "function(another(''))", + }, + ], + }, + ], + }); }); test("generates valid input properly", () => { + expect(generate(parse(`:contains( )`, 
parserConfig))).toEqual(`:contains( )`); + expect(generate(parse(`:contains( )`, parserConfig))).toEqual(`:contains( )`); + expect(generate(parse(`:contains(aaa)`, parserConfig))).toEqual(`:contains(aaa)`); + expect(generate(parse(`:contains( aaa)`, parserConfig))).toEqual(`:contains( aaa)`); + expect(generate(parse(`:contains(aaa )`, parserConfig))).toEqual(`:contains(aaa )`); + expect(generate(parse(`:contains( aaa )`, parserConfig))).toEqual(`:contains( aaa )`); + expect(generate(parse(`:contains( aaa bbb )`, parserConfig))).toEqual(`:contains( aaa bbb )`); + expect(generate(parse(`:contains( aaa bbb )`, parserConfig))).toEqual(`:contains( aaa bbb )`); + expect(generate(parse(`:contains( aaa bbb ccc )`, parserConfig))).toEqual(`:contains( aaa bbb ccc )`); + + expect(generate(parse(`:contains((aaa))`, parserConfig))).toEqual(`:contains((aaa))`); + // TODO: "(aaa)(bbb)\\)\\(" is generated as "(aaa)(bbb) \\)\\(", but it should be "(aaa)(bbb)\\)\\(" - CSSTree related issue + // expect(generate(parse(`:contains((aaa)(bbb)\\)\\()`, parserConfig))).toEqual(`:contains((aaa)(bbb)\\)\\()`); + expect(generate(parse(`:contains(/aaa/)`, parserConfig))).toEqual(`:contains(/aaa/)`); expect(generate(parse(`:contains(/aaa/i)`, parserConfig))).toEqual(`:contains(/aaa/i)`); + expect(generate(parse(`:contains(/^(a|b){3,}$/)`, parserConfig))).toEqual(`:contains(/^(a|b){3,}$/)`); expect(generate(parse(`:contains(/aaa\\(\\)/i)`, parserConfig))).toEqual(`:contains(/aaa\\(\\)/i)`); + expect(generate(parse(`:contains(aaa'bbb)`, parserConfig))).toEqual(`:contains(aaa'bbb)`); expect(generate(parse(`:contains(aaa"bbb)`, parserConfig))).toEqual(`:contains(aaa"bbb)`); });