Skip to content

Commit

Permalink
Improve contains
Browse files Browse the repository at this point in the history
This heuristic hopefully covers all cases, but the syntax is quite permissive
  • Loading branch information
scripthunter7 committed Feb 18, 2023
1 parent 6fa8b12 commit 20dedba
Show file tree
Hide file tree
Showing 2 changed files with 714 additions and 36 deletions.
80 changes: 62 additions & 18 deletions src/syntax/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@

import { fork as originalFork, tokenize as originalTokenize, tokenTypes } from "css-tree";
import { CLOSING_PARENTHESIS, DOUBLE_QUOTE, ESCAPE, OPENING_PARENTHESIS, SPACE } from "../utils/constants.js";
import { findNextUnescapedCharacter } from "../utils/string.js";

const selector = {
parse() {
Expand Down Expand Up @@ -106,23 +105,69 @@ const extCssContains = {
* fix the token stream here to avoid this error.
*/
parse() {
// Empty pseudo-class
if (this.tokenType === tokenTypes.RightParenthesis) {
this.error('No parameter specified for "contains()" pseudo-class');
// Get the current token stream
const tokens = this.dump();

// Note: CSSTree removes the whitespace token after the function name before calling this function
// So if we have :contains( something), our tokenIndex here points to "something" and not to the
// whitespace token.

// :contains() case, but not :contains( something) case, so we check if the previous token is not a whitespace
if (this.tokenType === tokenTypes.RightParenthesis && tokens[this.tokenIndex - 1].type !== "whitespace-token") {
this.error('Empty parameter specified for "contains()" pseudo-class');
}

// Create a list for children
const children = this.createList();
// Find the "real" start position of the contains() function's argument
let startPosition = -1;

// Save the current position within the token stream (we will need to restore it later)
const prevTokenIndex = this.tokenIndex;
let prevTokenIndex = this.tokenIndex;

for (let i = this.tokenIndex; i >= 0; i--) {
// Check token name to avoid :contains(join('')) case, where join( is also a function token
if (
tokens[i].type === "function-token" &&
["contains(", "-abp-contains(", "has-text("].includes(tokens[i].chunk)
) {
// Token after the function name is the first token of the argument
startPosition = this.getTokenStart(i + 1);
prevTokenIndex = i + 1;
break;
}
}

// Theoretically, this should never happen, but we check it anyway
if (startPosition === -1) {
this.error("Cannot find the start position of the contains() function's argument");
}

// Create a list for children
const children = this.createList();

// Find the real end index of the contains() function's argument
const sourceCode = this.source;

// Find the next unescaped closing parenthesis. Don't forget to set the start position.
const startPosition = this.getTokenStart(this.tokenIndex);
const endPosition = findNextUnescapedCharacter(sourceCode, CLOSING_PARENTHESIS, startPosition);
let endPosition = -1;

// Parenthesis balance
let balance = 0;

// The contains() argument can contain any character, such as parentheses, quotes, etc.,
// so it is a bit tricky to find the end position of the pseudo-class
for (let i = startPosition; i < sourceCode.length; i++) {
const char = sourceCode[i];

if (char === OPENING_PARENTHESIS && sourceCode[i - 1] !== ESCAPE) {
balance++;
} else if (char === CLOSING_PARENTHESIS && sourceCode[i - 1] !== ESCAPE) {
balance--;

if (balance === -1) {
endPosition = i;
break;
}
}
}

// If we cannot find the closing parenthesis, we cannot fix the token stream, so we
// just return the children list as is. In this case, the parser will fail with an
Expand All @@ -131,6 +176,11 @@ const extCssContains = {
return children;
}

// Empty parameter
if (endPosition === startPosition) {
this.error('No parameter specified for "contains()" pseudo-class');
}

// Push content to children list
children.push({
type: "Raw",
Expand Down Expand Up @@ -159,10 +209,7 @@ const extCssContains = {
this.next();
}

// But at this point we are just at the beginning of the contains() function's argument,
// so we need to skip the dummy spaces that we added to the source code before, which
// means +1 whitespace token that should be skipped:
this.next();
// CSSTree will skip the inserted whitespace

// Return the children list which contains the contains() function's argument as a Raw node
return children;
Expand Down Expand Up @@ -260,10 +307,7 @@ const xpath = {
this.next();
}

// But at this point we are just at the beginning of the xpath() function's argument,
// so we need to skip the dummy spaces that we added to the source code before, which
// means +1 whitespace token that should be skipped:
this.next();
// CSSTree will skip the inserted whitespace

// Return the children list which contains the xpath() function's argument as a Raw node
return children;
Expand Down
Loading

0 comments on commit 20dedba

Please sign in to comment.