Skip to content

Commit

Permalink
Adjust match indices with subclass strategy line_or_search_start
Browse files Browse the repository at this point in the history
  • Loading branch information
slevithan committed Dec 19, 2024
1 parent 3afb0cc commit 0f60fb5
Show file tree
Hide file tree
Showing 2 changed files with 100 additions and 72 deletions.
7 changes: 7 additions & 0 deletions spec/match-search-start.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,13 @@ describe('Assertion: Search start', () => {
expect(toRegExp(r`(\G|^)a`).exec('b\na')?.index).toBe(2);
expect(toRegExp(r`(?:(\G|^)a)`).exec('b\na')?.index).toBe(2);
expect(toRegExp(r`((\G|^)a)`).exec('b\na')?.index).toBe(2);

// Updates match indices accurately
const re = toRegExp(r`(?<n>^|\G)a`, {global: true, hasIndices: true});
re.lastIndex = 2;
expect(re.exec('12a').indices[0][0]).toBe(2);
re.lastIndex = 2;
expect(re.exec('12a').indices.groups.n[0]).toBe(2);
});

// Leading `(?!\G)` and similar
Expand Down
165 changes: 93 additions & 72 deletions src/subclass.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,75 +2,6 @@ import {AstAssertionKinds, AstTypes} from './parse.js';
import {hasOnlyChild, isLookaround, isZeroLengthNode} from './utils-node.js';
import {RegExpSubclass} from 'regex/internals';

// Applies special-case AST transformations whose emulation relies on cooperation from a `RegExp`
// subclass (see `EmulatedRegExp`), adding support for some common patterns that can't otherwise
// be handled. At most one strategy is applied per pattern. Returns the name of the applied
// strategy (after mutating `ast` to drop the `\G`), or `null` if no strategy fits
function applySubclassStrategies(ast) {
  const topAlts = ast.pattern.alternatives;
  const leadEl = topAlts[0].elements[0];
  // None of the strategies work with top-level alternation or without a leading element
  if (topAlts.length > 1 || !leadEl) {
    return null;
  }

  const isGroupNode = node => (
    node.type === AstTypes.CapturingGroup || node.type === AstTypes.Group
  );

  // A lone group with a single alternative might enclose the full pattern (ex: a flag group); in
  // that case, look for the leading element inside the group rather than at the top level
  const isWrapped = hasOnlyChild(ast.pattern, isGroupNode) && leadEl.alternatives.length === 1;
  let innerAlt = topAlts[0];
  let innerFirst = leadEl;
  if (isWrapped) {
    innerAlt = leadEl.alternatives[0];
    innerFirst = innerAlt.elements[0];
  }
  if (!innerFirst) {
    return null;
  }

  // ## Strategy `line_or_search_start`: Support leading `(^|\G)` and similar
  if (
    isGroupNode(innerFirst) &&
    innerFirst.alternatives.length === 2 &&
    innerFirst.alternatives[0].elements.length === 1 &&
    innerFirst.alternatives[1].elements.length === 1
  ) {
    const kindA = innerFirst.alternatives[0].elements[0].kind;
    const kindB = innerFirst.alternatives[1].elements[0].kind;
    if (kindA === AstAssertionKinds.line_start && kindB === AstAssertionKinds.search_start) {
      // `\G` is the second option; drop it along with its alternative
      innerFirst.alternatives.pop();
      return 'line_or_search_start';
    }
    if (kindA === AstAssertionKinds.search_start && kindB === AstAssertionKinds.line_start) {
      // `\G` is the first option; drop it along with its alternative
      innerFirst.alternatives.shift();
      return 'line_or_search_start';
    }
  }

  // ## Strategy `not_search_start`: Support leading `(?!\G)` and similar
  if (isLoneGLookaround(innerFirst, {negate: true})) {
    // Drop the negative lookaround (and the `\G` inside it) from the front of its container
    innerFirst.parent.elements.shift();
    return 'not_search_start';
  }
  // Otherwise, scan only the run of zero-length nodes at the start of the alternative
  for (const [idx, node] of innerAlt.elements.entries()) {
    if (!isZeroLengthNode(node)) {
      break;
    }
    if (isLoneGLookaround(node, {negate: true})) {
      // Drop the negative lookaround (and the `\G` inside it)
      innerAlt.elements.splice(idx, 1);
      return 'not_search_start';
    }
  }

  return null;
}

/**
@typedef {{
useEmulationGroups?: boolean;
Expand Down Expand Up @@ -127,9 +58,7 @@ class EmulatedRegExp extends RegExpSubclass {
this.lastIndex = 0;
const match = exec.call(this, str.slice(pos));
if (match) {
match.input = str;
match.index += pos;
this.lastIndex += pos;
adjustMatchDetails(str, this, match, pos);
}
return match;
}
Expand All @@ -149,6 +78,98 @@ class EmulatedRegExp extends RegExpSubclass {
}
}

/**
Rebases the positional details of a match by `offset` so they describe positions in the full
string `str` (ex: for a match that was found by searching `str.slice(offset)`). Mutates `match`
and `re` in place.
@param {string} str The full string; assigned to `match.input`.
@param {RegExp} re The regex that produced `match`; its `lastIndex` is increased by `offset`.
@param {RegExpExecArray} match The match to update.
@param {number} offset Amount added to `match.index`, `re.lastIndex`, and (when `re.hasIndices`)
  every start/end pair in `match.indices` and `match.indices.groups`.
*/
function adjustMatchDetails(str, re, match, offset) {
  match.input = str;
  match.index += offset;
  re.lastIndex += offset;
  if (!re.hasIndices) {
    return;
  }
  const matchIndices = match.indices;
  for (let i = 0; i < matchIndices.length; i++) {
    const arr = matchIndices[i];
    // Capturing groups that didn't participate in the match have `undefined` in place of a
    // start/end pair; leave those untouched
    if (arr) {
      // Replace the array rather than updating values since the keys of `match.indices` and
      // `match.indices.groups` share their value arrays by reference. Need to be precise in case
      // they were previously altered separately
      matchIndices[i] = [arr[0] + offset, arr[1] + offset];
    }
  }
  const groupIndices = matchIndices.groups;
  if (groupIndices) {
    for (const key of Object.keys(groupIndices)) {
      const arr = groupIndices[key];
      // Named groups that didn't participate are also `undefined`
      if (arr) {
        groupIndices[key] = [arr[0] + offset, arr[1] + offset];
      }
    }
  }
}

// Handles special-case AST transformations that must be paired with a `RegExp` subclass (see
// `EmulatedRegExp`). They add emulation support for a few common patterns that are otherwise
// unsupportable. No more than one subclass strategy is used per pattern; the result is that
// strategy's name (with the `\G` already removed from `ast`), or `null` when none applies
function applySubclassStrategies(ast) {
  const {pattern} = ast;
  const alts = pattern.alternatives;
  const outerAlt = alts[0];
  const outerFirst = outerAlt.elements[0];
  const hasTopLevelAlternation = alts.length > 1;
  if (hasTopLevelAlternation || !outerFirst) {
    // The strategies below require a single top-level alternative with a leading element
    return null;
  }

  const groupTypes = [AstTypes.CapturingGroup, AstTypes.Group];
  const isGroup = node => groupTypes.includes(node.type);

  // If the pattern's only child is a group without its own top-level alternation (ex: a flag
  // group enclosing the full pattern), search within that group; else search the pattern itself
  const wrapped = hasOnlyChild(pattern, isGroup) && outerFirst.alternatives.length === 1;
  const scopeAlt = wrapped ? outerFirst.alternatives[0] : outerAlt;
  const scopeFirst = scopeAlt.elements[0];
  if (!scopeFirst) {
    return null;
  }

  // ## Strategy `line_or_search_start`: Support leading `(^|\G)` and similar
  if (
    isGroup(scopeFirst) &&
    scopeFirst.alternatives.length === 2 &&
    scopeFirst.alternatives.every(alt => alt.elements.length === 1)
  ) {
    const options = scopeFirst.alternatives;
    const kind1 = options[0].elements[0].kind;
    const kind2 = options[1].elements[0].kind;
    const {line_start: lineStart, search_start: searchStart} = AstAssertionKinds;
    // Position of the alternative holding `\G`, or -1 if the pair isn't `^` plus `\G`
    let searchStartPos = -1;
    if (kind1 === lineStart && kind2 === searchStart) {
      searchStartPos = 1;
    } else if (kind1 === searchStart && kind2 === lineStart) {
      searchStartPos = 0;
    }
    if (searchStartPos !== -1) {
      // Remove the `\G` and its container alternative, keeping only the `^` option
      options.splice(searchStartPos, 1);
      return 'line_or_search_start';
    }
  }

  // ## Strategy `not_search_start`: Support leading `(?!\G)` and similar
  if (isLoneGLookaround(scopeFirst, {negate: true})) {
    // Remove the `\G` and its containing negative lookaround
    scopeFirst.parent.elements.shift();
    return 'not_search_start';
  }
  // Walk the leading zero-length nodes only; stop at the first node that isn't zero-length
  const els = scopeAlt.elements;
  let pos = 0;
  while (pos < els.length && isZeroLengthNode(els[pos])) {
    if (isLoneGLookaround(els[pos], {negate: true})) {
      // Remove the `\G` and its containing negative lookaround
      els.splice(pos, 1);
      return 'not_search_start';
    }
    pos++;
  }

  return null;
}

function isLoneGLookaround(node, options) {
return (
isLookaround(node) &&
Expand Down

0 comments on commit 0f60fb5

Please sign in to comment.