Skip to content

Commit

Permalink
Add loose mode
Browse files Browse the repository at this point in the history
  • Loading branch information
slevithan committed Nov 6, 2024
1 parent d594d53 commit 42624bd
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 34 deletions.
33 changes: 26 additions & 7 deletions spec/match-assertion.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,14 @@ describe('Assertion', () => {
expect('a').toExactlyMatch(r`(?=\G)a`);
expect('a').toExactlyMatch(r`(?=\Ga)a`);
expect('aaba'.match(toRegExp(r`(?=\Ga)a`, '', {global: true}))).toEqual(['a', 'a']);
expect(['a', 'b']).toExactlyMatch(r`(?=\G)a|\Gb`);
// Similar but not covered
[ r`(?=\G|)a`,
r`(?:(?=\G))?a`,
r`(?=\G)a|b`,
].forEach(pattern => {
expect(() => compile(pattern)).toThrow();
});
});

it('should allow if trailing in a leading positive lookbehind', () => {
Expand All @@ -135,6 +143,14 @@ describe('Assertion', () => {
let re = toRegExp(r`(?<=a\G)a`);
re.lastIndex = 3;
expect(re.exec('abaa')?.index).toBe(3);
expect(['a', 'b']).toExactlyMatch(r`(?<=\G)a|\Gb`);
// Similar but not covered
[ r`(?<=\G|)a`,
r`(?:(?<=\G))?a`,
r`(?<=\G)a|b`,
].forEach(pattern => {
expect(() => compile(pattern)).toThrow();
});
});

it('should throw if leading in a leading positive lookbehind', () => {
Expand All @@ -154,20 +170,23 @@ describe('Assertion', () => {
expect(() => compile(r`\Ga|\G\Gb`)).toThrow();
});

// Could support by replacing `\G` with `(?!)`
// Note: Could support by replacing `\G` with `(?!)`, but these forms aren't useful
it('should throw at unmatchable positions', () => {
  // Every pattern here puts `\G` after at least one preceding character
  const unmatchable = [
    r`a\Gb`,
    r`(?<=a\Gb)`,
    r`(?=a\Gb)`,
    r`(?=ab\G)`,
  ];
  for (const pattern of unmatchable) {
    expect(() => compile(pattern)).toThrow();
  }
});

// Unsupported; some or all might be emulatable
it('should throw for other unsupported uses', () => {
expect(() => compile(r`(?<=\G|)a`)).toThrow();
expect(() => compile(r`(?:(?<=\G))?a`)).toThrow();
expect('a').toExactlyMatch(r`(?=\G)a|\Gb`);
expect(() => compile(r`(?=\G)a|b`)).toThrow();
it('should allow unsupported forms if using loose emulation', () => {
const patterns = [
r`a\G`,
r`\G|`,
];
patterns.forEach(pattern => {
expect(() => compile(pattern)).toThrow();
expect(toRegExp(pattern, '', {emulation: 'loose'}).sticky).toBe(true);
});
});

describe('subclass strategies', () => {
Expand Down
4 changes: 2 additions & 2 deletions src/compile.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@ import {generate} from './generate.js';
import {parse} from './parse.js';
import {tokenize} from './tokenize.js';
import {transform} from './transform.js';
import {EsVersion, Target} from './utils.js';
import {EmulationMode, EsVersion, Target} from './utils.js';
import {atomic, possessive} from 'regex/atomic';
import {recursion} from 'regex-recursion';

/**
@typedef {{
emulation?: 'strict' | 'default' | 'loose';
emulation?: keyof EmulationMode;
global?: boolean;
hasIndices?: boolean;
maxRecursionDepth?: number | null;
Expand Down
57 changes: 32 additions & 25 deletions src/transform.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import {AstAssertionKinds, AstCharacterSetKinds, AstDirectiveKinds, AstTypes, As
import {tokenize} from './tokenize.js';
import {traverse} from './traverse.js';
import {JsUnicodeProperties, PosixClassesMap} from './unicode.js';
import {cp, getNewCurrentFlags, getOrCreate, isMinTarget, r, Target} from './utils.js';
import {cp, EmulationMode, getNewCurrentFlags, getOrCreate, isMinTarget, r, Target} from './utils.js';

/**
@typedef {{
Expand All @@ -22,7 +22,7 @@ then down-convert to the desired JS target version.
@param {{
allowSubclassBasedEmulation?: boolean;
bestEffortTarget?: keyof Target;
emulation?: 'strict' | 'default' | 'loose';
emulation?: keyof EmulationMode;
}} [options]
@returns {RegexAst}
*/
Expand All @@ -40,7 +40,7 @@ function transform(ast, options) {
...options,
};
// AST changes that work together with a `RegExp` subclass to add advanced emulation
const strategy = opts.allowSubclassBasedEmulation ? applySubclassStrategies(ast) : null;
const strategy = opts.allowSubclassBasedEmulation ? applySubclassStrategies(ast, opts.emulation) : null;
const firstPassState = {
emulation: opts.emulation,
flagDirectivesByAlt: new Map(),
Expand Down Expand Up @@ -114,7 +114,7 @@ const FirstPassVisitor = {
},
},

Assertion({node, ast, remove, replaceWith}, {supportedGNodes}) {
Assertion({node, ast, remove, replaceWith}, {emulation, supportedGNodes}) {
const {kind, negate} = node;
if (kind === AstAssertionKinds.line_end) {
// Onig's only line break char is line feed, unlike JS
Expand All @@ -123,8 +123,8 @@ const FirstPassVisitor = {
// Onig's only line break char is line feed, unlike JS
replaceWith(parseFragment(r`(?<=\A|\n)`));
} else if (kind === AstAssertionKinds.search_start) {
if (!supportedGNodes.has(node)) {
throw new Error(r`Uses "\G" in a way that's unsupported; try allowSubclassBasedEmulation`);
if (!supportedGNodes.has(node) && emulation !== 'loose') {
throw new Error(r`Uses "\G" in a way that's unsupported`);
}
ast.flags.sticky = true;
remove();
Expand Down Expand Up @@ -266,7 +266,7 @@ const FirstPassVisitor = {
!node.flags.enable && !node.flags.disable && delete node.flags;
},

Pattern({node}, {supportedGNodes}) {
Pattern({node}, {emulation, supportedGNodes}) {
// For `\G` to be accurately emulatable using JS flag y, it must be at (and only at) the start
// of every top-level alternative (with complex rules for what determines being at the start).
// Additional `\G` error checking in `Assertion` visitor
Expand All @@ -286,8 +286,8 @@ const FirstPassVisitor = {
hasAltWithoutLeadG = true;
}
}
if (hasAltWithLeadG && hasAltWithoutLeadG) {
throw new Error(r`Uses "\G" in a way that's unsupported; try allowSubclassBasedEmulation`);
if (hasAltWithLeadG && hasAltWithoutLeadG && emulation !== 'loose') {
throw new Error(r`Uses "\G" in a way that's unsupported`);
}
// These nodes will be removed when traversed; other `\G` nodes will error
leadingGs.forEach(g => supportedGNodes.add(g))
Expand Down Expand Up @@ -567,7 +567,7 @@ function adoptAndSwapKids(parent, kids) {
return parent;
}

function applySubclassStrategies(ast) {
function applySubclassStrategies(ast, emulation) {
// Special case handling that requires coupling with a `RegExp` subclass (see `WrappedRegExp`).
// These changes add emulation support for some common patterns that are otherwise unsupportable.
// Only one subclass strategy is supported per pattern
Expand All @@ -585,7 +585,7 @@ function applySubclassStrategies(ast) {
const firstElIn = hasWrapperGroup ? firstEl.alternatives[0].elements[0] : firstEl;
const singleAltIn = hasWrapperGroup ? firstEl.alternatives[0] : alts[0];

// ## Subclass strategy `line_or_search_start`: Support leading `(^|\G)` and similar
// ## Strategy `line_or_search_start`: Support leading `(^|\G)` and similar
if (
(firstElIn.type === AstTypes.CapturingGroup || firstElIn.type === AstTypes.Group) &&
firstElIn.alternatives.length === 2 &&
Expand All @@ -608,7 +608,7 @@ function applySubclassStrategies(ast) {
}
}

// ## Subclass strategy `not_search_start`: Support leading `(?!\G)` and similar
// ## Strategy `not_search_start`: Support leading `(?!\G)` and similar
function isNegG(node) {
return isLookaround(node) &&
node.negate &&
Expand All @@ -628,8 +628,8 @@ function applySubclassStrategies(ast) {
return {name: 'not_search_start'};
}

// ## Subclass strategy `after_search_start_or_subpattern`: Support leading `(?<=\G|…)` and
// similar. NB: Leading `(?<=\G)` without other alts is already supported; no need for a subclass
// ## Strategy `after_search_start_or_subpattern`: Support leading `(?<=\G|…)` and similar
// Note: Leading `(?<=\G)` without other alts is already supported; no need for a subclass
if (
isLookaround(firstElIn) &&
!firstElIn.negate &&
Expand All @@ -645,22 +645,29 @@ function applySubclassStrategies(ast) {
}
});
if (hasGAlt && siblingAlts.length) {
let supported = true;
if (siblingAlts.some(alt => alt.elements.some(el => {
// Check for nodes that are or can include captures
return el.type === AstTypes.CapturingGroup || el.type === AstTypes.Group || el.type === AstTypes.Subroutine || isLookaround(el);
}))) {
throw new Error(r`Uses "\G" in a way that's unsupported`);
if (emulation === 'loose') {
supported = false;
} else {
throw new Error(r`Uses "\G" in a way that's unsupported`);
}
}
if (supported) {
// [HACK] Replace the lookbehind with an emulation marker since it isn't easy from here to
// accurately extract what will later become the generated subpattern
const emulationGroup = adoptAndSwapKids(createGroup(), [
adoptAndSwapKids(createAlternative(), [createUnicodeProperty('<<', {skipPropertyNameValidation: true})]),
...siblingAlts,
adoptAndSwapKids(createAlternative(), [createUnicodeProperty('>>', {skipPropertyNameValidation: true})]),
]);
emulationGroup.parent = firstElIn.parent;
firstElIn.parent.elements[0] = emulationGroup;
return {name: 'after_search_start_or_subpattern'};
}
// [HACK] Replace the lookbehind with an emulation marker since from here it isn't easy to
// accurately extract what will later become the generated subpattern
const emulationGroup = adoptAndSwapKids(createGroup(), [
adoptAndSwapKids(createAlternative(), [createUnicodeProperty('<<', {skipPropertyNameValidation: true})]),
...siblingAlts,
adoptAndSwapKids(createAlternative(), [createUnicodeProperty('>>', {skipPropertyNameValidation: true})]),
]);
emulationGroup.parent = firstElIn.parent;
firstElIn.parent.elements[0] = emulationGroup;
return {name: 'after_search_start_or_subpattern'};
}
}
return null;
Expand Down
7 changes: 7 additions & 0 deletions src/utils.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
const cp = String.fromCodePoint;
const r = String.raw;

/**
Names of the available emulation modes. Each key maps to the identical string value, so the object
can be used as a runtime constant and, via `keyof EmulationMode`, as the JSDoc type for `emulation`
options. The `@type {const}` cast keeps the values typed as string literals rather than `string`.
*/
const EmulationMode = /** @type {const} */ ({
strict: 'strict',
default: 'default',
loose: 'loose',
});

const EsVersion = {
ES2018: 2018,
ES2024: 2024,
Expand Down Expand Up @@ -45,6 +51,7 @@ function throwIfNot(value, msg) {

export {
cp,
EmulationMode,
EsVersion,
getNewCurrentFlags,
getOrCreate,
Expand Down

0 comments on commit 42624bd

Please sign in to comment.