Skip to content

Commit

Permalink
Transform quantified assertions
Browse files Browse the repository at this point in the history
  • Loading branch information
slevithan committed Oct 27, 2024
1 parent bee04ff commit 7b68006
Show file tree
Hide file tree
Showing 5 changed files with 98 additions and 72 deletions.
64 changes: 32 additions & 32 deletions dist/index.min.js

Large diffs are not rendered by default.

7 changes: 4 additions & 3 deletions src/generate.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import {getOptions} from './compile.js';
import {AstAssertionKinds, AstCharacterSetKinds, AstTypes} from './parse.js';
import {AstAssertionKinds, AstCharacterSetKinds, AstTypes, isLookaround} from './parse.js';
import {traverse} from './traverse.js';
import {getIgnoreCaseMatchChars, JsUnicodePropertiesPostEs2018, UnicodePropertiesWithSpecificCase} from './unicode.js';
import {cp, isMinTarget, r} from './utils.js';
Expand Down Expand Up @@ -198,8 +198,9 @@ function charHasCase(char) {
return casedRe.test(char);
}

function genAssertion({kind, negate, alternatives}, _, gen) {
if (kind === AstAssertionKinds.lookahead || kind === AstAssertionKinds.lookbehind) {
function genAssertion(node, _, gen) {
const {kind, negate, alternatives} = node;
if (isLookaround(node)) {
const prefix = `${kind === AstAssertionKinds.lookahead ? '' : '<'}${negate ? '!' : '='}`;
return `(?${prefix}${alternatives.map(gen).join('|')})`;
}
Expand Down
15 changes: 9 additions & 6 deletions src/parse.js
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,10 @@ const AstTypeAliases = {
AnyNode: 'AnyNode',
};

function getAstTypeAliases({type, kind}) {
function getAstTypeAliases(node) {
const {type} = node;
const types = [AstTypeAliases.AnyNode];
if (
(type === AstTypes.Assertion && (kind === AstAssertionKinds.lookahead || kind === AstAssertionKinds.lookbehind)) ||
type === AstTypes.CapturingGroup ||
type === AstTypes.Group
) {
if (isLookaround(node) || type === AstTypes.CapturingGroup || type === AstTypes.Group) {
types.push(AstTypeAliases.AnyGroup);
}
types.push(type);
Expand Down Expand Up @@ -666,6 +663,11 @@ function getOptimizedGroup(node) {
return node;
}

// Tells whether an AST node is a lookaround, i.e. an assertion node whose kind is lookahead or
// lookbehind. Other assertion kinds and all non-assertion nodes give `false`.
function isLookaround({type, kind}) {
  if (type !== AstTypes.Assertion) {
    return false;
  }
  const isLookahead = kind === AstAssertionKinds.lookahead;
  return isLookahead || kind === AstAssertionKinds.lookbehind;
}

// Accepts a group name unless it is empty or its first character is a hyphen or a digit
function isValidGroupNameOniguruma(name) {
  const hasInvalidStart = /^(?:[-\d]|$)/.test(name);
  return !hasInvalidStart;
}
Expand Down Expand Up @@ -713,5 +715,6 @@ export {
createUnicodeProperty,
createVariableLengthCharacterSet,
getAstTypeAliases,
isLookaround,
parse,
};
80 changes: 51 additions & 29 deletions src/transform.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import emojiRegex from 'emoji-regex-xs';
import {AstAssertionKinds, AstCharacterSetKinds, AstDirectiveKinds, AstTypes, AstVariableLengthCharacterSetKinds, createAlternative, createBackreference, createGroup, createLookaround, createUnicodeProperty, parse} from './parse.js';
import {AstAssertionKinds, AstCharacterSetKinds, AstDirectiveKinds, AstTypes, AstVariableLengthCharacterSetKinds, createAlternative, createBackreference, createGroup, createLookaround, createUnicodeProperty, isLookaround, parse} from './parse.js';
import {tokenize} from './tokenize.js';
import {traverse} from './traverse.js';
import {JsUnicodeProperties, PosixClassesMap} from './unicode.js';
Expand Down Expand Up @@ -172,7 +172,8 @@ const FirstPassVisitor = {
}
},

Directive({node, parent, key, container, ast, remove, replaceWith, removeAllPrevSiblings, removeAllNextSiblings}, state) {
Directive(path, state) {
const {node, parent, ast, remove, replaceWith, removeAllPrevSiblings, removeAllNextSiblings} = path;
const {kind, flags} = node;
if (kind === AstDirectiveKinds.flags) {
if (!flags.enable && !flags.disable) {
Expand All @@ -181,12 +182,7 @@ const FirstPassVisitor = {
} else {
const flagGroup = prepContainer(createGroup({flags}), removeAllNextSiblings());
replaceWith(flagGroup);
traverse({
node: flagGroup,
parent,
key,
container,
}, state, FirstPassVisitor);
traverseReplacement(flagGroup, path, state, FirstPassVisitor);
}
} else if (kind === AstDirectiveKinds.keep) {
// Allows multiple `\K`s since the node is removed
Expand Down Expand Up @@ -280,14 +276,28 @@ const FirstPassVisitor = {
}
},

Quantifier({node}) {
// TODO: Handle quantified assertions; not allowed in JS. If min is 0, remove the assertion and skip kids; else unwrap
// TODO: Custom or better error for quantified flag directives `(?i)+`
if (node.element.type === AstTypes.Quantifier) {
const group = prepContainer(createGroup(), [node.element]);
// Manually set the parent since we're not using `replaceWith`
group.parent = node;
node.element = group;
// Rewrites quantifiers whose quantified element is itself a quantifier or an assertion; any other
// kind of quantified element is left untouched by this visitor
Quantifier(path, state) {
const {node, remove, replaceWith, skip} = path;
// The element this quantifier applies to
const child = node.element;
if (child.type === AstTypes.Quantifier) {
// Change e.g. `a**` to `(?:a*)*`
moveQuantifierToWrapper(node);
} else if (child.type === AstTypes.Assertion) {
// Quantified assertions aren't allowed in JS
if (node.min) {
// Nonzero `min`: strip the quantifier but keep its child
replaceWith(child);
// Run this same visitor over the child at the quantifier's position (the path's `parent`, `key`,
// and `container` are reused)
traverseReplacement(child, path, state, FirstPassVisitor);
// NOTE(review): presumably stops the traverser from descending into the replaced quantifier after
// the manual traversal above; confirm against `traverse`
skip();
} else if (isLookaround(child)) {
// Change e.g. `(?=a)*` to `(?:(?=a))*`; can't remove the child since the lookaround might
// contain captures reffed elsewhere
moveQuantifierToWrapper(node);
} else {
// In other cases with `min: 0`, the quantifier makes its assertion irrelevant
remove();
skip();
}
}
},

Expand Down Expand Up @@ -433,16 +443,16 @@ const SecondPassVisitor = {
},
},

Subroutine({node, parent, key, container, replaceWith}, state) {
const {groupOriginByCopy, subroutineRefMap} = state;
Subroutine(path, state) {
const {node, replaceWith} = path;
const {ref} = node;
const reffedGroupNode = subroutineRefMap.get(ref);
const reffedGroupNode = state.subroutineRefMap.get(ref);
// Other forms of recursion are handled by the `CapturingGroup` visitor
const isGlobalRecursion = ref === 0;
const expandedSubroutine = isGlobalRecursion ?
createRecursion(ref) :
// The reffed group might itself contain subroutines, which are expanded during sub-traversal
cloneCapturingGroup(reffedGroupNode, groupOriginByCopy, null);
cloneCapturingGroup(reffedGroupNode, state.groupOriginByCopy, null);
let replacement = expandedSubroutine;
if (!isGlobalRecursion) {
// Subroutines take their flags from the reffed group, not the flags surrounding themselves
Expand All @@ -457,14 +467,9 @@ const SecondPassVisitor = {
}
replaceWith(replacement);
if (!isGlobalRecursion) {
traverse({
// Start traversal at the flag group wrapper so the logic for stripping duplicate names
// propagates through its alternative
node: replacement,
parent,
key,
container,
}, state, SecondPassVisitor);
// Start traversal at the flag group wrapper so the logic for stripping duplicate names
// propagates through its alternative
traverseReplacement(replacement, path, state, SecondPassVisitor);
}
},
};
Expand Down Expand Up @@ -588,8 +593,8 @@ function getCombinedFlagsFromFlagNodes(flagNodes) {
}

function getParentAlternative(node) {
// Skip past quantifiers, etc.
while (node = node.parent) {
// Skip past quantifiers, etc.
if (node.type === AstTypes.Alternative) {
return node;
}
Expand All @@ -603,6 +608,13 @@ function isValidGroupNameJs(name) {
return /^[$_\p{IDS}][$\u200C\u200D\p{IDC}]*$/u.test(name);
}

// Inserts a new group between a quantifier and the element it quantifies, so the quantifier then
// applies to the group, e.g. `a**` becomes `(?:a*)*`
function moveQuantifierToWrapper(quantifier) {
  const emptyGroup = createGroup();
  const wrapper = prepContainer(emptyGroup, [quantifier.element]);
  // `replaceWith` isn't used here, so the wrapper's parent link has to be set by hand
  wrapper.parent = quantifier;
  quantifier.element = wrapper;
}

// Returns a single node, either the given node or all nodes wrapped in a noncapturing group
// TODO: Consider moving to `parse` module and dropping assumptions about `parent` props
function parseFragment(pattern, {bypassPropertyNameCheck} = {}) {
Expand All @@ -624,6 +636,16 @@ function prepContainer(node, kids) {
return node;
}

// Traverses a node that has replaced the node at `path`, keeping the path's position (`parent`,
// `key`, `container`) but substituting the replacement for the path's own `node`
function traverseReplacement(replacement, {parent, key, container}, state, visitor) {
  const replacementPath = {
    node: replacement,
    parent,
    key,
    container,
  };
  traverse(replacementPath, state, visitor);
}

export {
transform,
};
4 changes: 2 additions & 2 deletions src/traverse.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import {AstAssertionKinds, AstTypes, getAstTypeAliases} from './parse.js';
import {AstTypes, getAstTypeAliases, isLookaround} from './parse.js';
import {throwIfNot} from './utils.js';

function traverse(path, state, visitor) {
Expand Down Expand Up @@ -61,7 +61,7 @@ function traverse(path, state, visitor) {
traverseArray(node.elements, node);
break;
case AstTypes.Assertion:
if (node.kind === AstAssertionKinds.lookahead || node.kind === AstAssertionKinds.lookbehind) {
if (isLookaround(node)) {
traverseArray(node.alternatives, node);
}
break;
Expand Down

0 comments on commit 7b68006

Please sign in to comment.