diff --git a/demo/index.html b/demo/index.html index 4adeec6..22a4150 100644 --- a/demo/index.html +++ b/demo/index.html @@ -128,7 +128,7 @@

Try it

const cp = String.fromCodePoint; function printAst(ast) { if (ast?.type !== 'Regex') { - throw new Error('Oniguruma or `regex` AST expected'); + throw new Error('Oniguruma or Regex+ AST expected'); } const isObject = value => ({}).toString.call(value) === '[object Object]'; const nodeIds = new Map(); diff --git a/spec/match-backreference.spec.js b/spec/match-backreference.spec.js index 778e10b..81dd8a4 100644 --- a/spec/match-backreference.spec.js +++ b/spec/match-backreference.spec.js @@ -73,6 +73,8 @@ describe('Backreference', () => { expect('').not.toFindMatch(r`(\g<2>(\2))`); }); + // Applies to 1-9 only; otherwise it becomes octal if not enough groups are defined to the left, + // even if enough groups are defined to the right it('should throw for forward references to defined groups', () => { expect(() => toDetails(r`\1()`)).toThrow(); expect(() => toDetails(r`()\2()`)).toThrow(); diff --git a/src/generate.js b/src/generate.js index 8e952e3..5d07185 100644 --- a/src/generate.js +++ b/src/generate.js @@ -5,7 +5,7 @@ import {getIgnoreCaseMatchChars, JsUnicodePropertiesPostEs2018, UnicodePropertie import {cp, getNewCurrentFlags, isMinTarget, r} from './utils.js'; /** -Generates a `regex`-compatible `pattern`, `flags`, and `options` from a `regex` AST. +Generates a Regex+ compatible `pattern`, `flags`, and `options` from a Regex+ AST. @param {import('./transform.js').RegexAst} ast @param {import('.').Options} [options] @returns {{ @@ -125,7 +125,7 @@ function generate(ast, options) { const result = gen(ast); if (!minTargetEs2024) { - // Switch from flag v to u. By default, `regex` implicitly chooses; control it instead + // Switch from flag v to u. By default, Regex+ implicitly chooses; control it instead delete result.options.force.v; result.options.disable.v = true; result.options.unicodeSetsPlugin = null; @@ -366,7 +366,7 @@ function genFlags(node, state) { (state.appliedGlobalFlags.ignoreCase ? 'i' : '') + (node.dotAll ? 's' : '') + (node.sticky ? 
'y' : '') - // `regex` doesn't allow explicitly adding flags it handles implicitly, so there are no + // Regex+ doesn't allow explicitly adding flags it handles implicitly, so there are no // `unicode` (flag u) or `unicodeSets` (flag v) props; those flags are added separately ); } diff --git a/src/index.js b/src/index.js index 4b058ad..90161bd 100644 --- a/src/index.js +++ b/src/index.js @@ -10,14 +10,14 @@ import {recursion} from 'regex-recursion'; // compared to native JS RegExp is layered into all steps of the compilation process: // 1. Tokenizer: Understands Oniguruma syntax, with many large and small differences from JS. // 2. Parser: Builds an Oniguruma AST from the tokens with understanding of Oniguruma differences. -// 3. Transformer: Converts the Oniguruma AST to a `regex` AST that preserves all Oniguruma +// 3. Transformer: Converts the Oniguruma AST to a Regex+ AST that preserves all Oniguruma // behavior. This is true even in cases of non-native-JS features that are supported by both -// `regex` and Oniguruma but with subtly different behavior in each (subroutines, flag x). -// 4. Generator: Converts the `regex` AST to a `regex` pattern, flags, and options. -// 5. Compiler: Components of the `regex` libray are used to transpile several remaining features -// that aren't native to JS (atomic groups, possessive quantifiers, recursion). `regex` uses a +// Regex+ and Oniguruma but with subtly different behavior in each (subroutines, flag x). +// 4. Generator: Converts the Regex+ AST to a Regex+ pattern, flags, and options. +// 5. Compiler: Components of the Regex+ library are used to transpile several remaining features +// that aren't native to JS (atomic groups, possessive quantifiers, recursion). Regex+ uses a // strict superset of JS RegExp syntax, so using it allows this library to benefit from not -// reinventing the wheel for complex features that `regex` already knows how to transpile to JS. 
+// reinventing the wheel for complex features that Regex+ already knows how to transpile to JS. /** @typedef {{ @@ -63,7 +63,7 @@ function toDetails(pattern, options) { let subpattern; if (regexAst._strategy) { // Look for an emulation marker added as part of the strategy. Do this after the pattern has - // been passed through `regex` plugins, so they can operate on the full pattern (e.g. backrefs + // been passed through Regex+ plugins, so they can operate on the full pattern (e.g. backrefs // might be rewritten when using some features) genPattern = genPattern.replace(/\(\?:\\p{sc=<<}\|(.*?)\|\\p{sc=>>}\)/s, (_, sub) => { subpattern = sub; diff --git a/src/parse.js b/src/parse.js index 6a2d9e6..509b233 100644 --- a/src/parse.js +++ b/src/parse.js @@ -21,7 +21,7 @@ const AstTypes = { Regex: 'Regex', Subroutine: 'Subroutine', VariableLengthCharacterSet: 'VariableLengthCharacterSet', - // Used only by the transformer for `regex` ASTs + // Used only by the transformer for Regex+ ASTs Recursion: 'Recursion', }; @@ -367,7 +367,7 @@ function parseQuantifier({token, parent}) { // - Subroutines don't get their own capturing group numbers; ex: `(.)\g<1>\2` is invalid. // - Subroutines use the flags that apply to their referenced group, so e.g. // `(?-i)(?a)(?i)\g` is fully case sensitive. -// - Differences from PCRE/Perl/`regex` subroutines: +// - Differences from PCRE/Perl/Regex+ subroutines: // - Subroutines can't reference duplicate group names (though duplicate names are valid if no // subroutines reference them). // - Subroutines can't use absolute or relative numbers if named capture is used anywhere. diff --git a/src/transform.js b/src/transform.js index 29d5f4b..67cfb0b 100644 --- a/src/transform.js +++ b/src/transform.js @@ -19,14 +19,14 @@ import emojiRegex from 'emoji-regex-xs'; }} RegexAst */ /** -Transforms an Oniguruma AST in-place to a [`regex`](https://github.com/slevithan/regex) AST. 
+Transforms an Oniguruma AST in-place to a [Regex+](https://github.com/slevithan/regex) AST. Targets `ESNext`, expecting the generator to then down-convert to the desired JS target version. -`regex`'s syntax and behavior is a strict superset of native JavaScript, so the AST is very close +Regex+'s syntax and behavior is a strict superset of native JavaScript, so the AST is very close to representing native ESNext `RegExp` but with some added features (atomic groups, possessive -quantifiers, recursion). The AST doesn't use some of `regex`'s extended features like flag `x` or +quantifiers, recursion). The AST doesn't use some of Regex+'s extended features like flag `x` or subroutines because they follow PCRE behavior and work somewhat differently than in Oniguruma. The -AST represents what's needed to precisely reproduce Oniguruma behavior using `regex`. +AST represents what's needed to precisely reproduce Oniguruma behavior using Regex+. @param {import('./parse.js').OnigurumaAst} ast @param {{ accuracy?: keyof Accuracy; @@ -236,17 +236,17 @@ const FirstPassVisitor = { multiline: false, // JS flag y; no Onig equiv, but used for `\G` emulation sticky: node.sticky ?? false, - // Note: `regex` doesn't allow explicitly adding flags it handles implicitly, so leave out + // Note: Regex+ doesn't allow explicitly adding flags it handles implicitly, so leave out // properties `unicode` (JS flag u) and `unicodeSets` (JS flag v). 
Keep the existing values // for `ignoreCase` (flag i) and `dotAll` (JS flag s, but Onig flag m) }); - // Options accepted by `regex`; see + // Options accepted by Regex+; see parent.options = { disable: { - // Onig uses different rules for flag x than `regex`, so disable the implicit flag + // Onig uses different rules for flag x than Regex+, so disable the implicit flag x: true, // Onig has no flag to control "named capture only" mode but contextually applies its - // behavior when named capturing is used, so disable `regex`'s implicit flag for it + // behavior when named capturing is used, so disable Regex+'s implicit flag for it n: true, }, force: {