Skip to content

Commit

Permalink
Update Regex+ lib name
Browse files Browse the repository at this point in the history
  • Loading branch information
slevithan committed Nov 8, 2024
1 parent 0397d6b commit a2850fd
Show file tree
Hide file tree
Showing 6 changed files with 23 additions and 21 deletions.
2 changes: 1 addition & 1 deletion demo/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ <h2>Try it</h2>
const cp = String.fromCodePoint;
function printAst(ast) {
if (ast?.type !== 'Regex') {
throw new Error('Oniguruma or `regex` AST expected');
throw new Error('Oniguruma or Regex+ AST expected');
}
const isObject = value => ({}).toString.call(value) === '[object Object]';
const nodeIds = new Map();
Expand Down
2 changes: 2 additions & 0 deletions spec/match-backreference.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ describe('Backreference', () => {
expect('').not.toFindMatch(r`(\g<2>(\2))`);
});

// Only applies to 1-9; other numbers are treated as octal if not enough groups are defined to the
// left, even if enough groups are defined to the right
it('should throw for forward references to defined groups', () => {
expect(() => toDetails(r`\1()`)).toThrow();
expect(() => toDetails(r`()\2()`)).toThrow();
Expand Down
6 changes: 3 additions & 3 deletions src/generate.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import {getIgnoreCaseMatchChars, JsUnicodePropertiesPostEs2018, UnicodePropertie
import {cp, getNewCurrentFlags, isMinTarget, r} from './utils.js';

/**
Generates a `regex`-compatible `pattern`, `flags`, and `options` from a `regex` AST.
Generates a Regex+ compatible `pattern`, `flags`, and `options` from a Regex+ AST.
@param {import('./transform.js').RegexAst} ast
@param {import('.').Options} [options]
@returns {{
Expand Down Expand Up @@ -125,7 +125,7 @@ function generate(ast, options) {

const result = gen(ast);
if (!minTargetEs2024) {
// Switch from flag v to u. By default, `regex` implicitly chooses; control it instead
// Switch from flag v to u. By default, Regex+ implicitly chooses; control it instead
delete result.options.force.v;
result.options.disable.v = true;
result.options.unicodeSetsPlugin = null;
Expand Down Expand Up @@ -366,7 +366,7 @@ function genFlags(node, state) {
(state.appliedGlobalFlags.ignoreCase ? 'i' : '') +
(node.dotAll ? 's' : '') +
(node.sticky ? 'y' : '')
// `regex` doesn't allow explicitly adding flags it handles implicitly, so there are no
// Regex+ doesn't allow explicitly adding flags it handles implicitly, so there are no
// `unicode` (flag u) or `unicodeSets` (flag v) props; those flags are added separately
);
}
Expand Down
14 changes: 7 additions & 7 deletions src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@ import {recursion} from 'regex-recursion';
// compared to native JS RegExp is layered into all steps of the compilation process:
// 1. Tokenizer: Understands Oniguruma syntax, with many large and small differences from JS.
// 2. Parser: Builds an Oniguruma AST from the tokens with understanding of Oniguruma differences.
// 3. Transformer: Converts the Oniguruma AST to a `regex` AST that preserves all Oniguruma
// 3. Transformer: Converts the Oniguruma AST to a Regex+ AST that preserves all Oniguruma
// behavior. This is true even in cases of non-native-JS features that are supported by both
// `regex` and Oniguruma but with subtly different behavior in each (subroutines, flag x).
// 4. Generator: Converts the `regex` AST to a `regex` pattern, flags, and options.
// 5. Compiler: Components of the `regex` library are used to transpile several remaining features
// that aren't native to JS (atomic groups, possessive quantifiers, recursion). `regex` uses a
// Regex+ and Oniguruma but with subtly different behavior in each (subroutines, flag x).
// 4. Generator: Converts the Regex+ AST to a Regex+ pattern, flags, and options.
// 5. Compiler: Components of the Regex+ library are used to transpile several remaining features
// that aren't native to JS (atomic groups, possessive quantifiers, recursion). Regex+ uses a
// strict superset of JS RegExp syntax, so using it allows this library to benefit from not
// reinventing the wheel for complex features that `regex` already knows how to transpile to JS.
// reinventing the wheel for complex features that Regex+ already knows how to transpile to JS.

/**
@typedef {{
Expand Down Expand Up @@ -63,7 +63,7 @@ function toDetails(pattern, options) {
let subpattern;
if (regexAst._strategy) {
// Look for an emulation marker added as part of the strategy. Do this after the pattern has
// been passed through `regex` plugins, so they can operate on the full pattern (e.g. backrefs
// been passed through Regex+ plugins, so they can operate on the full pattern (e.g. backrefs
// might be rewritten when using some features)
genPattern = genPattern.replace(/\(\?:\\p{sc=<<}\|(.*?)\|\\p{sc=>>}\)/s, (_, sub) => {
subpattern = sub;
Expand Down
4 changes: 2 additions & 2 deletions src/parse.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ const AstTypes = {
Regex: 'Regex',
Subroutine: 'Subroutine',
VariableLengthCharacterSet: 'VariableLengthCharacterSet',
// Used only by the transformer for `regex` ASTs
// Used only by the transformer for Regex+ ASTs
Recursion: 'Recursion',
};

Expand Down Expand Up @@ -367,7 +367,7 @@ function parseQuantifier({token, parent}) {
// - Subroutines don't get their own capturing group numbers; ex: `(.)\g<1>\2` is invalid.
// - Subroutines use the flags that apply to their referenced group, so e.g.
// `(?-i)(?<a>a)(?i)\g<a>` is fully case sensitive.
// - Differences from PCRE/Perl/`regex` subroutines:
// - Differences from PCRE/Perl/Regex+ subroutines:
// - Subroutines can't reference duplicate group names (though duplicate names are valid if no
// subroutines reference them).
// - Subroutines can't use absolute or relative numbers if named capture is used anywhere.
Expand Down
16 changes: 8 additions & 8 deletions src/transform.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,14 @@ import emojiRegex from 'emoji-regex-xs';
}} RegexAst
*/
/**
Transforms an Oniguruma AST in-place to a [`regex`](https://github.com/slevithan/regex) AST.
Transforms an Oniguruma AST in-place to a [Regex+](https://github.com/slevithan/regex) AST.
Targets `ESNext`, expecting the generator to then down-convert to the desired JS target version.
`regex`'s syntax and behavior is a strict superset of native JavaScript, so the AST is very close
Regex+'s syntax and behavior is a strict superset of native JavaScript, so the AST is very close
to representing native ESNext `RegExp` but with some added features (atomic groups, possessive
quantifiers, recursion). The AST doesn't use some of `regex`'s extended features like flag `x` or
quantifiers, recursion). The AST doesn't use some of Regex+'s extended features like flag `x` or
subroutines because they follow PCRE behavior and work somewhat differently than in Oniguruma. The
AST represents what's needed to precisely reproduce Oniguruma behavior using `regex`.
AST represents what's needed to precisely reproduce Oniguruma behavior using Regex+.
@param {import('./parse.js').OnigurumaAst} ast
@param {{
accuracy?: keyof Accuracy;
Expand Down Expand Up @@ -236,17 +236,17 @@ const FirstPassVisitor = {
multiline: false,
// JS flag y; no Onig equiv, but used for `\G` emulation
sticky: node.sticky ?? false,
// Note: `regex` doesn't allow explicitly adding flags it handles implicitly, so leave out
// Note: Regex+ doesn't allow explicitly adding flags it handles implicitly, so leave out
// properties `unicode` (JS flag u) and `unicodeSets` (JS flag v). Keep the existing values
// for `ignoreCase` (flag i) and `dotAll` (JS flag s, but Onig flag m)
});
// Options accepted by `regex`; see <github.com/slevithan/regex#-options>
// Options accepted by Regex+; see <github.com/slevithan/regex#-options>
parent.options = {
disable: {
// Onig uses different rules for flag x than `regex`, so disable the implicit flag
// Onig uses different rules for flag x than Regex+, so disable the implicit flag
x: true,
// Onig has no flag to control "named capture only" mode but contextually applies its
// behavior when named capturing is used, so disable `regex`'s implicit flag for it
// behavior when named capturing is used, so disable Regex+'s implicit flag for it
n: true,
},
force: {
Expand Down

0 comments on commit a2850fd

Please sign in to comment.