From 4f116aa14a45df3f9cc0062645ce601323558db0 Mon Sep 17 00:00:00 2001 From: Steven Levithan Date: Thu, 19 Dec 2024 03:32:41 +0100 Subject: [PATCH] Add rules.captureName option --- README.md | 42 ++++++++--- demo/demo.css | 6 +- demo/demo.js | 1 + demo/index.html | 121 +++++++++++++++++-------------- scripts/utils.js | 4 +- spec/helpers/matchers.js | 5 +- spec/options.spec.js | 150 +++++++++++++++++++++++++++++++++++++++ spec/todetails.spec.js | 36 +--------- src/index.js | 10 ++- src/options.js | 5 +- src/parse.js | 4 +- src/tokenize.js | 56 +++++++++------ src/transform.js | 39 ++++++---- 13 files changed, 334 insertions(+), 145 deletions(-) create mode 100644 spec/options.spec.js diff --git a/README.md b/README.md index 16e6204..7c87875 100644 --- a/README.md +++ b/README.md @@ -81,6 +81,7 @@ type OnigurumaToEsOptions = { allowOrphanBackrefs?: boolean; allowUnhandledGAnchors?: boolean; asciiWordBoundaries?: boolean; + captureGroup?: boolean; }; target?: 'auto' | 'ES2025' | 'ES2024' | 'ES2018'; verbose?: boolean; @@ -117,6 +118,9 @@ function toOnigurumaAst( pattern: string, options?: { flags?: string; + rules?: { + captureGroup?: boolean; + }; } ): OnigurumaAst; ``` @@ -210,7 +214,8 @@ Advanced pattern options that override standard error checking and flags when en - `allowOrphanBackrefs`: Useful with TextMate grammars that merge backreferences across patterns. - `allowUnhandledGAnchors`: Applies flag `y` for unsupported uses of `\G`, rather than erroring. - Oniguruma-To-ES uses a variety of strategies to accurately emulate many common uses of `\G`. When using this option, if a `\G` is found that doesn't have a known emulation strategy, the `\G` is simply removed and JavaScript's `y` (`sticky`) flag is added. This might lead to some false positives and negatives, but is useful for non-critical matching (like syntax highlighting) when having some mismatches is better than not working. 
-- `asciiWordBoundaries`: Use ASCII-based `\b` and `\B`, which increases performance. +- `asciiWordBoundaries`: Use ASCII-based `\b` and `\B`, which increases search performance of generated regexes. +- `captureGroup`: Oniguruma option `ONIG_OPTION_CAPTURE_GROUP`. Unnamed captures and numbered calls allowed when using named capture. ### `target` @@ -616,7 +621,7 @@ Notice that nearly every feature below has at least subtle differences from Java ✔ Always "multiline"
✔ Only \n as newline
- ✔ No match after string-terminating \n
+ ✔ ^ doesn't match after string-terminating \n
@@ -911,6 +916,17 @@ Notice that nearly every feature below has at least subtle differences from Java ✔ Error
+ + + Compile-time options + ONIG_OPTION_CAPTURE_GROUP + ✅ + ✅ + + ✔ Unnamed captures and numbered calls allowed when using named capture
+ ✔ Allows numbered subroutine refs to duplicate group names
+ + The table above doesn't include all aspects that Oniguruma-To-ES emulates (including error handling, most aspects that work the same as in JavaScript, and many aspects of non-JavaScript features that work the same in the other regex flavors that support them). @@ -928,14 +944,20 @@ The table above doesn't include all aspects that Oniguruma-To-ES emulates (inclu The following don't yet have any support, and throw errors. They're all infrequently-used features, with most being *extremely* rare. -- Grapheme boundaries: `\y`, `\Y`. -- Flags `P` (POSIX is ASCII) and `y{g}`/`y{w}` (grapheme boundary modes). -- Whole-pattern modifiers: Don't capture `(?C)`, ignore-case is ASCII `(?I)`, find longest `(?L)`. -- Absence functions: `(?~…)`, etc. -- Conditionals: `(?(…)…)`, etc. -- Rarely-used character specifiers: Non-A-Za-z with `\cx`, `\C-x`; meta `\M-x`, `\M-\C-x`; bracketed octals `\o{…}`; octal UTF-8 encoded bytes (≥ `\200`). -- Code point sequences: `\x{H H …}`, `\o{O O …}`. -- Callout functions: `(?{…})`, etc. +- Supportable: + - Grapheme boundaries: `\y`, `\Y`. + - Flags `P` (POSIX is ASCII) and `y{g}`/`y{w}` (grapheme boundary modes). + - Rarely-used character specifiers: Non-A-Za-z with `\cx`, `\C-x`; meta `\M-x`, `\M-\C-x`; bracketed octals `\o{…}`; octal UTF-8 encoded bytes (≥ `\200`). + - Code point sequences: `\x{H H …}`, `\o{O O …}`. + - Whole-pattern modifiers: Don't capture `(?C)`, ignore-case is ASCII `(?I)`. +- Supportable for some uses: + - Absence functions: `(?~…)`, etc. + - Conditionals: `(?(…)…)`, etc. + - Whole-pattern modifiers: Find longest `(?L)`. +- Not supportable: + - Callout functions: `(?{…})`, etc. + +Despite the current omissions, Oniguruma-To-ES handles more than 99.9% of real-world Oniguruma regexes, based on patterns used in a large [collection](https://github.com/shikijs/textmate-grammars-themes/tree/main/packages/tm-grammars/grammars) of TextMate grammars. 
## ㊗️ Unicode / mixed case-sensitivity diff --git a/demo/demo.css b/demo/demo.css index 821b95c..c070394 100644 --- a/demo/demo.css +++ b/demo/demo.css @@ -162,12 +162,12 @@ pre, code, kbd, textarea { border-radius: 0.375em; } -#more-options { +#more-options-cols { display: flex; } -#more-options div { - margin-right: 3%; +#more-options-cols div { + margin-right: 5%; } #output, textarea { diff --git a/demo/demo.js b/demo/demo.js index eea024a..828b0e2 100644 --- a/demo/demo.js +++ b/demo/demo.js @@ -24,6 +24,7 @@ const state = { allowOrphanBackrefs: getValue('option-allowOrphanBackrefs'), allowUnhandledGAnchors: getValue('option-allowUnhandledGAnchors'), asciiWordBoundaries: getValue('option-asciiWordBoundaries'), + captureGroup: getValue('option-captureGroup'), }, target: getValue('option-target'), verbose: getValue('option-verbose'), diff --git a/demo/index.html b/demo/index.html index 00f486c..48f19e2 100644 --- a/demo/index.html +++ b/demo/index.html @@ -74,31 +74,74 @@

Try it

More options -
-
-

- -

-

- -

+
+
+
+

+ +

+

+ +

+
+
+

+ +

+

+ +

+
+
+

+ +

+

+ +

+
+
+

+ +

+

+ +

+
-

- -

Try it

-
-

- -

-

- -

-
-
-

- -

-

- -

-

diff --git a/scripts/utils.js b/scripts/utils.js
index 3b4c3a4..6e99b0f 100644
--- a/scripts/utils.js
+++ b/scripts/utils.js
@@ -76,7 +76,9 @@ function getMatchDetails(match) {
 const transpiledRegExpResult = (pattern, str, pos) => {
   let result;
   try {
-    const options = {};
+    // `vscode-oniguruma` uses option `ONIG_OPTION_CAPTURE_GROUP` by default, so mirror that here
+    // by enabling `rules.captureGroup`
+    const options = {rules: {captureGroup: true}};
     if (pos) {
       options.global = true;
     }
diff --git a/spec/helpers/matchers.js b/spec/helpers/matchers.js
index f8b876a..f0f3303 100644
--- a/spec/helpers/matchers.js
+++ b/spec/helpers/matchers.js
@@ -12,6 +12,7 @@ function getArgs(actual, expected) {
     pattern: typeof expected === 'string' ? expected : expected.pattern,
     flags: expected.flags ?? '',
     accuracy: expected.accuracy ?? 'default',
+    rules: expected.rules ?? {},
     strings: Array.isArray(actual) ? actual : [actual],
     targets: targeted,
   };
@@ -24,9 +25,9 @@ function wasFullStrMatch(match, str) {
 // Expects `negate` to be set by `negativeCompare` and doesn't rely on Jasmine's automatic matcher
 // negation because when negated we don't want to early return `true` when looping over the array
 // of strings and one is found to not match; they all need to not match
-function matchWithAllTargets({pattern, flags, strings, targets, accuracy}, {exact, negate}) {
+function matchWithAllTargets({pattern, flags, accuracy, rules, strings, targets}, {exact, negate}) {
   for (const target of targets) {
-    const re = toRegExp(pattern, {accuracy, flags, target});
+    const re = toRegExp(pattern, {accuracy, flags, rules, target});
     for (const str of strings) {
       // In case the regex includes flag g or y
       re.lastIndex = 0;
diff --git a/spec/options.spec.js b/spec/options.spec.js
new file mode 100644
index 0000000..9bf6d62
--- /dev/null
+++ b/spec/options.spec.js
@@ -0,0 +1,150 @@
+import {toDetails} from '../dist/index.mjs';
+import {envSupportsFlagV, r} from '../src/utils.js';
+import {matchers} from './helpers/matchers.js';
+
+beforeEach(() => {
+  jasmine.addMatchers(matchers);
+});
+
+describe('Options', () => {
+  describe('flags', () => {
+    it('should accept and translate supported flags', () => {
+      expect(toDetails('', {flags: 'i'}).flags).toContain('i');
+      expect(toDetails('', {flags: 'm'}).flags).toContain('s');
+      expect(toDetails('', {flags: 'm'}).flags).not.toContain('m');
+      expect(toDetails('', {flags: 'x'}).flags).not.toContain('x');
+      expect(toDetails('', {flags: 'D'}).flags).not.toContain('D');
+      expect(toDetails('', {flags: 'S'}).flags).not.toContain('S');
+      expect(toDetails('', {flags: 'W'}).flags).not.toContain('W');
+    });
+  
+    it('should throw for unexpected flags', () => {
+      expect(() => toDetails('', {flags: 'd'})).toThrow();
+      expect(() => toDetails('', {flags: 'g'})).toThrow();
+      expect(() => toDetails('', {flags: 's'})).toThrow();
+      expect(() => toDetails('', {flags: 'u'})).toThrow();
+      expect(() => toDetails('', {flags: 'v'})).toThrow();
+      expect(() => toDetails('', {flags: 'y'})).toThrow();
+    });
+  });
+
+  describe('target', () => {
+    it('should set target based on env for target auto', () => {
+      if (envSupportsFlagV) {
+        expect(toDetails('', {target: 'auto'}).flags).toBe('v');
+      } else {
+        expect(toDetails('', {target: 'auto'}).flags).toBe('u');
+      }
+    });
+
+    it('should use target auto if unspecified', () => {
+      if (envSupportsFlagV) {
+        expect(toDetails('').flags).toBe('v');
+      } else {
+        expect(toDetails('').flags).toBe('u');
+      }
+    });
+
+    it('should add flag v for target ES2024+', () => {
+      expect(toDetails('', {target: 'ES2024'}).flags).toBe('v');
+      expect(toDetails('', {target: 'ES2025'}).flags).toBe('v');
+    });
+
+    it('should add flag u for target ES2018', () => {
+      expect(toDetails('', {target: 'ES2018'}).flags).toBe('u');
+    });
+
+    it('should throw for unexpected targets', () => {
+      expect(() => toDetails('', {target: 'ES6'})).toThrow();
+      expect(() => toDetails('', {target: 'ES2019'})).toThrow();
+    });
+  });
+
+  describe('rules', () => {
+    describe('captureGroup', () => {
+      it('enables mixed unnamed and named capture', () => {
+        expect('aba').toExactlyMatch({
+          pattern: r`(a)(?b)\1`,
+          rules: {captureGroup: true},
+        });
+        expect('abb').toExactlyMatch({
+          pattern: r`(a)(?b)\2`,
+          rules: {captureGroup: true},
+        });
+        // Without `rules.captureGroup`
+        expect(() => toDetails(r`(a)(?b)\1`)).toThrow();
+      });
+
+      it('no multiplexing for numbered backrefs to named capture', () => {
+        expect('abb').toExactlyMatch({
+          pattern: r`(?a)(?b)\2`,
+          rules: {captureGroup: true},
+        });
+        expect('aba').not.toFindMatch({
+          pattern: r`(?a)(?b)\2`,
+          rules: {captureGroup: true},
+        });
+      });
+
+      it('multiplexing preserved for named backrefs', () => {
+        expect(['abcb', 'abcc']).toExactlyMatch({
+          pattern: r`(a)(?b)(?c)\k`,
+          rules: {captureGroup: true},
+        });
+        expect('abca').not.toFindMatch({
+          pattern: r`(a)(?b)(?c)\k`,
+          rules: {captureGroup: true},
+        });
+      });
+
+      it('backrefs rematch the most recent of a set with subroutines and unnamed capture', () => {
+        expect('abcc').toExactlyMatch({
+          pattern: r`(.)(?b)\g<1>\1`,
+          rules: {captureGroup: true},
+        });
+        expect('abca').not.toFindMatch({
+          pattern: r`(.)(?b)\g<1>\1`,
+          rules: {captureGroup: true},
+        });
+      });
+
+      it('backrefs rematch the most recent of a set with subroutines and named capture', () => {
+        expect('abcc').toExactlyMatch({
+          pattern: r`(a)(?.)\g<2>\2`,
+          rules: {captureGroup: true},
+        });
+        expect('abcb').not.toFindMatch({
+          pattern: r`(a)(?.)\g<2>\2`,
+          rules: {captureGroup: true},
+        });
+        expect('abcb').not.toFindMatch({
+          pattern: r`(a)(?.)\g<2>\k`,
+          rules: {captureGroup: true},
+        });
+      });
+
+      it('allows numbered subroutine refs to duplicate group names', () => {
+        expect(['abca', 'abcc']).toExactlyMatch({
+          pattern: r`(?.)(?.)\g<2>\k`,
+          rules: {captureGroup: true},
+        });
+        expect('abcb').not.toFindMatch({
+          pattern: r`(?.)(?.)\g<2>\k`,
+          rules: {captureGroup: true},
+        });
+        expect(['abcdc', 'abcdd']).toExactlyMatch({
+          pattern: r`(a)(?.)(?.)\g<2>\k`,
+          rules: {captureGroup: true},
+        });
+        expect('abcdb').not.toFindMatch({
+          pattern: r`(a)(?.)(?.)\g<2>\k`,
+          rules: {captureGroup: true},
+        });
+      });
+    });
+
+    // TODO: Add remaining
+  });
+
+  // TODO: Add remaining
+});
diff --git a/spec/todetails.spec.js b/spec/todetails.spec.js
index fc9d0e6..521bdd9 100644
--- a/spec/todetails.spec.js
+++ b/spec/todetails.spec.js
@@ -5,7 +5,7 @@ describe('toDetails', () => {
     expect(Object.keys(toDetails(''))).toEqual(['pattern', 'flags']);
   });
 
-  it('should throw for non-string pattern', () => {
+  it('should throw for non-string patterns', () => {
     expect(() => toDetails()).toThrow();
     for (const value of [undefined, null, 0, false, [], {}, /(?:)/]) {
       expect(() => toDetails(value)).toThrow();
@@ -15,38 +15,4 @@ describe('toDetails', () => {
   it('should return an empty pattern if given an empty string', () => {
     expect(toDetails('').pattern).toBe('');
   });
-
-  it('should accept and translate supported flags', () => {
-    expect(toDetails('', {flags: 'i'}).flags).toContain('i');
-    expect(toDetails('', {flags: 'm'}).flags).toContain('s');
-    expect(toDetails('', {flags: 'm'}).flags).not.toContain('m');
-    expect(toDetails('', {flags: 'x'}).flags).not.toContain('x');
-  });
-
-  it('should throw for unexpected flags', () => {
-    expect(() => toDetails('', {flags: 'd'})).toThrow();
-    expect(() => toDetails('', {flags: 'g'})).toThrow();
-    expect(() => toDetails('', {flags: 's'})).toThrow();
-    expect(() => toDetails('', {flags: 'u'})).toThrow();
-    expect(() => toDetails('', {flags: 'v'})).toThrow();
-    expect(() => toDetails('', {flags: 'y'})).toThrow();
-  });
-
-  it('should add flag v if target unspecified', () => {
-    expect(toDetails('').flags).toBe('v');
-  });
-
-  it('should add flag v for target ES2024+', () => {
-    expect(toDetails('', {target: 'ES2024'}).flags).toBe('v');
-    expect(toDetails('', {target: 'ES2025'}).flags).toBe('v');
-  });
-
-  it('should add flag u for target ES2018', () => {
-    expect(toDetails('', {target: 'ES2018'}).flags).toBe('u');
-  });
-
-  it('should throw for unexpected targets', () => {
-    expect(() => toDetails('', {target: 'ES6'})).toThrow();
-    expect(() => toDetails('', {target: 'ES2019'})).toThrow();
-  });
 });
diff --git a/src/index.js b/src/index.js
index 0be7a98..768a7e8 100644
--- a/src/index.js
+++ b/src/index.js
@@ -32,6 +32,7 @@ import {recursion} from 'regex-recursion';
     allowOrphanBackrefs?: boolean;
     allowUnhandledGAnchors?: boolean;
     asciiWordBoundaries?: boolean;
+    captureGroup?: boolean;
   };
   target?: keyof Target;
   verbose?: boolean;
@@ -50,7 +51,7 @@ Accepts an Oniguruma pattern and returns the details needed to construct an equi
 */
 function toDetails(pattern, options) {
   const opts = getOptions(options);
-  const tokenized = tokenize(pattern, opts.flags);
+  const tokenized = tokenize(pattern, opts.flags, {captureGroup: opts.rules.captureGroup});
   const onigurumaAst = parse(tokenized, {
     skipBackrefValidation: opts.rules.allowOrphanBackrefs,
     verbose: opts.verbose,
@@ -85,11 +86,16 @@ Returns an Oniguruma AST generated from an Oniguruma pattern.
 @param {string} pattern Oniguruma regex pattern.
 @param {{
   flags?: string;
+  rules?: {
+    captureGroup?: boolean;
+  };
 }} [options]
 @returns {import('./parse.js').OnigurumaAst}
 */
 function toOnigurumaAst(pattern, options) {
-  return parse(tokenize(pattern, options?.flags));
+  const flags = options?.flags ?? '';
+  const captureGroup = options?.rules?.captureGroup ?? false;
+  return parse(tokenize(pattern, flags, {captureGroup}));
 }
 
 /**
diff --git a/src/options.js b/src/options.js
index 51dd32b..429d739 100644
--- a/src/options.js
+++ b/src/options.js
@@ -57,8 +57,11 @@ function getOptions(options) {
       allowOrphanBackrefs: false,
       // Applies flag `y` for unsupported uses of `\G`, rather than erroring.
       allowUnhandledGAnchors: false,
-      // Use ASCII-based `\b` and `\B`, which increases performance.
+      // Use ASCII-based `\b` and `\B`, which increases search performance of generated regexes.
       asciiWordBoundaries: false,
+      // Oniguruma option `ONIG_OPTION_CAPTURE_GROUP`. Unnamed captures and numbered calls allowed
+      // when using named capture.
+      captureGroup: false,
       ...(options?.rules),
     },
   };
diff --git a/src/parse.js b/src/parse.js
index aaf1ffa..c1c47ed 100644
--- a/src/parse.js
+++ b/src/parse.js
@@ -64,7 +64,7 @@ const AstVariableLengthCharacterSetKinds = {
 }} [options]
 @returns {OnigurumaAst}
 */
-function parse({tokens, flags}, options) {
+function parse({tokens, flags, rules}, options) {
   const opts = {
     skipBackrefValidation: false,
     skipPropertyNameValidation: false,
@@ -135,7 +135,7 @@ function parse({tokens, flags}, options) {
   // `context` updated by preceding `walk` loop
   const {capturingGroups, hasNumberedRef, namedGroupsByName, subroutines} = context;
   // Validation that requires knowledge about the complete pattern
-  if (hasNumberedRef && namedGroupsByName.size) {
+  if (hasNumberedRef && namedGroupsByName.size && !rules.captureGroup) {
     throw new Error('Numbered backref/subroutine not allowed when using named capture');
   }
   for (const {ref} of subroutines) {
diff --git a/src/tokenize.js b/src/tokenize.js
index 5d67bb1..a06b413 100644
--- a/src/tokenize.js
+++ b/src/tokenize.js
@@ -125,22 +125,33 @@ const charClassTokenRe = new RegExp(r`
     extended: boolean;
     ignoreCase: boolean;
   };
+  rules: {
+    captureGroup: boolean;
+  };
 }} TokenizerResult
 */
 /**
-@param {string} pattern
-@param {string} [flags] Oniguruma flags. Flag `m` is equivalent to JS flag `s`.
+@param {string} pattern Oniguruma pattern.
+@param {string} [flags] Oniguruma flags.
+@param {{captureGroup?: boolean;}} [rules] Oniguruma compile-time options.
 @returns {TokenizerResult}
 */
-function tokenize(pattern, flags = '') {
+function tokenize(pattern, flags = '', rules) {
+  rules = {
+    // `ONIG_OPTION_CAPTURE_GROUP`
+    captureGroup: false,
+    ...rules,
+  };
   if (typeof pattern !== 'string') {
     throw new Error('String expected as pattern');
   }
   if (!/^[imxDSW]*$/.test(flags)) {
     throw new Error(`Flags "${flags}" includes unsupported value`);
   }
-  const xStack = [flags.includes('x')];
+  const extended = flags.includes('x');
+  const xStack = [extended];
   const context = {
+    captureGroup: rules.captureGroup,
     getCurrentModX: () => xStack.at(-1),
     numOpenGroups: 0,
     popModX() {xStack.pop()},
@@ -163,25 +174,24 @@ function tokenize(pattern, flags = '') {
   }
 
   const potentialUnnamedCaptureTokens = [];
-  let numNamedCaptures = 0;
+  let numNamedAndOptInUnnamedCaptures = 0;
   tokens.forEach(t => {
     if (t.type === TokenTypes.GroupOpen) {
       if (t.kind === TokenGroupKinds.capturing) {
-        numNamedCaptures++;
-        t.number = numNamedCaptures;
+        t.number = ++numNamedAndOptInUnnamedCaptures;
       } else if (t.raw === '(') {
         potentialUnnamedCaptureTokens.push(t);
       }
     }
   });
-  // Enable unnamed capturing groups if no named captures
-  if (!numNamedCaptures) {
+  // Enable unnamed capturing groups if no named captures (when `captureGroup` not enabled)
+  if (!numNamedAndOptInUnnamedCaptures) {
     potentialUnnamedCaptureTokens.forEach((t, i) => {
       t.kind = TokenGroupKinds.capturing;
       t.number = i + 1;
     });
   }
-  const numCaptures = numNamedCaptures || potentialUnnamedCaptureTokens.length;
+  const numCaptures = numNamedAndOptInUnnamedCaptures || potentialUnnamedCaptureTokens.length;
   // Can now split escaped nums accurately, accounting for number of captures
   tokens = tokens.map(
     t => t.type === TokenTypes.EscapedNumber ? splitEscapedNumToken(t, numCaptures) : t
@@ -195,12 +205,13 @@ function tokenize(pattern, flags = '') {
       // is equivalent to JS flag s
       dotAll: flags.includes('m'),
       // Flag x is fully handled during tokenization
-      extended: flags.includes('x'),
+      extended,
       // Flags D, S, W are currently only supported as top-level flags
       digitIsAscii: flags.includes('D'),
       spaceIsAscii: flags.includes('S'),
       wordIsAscii: flags.includes('W'),
     },
+    rules,
   };
 }
 
@@ -296,8 +307,9 @@ function getTokenWithDetails(context, pattern, m, lastIndex) {
     context.pushModX(context.getCurrentModX());
     context.numOpenGroups++;
     if (
-      // Unnamed capture if no named captures, else noncapturing group
-      m === '(' ||
+      // Unnamed capture if no named captures present and `captureGroup` not enabled, else
+      // noncapturing group
+      (m === '(' && !context.captureGroup) ||
       // Noncapturing group
       m === '(?:'
     ) {
@@ -325,14 +337,18 @@ function getTokenWithDetails(context, pattern, m, lastIndex) {
         }),
       };
     }
-    // Named capture (checked after lookbehind due to similar syntax)
-    if (m2 === '<' || m2 === "'") {
+    // Named capture (checked after lookbehind due to similar syntax), or unnamed capture when
+    // `captureGroup` enabled
+    if (m2 === '<' || m2 === "'" || (m === '(' && context.captureGroup)) {
+      const token = createToken(TokenTypes.GroupOpen, m, {
+        kind: TokenGroupKinds.capturing,
+        // Will add `number` in a second pass
+      });
+      if (m !== '(') {
+        token.name = m.slice(3, -1);
+      }
       return {
-        token: createToken(TokenTypes.GroupOpen, m, {
-          kind: TokenGroupKinds.capturing,
-          name: m.slice(3, -1),
-          // Will add `number` in a second pass
-        }),
+        token,
       }
     }
     if (m2 === '(') {
diff --git a/src/transform.js b/src/transform.js
index 72b0e23..ffeea14 100644
--- a/src/transform.js
+++ b/src/transform.js
@@ -161,7 +161,10 @@ const FirstPassVisitor = {
     if (name && !isValidGroupNameJs(name)) {
       throw new Error(`Group name "${name}" invalid in JS`);
     }
-    subroutineRefMap.set(name ?? number, node);
+    subroutineRefMap.set(number, node);
+    if (name) {
+      subroutineRefMap.set(name, node);
+    }
   },
 
   CharacterSet({node, replaceWith}, {accuracy, minTargetEs2024, digitIsAscii, spaceIsAscii, wordIsAscii}) {
@@ -361,7 +364,7 @@ const SecondPassVisitor = {
   Backreference({node}, {multiplexCapturesToLeftByRef, reffedNodesByReferencer}) {
     const {orphan, ref} = node;
     if (!orphan) {
-      // Copy the current state for later multiplexing expansion. It's done in a subsequent pass
+      // Copy the current state for later multiplexing expansion. That's done in a subsequent pass
       // because backref numbers need to be recalculated after subroutine expansion
       reffedNodesByReferencer.set(node, [...multiplexCapturesToLeftByRef.get(ref).map(({node}) => node)]);
     }
@@ -399,25 +402,27 @@ const SecondPassVisitor = {
     ) {
       // Has value if we're within a subroutine expansion
       const origin = groupOriginByCopy.get(node);
-      const ref = node.name ?? node.number;
 
       // ## Handle recursion; runs after subroutine expansion
-      if (origin && openRefs.has(ref)) {
+      if (origin && openRefs.has(node.number)) {
         // Recursion doesn't affect any following backrefs to its `ref` (unlike other subroutines),
         // so don't wrap with a capture. The reffed group might have its name removed due to later
         // subroutine expansion
-        const recursion = createRecursion(ref);
-        reffedNodesByReferencer.set(recursion, openRefs.get(ref));
+        const recursion = createRecursion(node.number);
+        reffedNodesByReferencer.set(recursion, openRefs.get(node.number));
         replaceWith(recursion);
         // This node's kids have been removed from the tree, so no need to traverse them
         skip();
         return;
       }
-      // Name or number; not mixed since can't use numbered subroutines with named capture
-      openRefs.set(ref, node);
+      openRefs.set(node.number, node);
 
       // ## Track data for backref multiplexing
-      const multiplexNodes = getOrCreate(multiplexCapturesToLeftByRef, ref, []);
+      multiplexCapturesToLeftByRef.set(node.number, []);
+      if (node.name) {
+        getOrCreate(multiplexCapturesToLeftByRef, node.name, []);
+      }
+      const multiplexNodes = multiplexCapturesToLeftByRef.get(node.name ?? node.number);
       for (let i = 0; i < multiplexNodes.length; i++) {
         // Captures added via subroutine expansion (maybe indirectly because they were descendant
         // captures of the reffed group or in a nested subroutine expansion) form a set with their
@@ -438,7 +443,10 @@ const SecondPassVisitor = {
           break;
         }
       }
-      multiplexNodes.push({node, origin});
+      multiplexCapturesToLeftByRef.get(node.number).push({node, origin});
+      if (node.name) {
+        multiplexCapturesToLeftByRef.get(node.name).push({node, origin});
+      }
 
       // ## Track data for duplicate names within an alternation path
       // Pre-ES2025 doesn't allow duplicate names, but ES2025+ allows duplicate names that are
@@ -459,7 +467,7 @@ const SecondPassVisitor = {
       }
     },
     exit({node}, {openRefs}) {
-      openRefs.delete(node.name ?? node.number);
+      openRefs.delete(node.number);
     },
   },
 
@@ -483,7 +491,7 @@ const SecondPassVisitor = {
     // Other forms of recursion are handled by the `CapturingGroup` visitor
     const isGlobalRecursion = ref === 0;
     const expandedSubroutine = isGlobalRecursion ?
-      createRecursion(ref) :
+      createRecursion(0) :
       // The reffed group might itself contain subroutines, which are expanded during sub-traversal
       cloneCapturingGroup(reffedGroupNode, state.groupOriginByCopy, null);
     let replacement = expandedSubroutine;
@@ -790,8 +798,11 @@ function isValidGroupNameJs(name) {
 
 // Returns a single node, either the given node or all nodes wrapped in a noncapturing group
 function parseFragment(pattern, options) {
-  const skipPropertyNameValidation = !!options?.skipPropertyNameValidation;
-  const ast = parse(tokenize(pattern), {skipPropertyNameValidation});
+  const opts = {
+    skipPropertyNameValidation: false,
+    ...options,
+  };
+  const ast = parse(tokenize(pattern), opts);
   const alts = ast.pattern.alternatives;
   if (alts.length > 1 || alts[0].elements.length > 1) {
     return adoptAndSwapKids(createGroup(), alts);