diff --git a/README.md b/README.md index 15ed496..949c2bf 100644 --- a/README.md +++ b/README.md @@ -55,51 +55,76 @@ In browsers: ## 🔑 API -### `toDetails` +### `toRegExp` -Transpiles an Oniguruma pattern to the parts needed to construct a native JavaScript `RegExp`. +Transpiles an Oniguruma pattern and returns a native JavaScript `RegExp`. + +> [!TIP] +> Try it in the [demo REPL](https://slevithan.github.io/oniguruma-to-es/demo/). ```ts -function toDetails( +function toRegExp( pattern: string, - options?: CompileOptions -): { - pattern: string; - flags: string; -}; + options?: Options +): RegExp; ``` -The returned `pattern` and `flags` can be provided directly to the JavaScript `RegExp` constructor. Various JavaScript flags might have been added or removed compared to the Oniguruma flags provided, as part of the emulation process. - -#### Type `CompileOptions` +#### Type `Options` ```ts -type CompileOptions = { +type Options = { accuracy?: 'strict' | 'default' | 'loose'; - flags?: OnigurumaFlags, + avoidSubclass?: boolean; + flags?: OnigurumaFlags; global?: boolean; hasIndices?: boolean; maxRecursionDepth?: number | null; target?: 'ES2018' | 'ES2024' | 'ESNext'; + tmGrammar?: boolean; verbose?: boolean; }; ``` See [Options](#-options) for more details. -### `toRegExp` +### `toDetails` -Transpiles an Oniguruma pattern and returns a native JavaScript `RegExp`. +Transpiles an Oniguruma pattern to the parts needed to construct a native JavaScript `RegExp`. ```ts -function toRegExp( +function toDetails( pattern: string, - options?: CompileOptions & {avoidSubclass?: boolean} -): RegExp; + options?: Options +): { + pattern: string; + flags: string; + strategy?: { + name: string; + subpattern?: string; + }; +}; ``` -> [!TIP] -> Try it in the [demo REPL](https://slevithan.github.io/oniguruma-to-es/demo/). +The returned `pattern` and `flags` might be different than those provided, as a result of the emulation process. 
The returned `pattern`, `flags`, and `strategy` can be provided as arguments to the `EmulatedRegExp` constructor to produce the same result as `toRegExp`. + +If the only keys returned are `pattern` and `flags`, they can optionally be provided to JavaScript's `RegExp` constructor instead. Setting option `avoidSubclass` to `true` ensures that this is always the case, and causes any patterns that are emulatable only via `EmulatedRegExp` to throw an error. + +### `EmulatedRegExp` + +Accepts the results of `toDetails` as constructor arguments and produces the same result as `toRegExp`. + +```ts +class EmulatedRegExp extends RegExp { + constructor( + pattern: string | EmulatedRegExp, + flags?: string, + strategy?: { + name: string; + subpattern?: string; + } + ); +}; +``` ### `toOnigurumaAst` @@ -157,6 +182,12 @@ Supports all features of `default`, plus the following: - Oniguruma-To-ES uses a variety of strategies to accurately emulate many common uses of `\G`. When using `loose` accuracy, if a `\G` assertion is found that doesn't have a known emulation strategy, the `\G` is simply removed and JavaScript's `y` (`sticky`) flag is added. This might lead to some false positives and negatives. +### `avoidSubclass` + +*Default: `false`.* + +Prevents use of advanced emulation strategies that rely on returning a `RegExp` subclass, resulting in certain patterns not being emulatable. + ### `flags` Oniguruma flags; a string with `i`, `m`, and `x` in any order (all optional). @@ -214,6 +245,12 @@ Sets the JavaScript language version for generated patterns and flags. Later tar - Generated regexes might use features that require Node.js 23 or a 2024-era browser (except Safari, which lacks support). +### `tmGrammar` + +*Default: `false`.* + +Leave disabled unless the regex will be used in a TextMate grammar processor that merges backreferences across `begin` and `end` patterns. 
+ ### `verbose` *Default: `false`.* diff --git a/demo/demo.js b/demo/demo.js index 017200f..62b93e8 100644 --- a/demo/demo.js +++ b/demo/demo.js @@ -60,7 +60,7 @@ function escapeHtml(str) { } function getFormattedSubclass(pattern, flags, {name, subpattern}) { - return `new WrappedRegExp('${ + return `new EmulatedRegExp('${ pattern.replace(/\\/g, '\\\\').replace(/'/g, "\\'") }', '${ flags diff --git a/src/generate.js b/src/generate.js index da5c58f..8e952e3 100644 --- a/src/generate.js +++ b/src/generate.js @@ -7,7 +7,7 @@ import {cp, getNewCurrentFlags, isMinTarget, r} from './utils.js'; /** Generates a `regex`-compatible `pattern`, `flags`, and `options` from a `regex` AST. @param {import('./transform.js').RegexAst} ast -@param {import('.').CompileOptions} [options] +@param {import('.').Options} [options] @returns {{ pattern: string; flags: string; diff --git a/src/index.js b/src/index.js index 7813b18..16f9461 100644 --- a/src/index.js +++ b/src/index.js @@ -30,20 +30,20 @@ import {recursion} from 'regex-recursion'; target?: keyof Target; tmGrammar?: boolean; verbose?: boolean; -}} CompileOptions +}} Options */ /** Transpiles an Oniguruma pattern to the parts needed to construct a native JavaScript `RegExp`. @param {string} pattern Oniguruma regex pattern. -@param {CompileOptions} [options] +@param {Options} [options] @returns {{ pattern: string; flags: string; strategy?: { name: string; subpattern?: string; - } + }; }} */ function toDetails(pattern, options) { @@ -98,34 +98,35 @@ function toOnigurumaAst(pattern, options) { /** Transpiles an Oniguruma pattern and returns a native JavaScript `RegExp`. @param {string} pattern Oniguruma regex pattern. 
-@param {CompileOptions} [options] +@param {Options} [options] @returns {RegExp} */ function toRegExp(pattern, options) { const result = toDetails(pattern, options); if (result.strategy) { - return new WrappedRegExp(result.pattern, result.flags, result.strategy); + return new EmulatedRegExp(result.pattern, result.flags, result.strategy); } return new RegExp(result.pattern, result.flags); } -class WrappedRegExp extends RegExp { +/** +@class +@param {string | EmulatedRegExp} pattern +@param {string} [flags] +@param {{ + name: string; + subpattern?: string; +}} [strategy] +*/ +class EmulatedRegExp extends RegExp { #strategy; - /** - @param {string | WrappedRegExp} pattern - @param {string} [flags] - @param {{ - name: string; - subpattern?: string; - }} [strategy] - */ constructor(pattern, flags, strategy) { super(pattern, flags); if (strategy) { this.#strategy = strategy; // The third argument isn't provided when regexes are copied as part of the internal handling // of string methods `matchAll` and `split` - } else if (pattern instanceof WrappedRegExp) { + } else if (pattern instanceof EmulatedRegExp) { // Can read private properties of the existing object since it was created by this class this.#strategy = pattern.#strategy; } @@ -196,6 +197,7 @@ class WrappedRegExp extends RegExp { } export { + EmulatedRegExp, toDetails, toOnigurumaAst, toRegExp, diff --git a/src/options.js b/src/options.js index 9a3bd97..74e8e6a 100644 --- a/src/options.js +++ b/src/options.js @@ -18,8 +18,8 @@ const Target = /** @type {const} */ ({ /** Returns a complete set of options, with default values set for options that weren't provided. 
-@param {import('.').CompileOptions} [options] -@returns {Required<import('.').CompileOptions>} +@param {import('.').Options} [options] +@returns {Required<import('.').Options>} */ function getOptions(options) { if (options?.target !== undefined && !EsVersion[options.target]) { @@ -27,29 +27,29 @@ function getOptions(options) { } // Set default values return { - // Sets the level of emulation rigor/strictness + // Sets the level of emulation rigor/strictness. accuracy: 'default', // Prevents use of advanced emulation strategies that rely on returning a `RegExp` subclass, - // resulting in certain patterns not being emulatable + // resulting in certain patterns not being emulatable. avoidSubclass: false, // Oniguruma flags; a string with `i`, `m`, and `x` in any order (all optional). Oniguruma's - // `m` is equivalent to JavaScript's `s` (`dotAll`) + // `m` is equivalent to JavaScript's `s` (`dotAll`). flags: '', - // Include JavaScript flag `g` in the result + // Include JavaScript flag `g` (`global`) in the result. global: false, - // Include JavaScript flag `d` in the result + // Include JavaScript flag `d` (`hasIndices`) in the result. hasIndices: false, // Specifies the recursion depth limit. Supported values are integers `2` to `100` and `null`. - // If `null`, any use of recursion results in an error + // If `null`, any use of recursion results in an error. maxRecursionDepth: 6, - // Disables optimizations that simplify the pattern when it doesn't change the meaning - verbose: false, // Sets the JavaScript language version for generated patterns and flags. Later targets allow - // faster processing, simpler generated source, and support for additional features + // faster processing, simpler generated source, and support for additional features. target: 'ES2024', // Leave disabled unless the regex will be used in a TextMate grammar processor that merges - // backreferences across `begin` and `end` patterns + // backreferences across `begin` and `end` patterns. 
tmGrammar: false, + // Disables optimizations that simplify the pattern when it doesn't change the meaning. + verbose: false, ...options, }; } diff --git a/src/transform.js b/src/transform.js index 997663b..4db00f9 100644 --- a/src/transform.js +++ b/src/transform.js @@ -577,7 +577,7 @@ function adoptAndSwapKids(parent, kids) { } function applySubclassStrategies(ast, accuracy) { - // Special case handling that requires coupling with a `RegExp` subclass (see `WrappedRegExp`). + // Special case handling that requires coupling with a `RegExp` subclass (see `EmulatedRegExp`). // These changes add emulation support for some common patterns that are otherwise unsupportable. // Only one subclass strategy is supported per pattern const alts = ast.pattern.alternatives;