Skip to content

Commit

Permalink
Scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
slevithan committed Nov 2, 2024
1 parent b133a78 commit 2901bfe
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 29 deletions.
7 changes: 5 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ A lightweight **Oniguruma to JavaScript RegExp transpiler** that runs in the bro

Compared to running the actual [Oniguruma](https://github.com/kkos/oniguruma) C library in JavaScript via WASM bindings (e.g. via [vscode-oniguruma](https://github.com/microsoft/vscode-oniguruma)), this library is **much lighter weight** and its regexes **run much faster** since they run as native JavaScript.

> [!WARNING]
> This library is currently in alpha and has known bugs.
### [Try the demo REPL](https://slevithan.github.io/oniguruma-to-es/demo/)

Oniguruma-To-ES deeply understands all of the hundreds of large and small differences in Oniguruma and JavaScript regex syntax and behavior across multiple JavaScript version targets. It's *obsessive* about precisely following Oniguruma syntax rules and ensuring that the emulated features it supports have **exactly the same behavior**, even in extreme edge cases. A few uncommon features can't be perfectly emulated and allow rare differences, but if you don't want to allow this, you can disable the `allowBestEffort` option to throw for such patterns (see details below).
Expand Down Expand Up @@ -208,9 +211,9 @@ Sets the JavaScript language version for generated patterns and flags. Later tar

Following are the supported features by target.

> Targets `ES2024` and `ESNext` have the same emulation capabilities. Resulting regexes might differ, but not in the strings they match.
> Targets `ES2024` and `ESNext` have the same emulation capabilities. Resulting regexes might have different source and flags, but they match the same strings.
Notice that nearly every feature below has at least subtle differences from JavaScript. Some features and subfeatures listed as unsupported are not emulatable using native JavaScript regexes, but others might get support in future versions of Oniguruma-To-ES. Unsupported features throw an error.
Notice that nearly every feature below has at least subtle differences from JavaScript. Some features and subfeatures listed as unsupported are not emulatable using native JavaScript regexes, but support for others might be added in future versions of Oniguruma-To-ES. Unsupported features throw an error.

<table>
<tr>
Expand Down
36 changes: 14 additions & 22 deletions scripts/onig-compare.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import {r} from '../src/utils.js';
import {areMatchDetailsEqual, color, err, ok, onigurumaResult, transpiledRegExpResult} from './utils.js';
import {areMatchDetailsEqual, color, err, ok, onigurumaResult, transpiledRegExpResult, value} from './utils.js';

// Help with improving this script or comparing with Oniguruma automatically in Jasmine specs would
// be very welcome
Expand All @@ -10,7 +10,7 @@ compare([
[r`\000`, `\0`],
[r`\0000`, `\u{0}0`],
[r`\c`, r`\c`],
[r`\O`, `\n`], // Ucase o
[r`\O`, `\n`], // Capital O
[r`\p`, r`\p`],
[r`\p{`, r`\p{`],
[r`\u`, r`\u`],
Expand All @@ -23,6 +23,18 @@ compare([
[r`\x1`, `\x01`],
[r`\x7F`, `\x7F`],
[r`\x80`, `\x80`],
[r`\x{`, r`\x{`],
[r`\x{1}`, `\x01`],
[r`\x{00000001}`, `\x10`], // 8 hex digits
[r`\x{000000001}`, `\x10`], // 9 hex digits
[r`\x{10FFFF}`, `\u{10FFFF}`],
[r`\x{0010FFFF}`, `\u{10FFFF}`], // 8 hex digits
[r`\x{00010FFFF}`, `\u{10FFFF}`], // 9 hex digits
[r`\x{110000}`, ``],
[r`\x{13FFFF}`, ``],
[r`\x{0013FFFF}`, ``], // 8 hex digits
[r`\x{00013FFFF}`, ``], // 9 hex digits
[r`\x{140000}`, ``],
]);

async function compare(tests) {
Expand Down Expand Up @@ -61,23 +73,3 @@ async function compare(tests) {
numDiff &&= `${color('red', numDiff)}`;
console.log(`\nFinished: ${numSame} same, ${numDiff} different`);
}

/**
 * Formats a value for console output, picking a color by the value's type.
 *
 * - strings are escaped with `esc`, wrapped in double quotes, and colored cyan
 * - numbers are colored blue
 * - `null` is colored gray
 * - anything else is returned via `String(value)` with no coloring
 *
 * NOTE(review): `color` is imported from ./utils.js; presumably it wraps the text in ANSI
 * color codes — confirm against its definition.
 *
 * @param {*} value - The value to format.
 * @returns {string} The formatted representation.
 */
function value(value) {
  const type = typeof value;
  if (type === 'string') {
    return color('cyan', `"${esc(value)}"`);
  }
  if (type === 'number') {
    return color('blue', value);
  }
  // `typeof null` is 'object', so null is told apart from other values here
  return value === null ? color('gray', value) : String(value);
}

/**
 * Makes line feeds, carriage returns, and NUL characters visible by replacing each with its
 * two-character backslash escape (`\n`, `\r`, `\0`). All other characters are left untouched.
 *
 * @param {string} str - The string to escape.
 * @returns {string} The string with those three control characters spelled out.
 */
function esc(str) {
  const visibleForms = {
    '\n': '\\n',
    '\r': '\\r',
    '\0': '\\0',
  };
  // A single pass is enough: none of the replacements contain a character that is itself replaced
  return str.replace(/[\n\r\0]/g, (ch) => visibleForms[ch]);
}
19 changes: 14 additions & 5 deletions scripts/onig-match.js
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
import {areMatchDetailsEqual, err, ok, onigurumaResult, transpiledRegExpResult} from "./utils.js";
import {r} from "../src/utils.js";
import {areMatchDetailsEqual, color, err, ok, onigurumaResult, transpiledRegExpResult, value} from "./utils.js";

exec(process.argv.slice(2));

// Basic Oniguruma console-based tester that also does a comparison with Oniguruma-to-ES
// Basic Oniguruma tester for the console that also reports a comparison with Oniguruma-to-ES
async function exec([pattern, str]) {
if (!(typeof pattern === 'string' && typeof str === 'string')) {
err(null, 'pattern and str args expected');
return;
}
// [Hack] Replace unescaped `\u{...}` with code point value
str = str.replace(
/\\u\{([^\}]+)\}|\\?./gsu,
(m, code) => m.startsWith(r`\u{`) ? String.fromCodePoint(parseInt(code, 16)) : m
);

const libMatches = [];
let libMatch = transpiledRegExpResult(pattern, str, 0);
Expand All @@ -22,12 +28,15 @@ async function exec([pattern, str]) {
onigMatch = await onigurumaResult(pattern, str, onigMatch.index + onigMatch.result.length);
}

console.log('Pattern:', pattern);
console.log('String:', str);
console.log('Pattern:', color('yellow', pattern));
console.log('String:', `${value(str)} ${color('gray', `(len ${str.length})`)}`);
if (onigMatch.error) {
err(null, `Oniguruma error: ${onigMatch.error.message}`);
} else {
console.log('Oniguruma results:', onigMatches);
const result = !onigMatches.length ?
`${color('gray', 'No match')}` :
(onigMatches.length > 1 ? onigMatches : onigMatches[0]);
console.log(`Oniguruma results (${onigMatches.length}):`, result);
}
if (!!libMatch.error !== !!onigMatch.error) {
err(null, `Oniguruma and library results differed (only ${libMatch.error ? 'library' : 'Oniguruma'} threw error)`);
Expand Down
22 changes: 22 additions & 0 deletions scripts/utils.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import {toRegExp} from '../dist/index.mjs';
import {r} from '../src/utils.js';
import {readFileSync} from 'node:fs';
// vscode-oniguruma 2.0.1 uses Oniguruma 6.9.8
import oniguruma from 'vscode-oniguruma';
Expand Down Expand Up @@ -129,11 +130,32 @@ function areMatchDetailsEqual(a, b) {
return !(a.index !== b.index || a.result !== b.result || !!a.error !== !!b.error);
}

/**
 * Formats a value for console output, picking a color by the value's type.
 *
 * - numbers are colored blue
 * - strings are escaped with `esc`, wrapped in double quotes, and colored cyan; `ansi.cyan` is
 *   handed to `esc` as the sequence to emit after each escaped character
 * - `null` is colored gray
 * - anything else is returned via `String(value)` with no coloring
 *
 * NOTE(review): `color` and `ansi` are defined elsewhere in this file; presumably `ansi.cyan`
 * is the escape sequence that switches output back to cyan — confirm against their definitions.
 *
 * @param {*} value - The value to format.
 * @returns {string} The formatted representation.
 */
function value(value) {
  switch (typeof value) {
    case 'number':
      return color('blue', value);
    case 'string':
      return color('cyan', `"${esc(value, ansi.cyan)}"`);
    default:
      // `typeof null` is 'object', so null ends up in this branch
      return value === null ? color('gray', value) : String(value);
  }
}

/**
 * Makes line feeds, carriage returns, and NUL characters visible by replacing each with its
 * backslash escape (`\n`, `\r`, `\0`) passed through `color('gray', ...)`, followed by
 * `restoreEsc`.
 *
 * NOTE(review): `restoreEsc` looks intended to re-apply the caller's surrounding color after
 * the gray span — confirm against how `color` terminates its output.
 *
 * @param {string} str - The string to escape.
 * @param {string} [restoreEsc=''] - Text appended after every escaped character.
 * @returns {string} The string with those three control characters spelled out.
 */
function esc(str, restoreEsc = '') {
  const substitutions = [
    [/\n/g, r`\n`],
    [/\r/g, r`\r`],
    [/\0/g, r`\0`],
  ];
  // Applied one after another, in this order, using string (not function) replacements
  return substitutions.reduce(
    (out, [pattern, literal]) => out.replace(pattern, `${color('gray', literal)}${restoreEsc}`),
    str
  );
}

export {
areMatchDetailsEqual,
color,
err,
ok,
onigurumaResult,
transpiledRegExpResult,
value,
};

0 comments on commit 2901bfe

Please sign in to comment.