Skip to content

Commit

Permalink
Add \O, \N
Browse files Browse the repository at this point in the history
  • Loading branch information
slevithan committed Nov 2, 2024
1 parent a21f0cc commit 56c3b48
Show file tree
Hide file tree
Showing 8 changed files with 109 additions and 28 deletions.
24 changes: 22 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,7 @@ Notice that nearly every feature below has at least subtle differences from Java
</tr>

<tr valign="top">
<th align="left" rowspan="5">Character sets</th>
<th align="left" rowspan="7">Character sets</th>
<td>Digit, word</td>
<td><code>\d</code>, <code>\w</code>, etc.</td>
<td align="middle">✅</td>
Expand Down Expand Up @@ -417,6 +417,26 @@ Notice that nearly every feature below has at least subtle differences from Java
✔ Excludes only <code>\n</code> (unlike JS)<br>
</td>
</tr>
<tr valign="top">
<td>Any</td>
<td><code>\O</code></td>
<td align="middle">✅</td>
<td align="middle">✅</td>
<td>
✔ Any char (any flags)<br>
✔ Identity escape in char class<br>
</td>
</tr>
<tr valign="top">
<td>Non-newline</td>
<td><code>\N</code></td>
<td align="middle">✅</td>
<td align="middle">✅</td>
<td>
✔ Excludes only <code>\n</code> (any flags)<br>
✔ Identity escape in char class<br>
</td>
</tr>
<tr valign="top">
<td>Unicode property</td>
<td>
Expand Down Expand Up @@ -860,7 +880,7 @@ Notice that nearly every feature below has at least subtle differences from Java
<td align="middle">✅</td>
<td>
✔ <code>\u{…}</code> is an error<br>
✔ <code>[\q{…}]</code> matches one of <code>q</code>, <code>{</code>, etc.<br>
✔ <code>[\q{…}]</code> matches <code>q</code>, etc.<br>
✔ <code>[a--b]</code> includes the invalid reversed range <code>a</code> to <code>-</code><br>
</td>
</tr>
Expand Down
16 changes: 8 additions & 8 deletions dist/index.min.js

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions scripts/onig-compare.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,15 @@ compare([
[r`\N`, `\r`],
[r`[\N]`, `\r`],
[r`[\N]`, `N`],
[r`\o`, `o`],
[r`[\o]`, `o`],
[r`\o{1}`, `\u{1}`],
[r`[\o{1}]`, `\u{1}`],
[r`\O`, `\n`],
[r`[\O]`, `\n`],
[r`\O`, `\r`],
[r`[\O]`, `\r`],
[r`[\O]`, `O`],
[r`\o`, `o`],
[r`[\o]`, `o`],
[r`\o{1}`, `\u{1}`],
[r`[\o{1}]`, `\u{1}`],
[r`\p`, `p`],
[r`[\p]`, `p`],
[r`\p{`, `p{`],
Expand Down
22 changes: 11 additions & 11 deletions scripts/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -70,30 +70,30 @@ function getMatchDetails(match) {
}

/**
@type {MatchDetailsFn<Promise<MatchDetails>>}
@type {MatchDetailsFn}
*/
const onigurumaResult = async (pattern, str, pos) => {
const transpiledRegExpResult = (pattern, str, pos) => {
let result;
try {
result = await onigurumaExec(pattern, str, pos);
const options = pos ? {global: true} : undefined;
const re = toRegExp(pattern, '', options);
if (pos) {
re.lastIndex = pos;
}
result = re.exec(str);
} catch (err) {
result = err;
}
return getMatchDetails(result);
};

/**
@type {MatchDetailsFn}
@type {MatchDetailsFn<Promise<MatchDetails>>}
*/
const transpiledRegExpResult = (pattern, str, pos) => {
const onigurumaResult = async (pattern, str, pos) => {
let result;
try {
const options = pos ? {global: true} : undefined;
const re = toRegExp(pattern, '', options);
if (pos) {
re.lastIndex = pos;
}
result = re.exec(str);
result = await onigurumaExec(pattern, str, pos);
} catch (err) {
result = err;
}
Expand Down
41 changes: 41 additions & 0 deletions spec/match-char-set.spec.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import {r} from '../src/utils.js';
import {matchers} from './helpers/matchers.js';

beforeEach(() => {
Expand All @@ -6,6 +7,23 @@ beforeEach(() => {

describe('CharacterSet', () => {
// Specs for `\O`: matches every character, and is a plain `O` inside a character class
describe('any', () => {
  it('should match any character', () => {
    const samples = [
      '\0', '\n', '\r', 'a', '\x85', '\u2028', '\u2029', '\u{10000}', '\u{10FFFF}',
    ];
    expect(samples).toExactlyMatch(r`\O`);
  });

  it('should match line feed with flag m disabled', () => {
    // Flag m is explicitly turned off via an inline modifier
    const pattern = r`(?-m)\O`;
    expect('\n').toExactlyMatch(pattern);
  });

  it('should be identity escape within a char class', () => {
    const charClass = r`[\O]`;
    expect('O').toExactlyMatch(charClass);
    expect('a').not.toFindMatch(charClass);
  });
});

describe('dot', () => {
it('should match any character except line feed', () => {
expect('\n').not.toFindMatch('.');
expect([
Expand All @@ -16,6 +34,29 @@ describe('CharacterSet', () => {
it('should match line feed with flag m enabled', () => {
expect('\n').toExactlyMatch({pattern: '.', flags: 'm'});
});

it('should be literal within a char class', () => {
expect('.').toExactlyMatch('[.]');
expect('a').not.toFindMatch('[.]');
});
});

// Specs for `\N`: excludes only line feed, and is a plain `N` inside a character class
describe('non_newline', () => {
  it('should match any character except line feed', () => {
    // Assert against `\N` itself; the dot has its own suite
    expect('\n').not.toFindMatch(r`\N`);
    expect([
      '\0', '\r', 'a', '\x85', '\u2028', '\u2029', '\u{10000}', '\u{10FFFF}',
    ]).toExactlyMatch(r`\N`);
  });

  // Flag m lets the dot match line feed; `\N` must stay unaffected
  it('should not match line feed with flag m enabled', () => {
    expect('\n').not.toFindMatch({pattern: r`\N`, flags: 'm'});
  });

  it('should be identity escape within a char class', () => {
    expect('N').toExactlyMatch(r`[\N]`);
    expect('a').not.toFindMatch(r`[\N]`);
  });
});

// TODO: Add remaining
Expand Down
2 changes: 1 addition & 1 deletion src/generate.js
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ function genCharacterClassRange(node, state) {
}

function genCharacterSet({kind, negate, value, key}, state) {
if (kind === AstCharacterSetKinds.any) {
if (kind === AstCharacterSetKinds.dot) {
return state.currentFlags.dotAll ?
((state.appliedGlobalFlags.dotAll || state.useFlagMods) ? '.' : '[^]') :
// Onig's only line break char is line feed, unlike JS
Expand Down
18 changes: 17 additions & 1 deletion src/tokenize.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ const TokenTypes = /** @type {const} */ ({
const TokenCharacterSetKinds = {
any: 'any',
digit: 'digit',
dot: 'dot',
hex: 'hex',
non_newline: 'non_newline',
posix: 'posix',
property: 'property',
space: 'space',
Expand Down Expand Up @@ -228,6 +230,20 @@ function getTokenWithDetails(context, pattern, m, lastIndex) {
}),
};
}
if (m1 === 'N') {
return {
token: createToken(TokenTypes.CharacterSet, m, {
kind: TokenCharacterSetKinds.non_newline,
}),
};
}
if (m1 === 'O') {
return {
token: createToken(TokenTypes.CharacterSet, m, {
kind: TokenCharacterSetKinds.any,
}),
};
}
if ('RX'.includes(m1)) {
return {
token: createToken(TokenTypes.VariableLengthCharacterSet, m, {
Expand Down Expand Up @@ -335,7 +351,7 @@ function getTokenWithDetails(context, pattern, m, lastIndex) {
if (m === '.') {
return {
token: createToken(TokenTypes.CharacterSet, m, {
kind: TokenCharacterSetKinds.any,
kind: TokenCharacterSetKinds.dot,
}),
};
}
Expand Down
6 changes: 5 additions & 1 deletion src/transform.js
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,12 @@ const FirstPassVisitor = {

CharacterSet({node, replaceWith}, {allowBestEffort, minTargetEs2024}) {
const {kind, negate, value} = node;
if (kind === AstCharacterSetKinds.hex) {
if (kind === AstCharacterSetKinds.any) {
replaceWith(createUnicodeProperty('Any'));
} else if (kind === AstCharacterSetKinds.hex) {
replaceWith(createUnicodeProperty('AHex', {negate}));
} else if (kind === AstCharacterSetKinds.non_newline) {
replaceWith(parseFragment(r`[^\n]`));
} else if (kind === AstCharacterSetKinds.posix) {
if (!minTargetEs2024 && (value === 'graph' || value === 'print')) {
if (!allowBestEffort) {
Expand Down

0 comments on commit 56c3b48

Please sign in to comment.