Skip to content

Commit

Permalink
Fix edge case for literal hyphen
Browse files Browse the repository at this point in the history
  • Loading branch information
slevithan committed Nov 3, 2024
1 parent b6da11e commit 8e5d94c
Show file tree
Hide file tree
Showing 10 changed files with 89 additions and 26 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -488,8 +488,9 @@ Notice that nearly every feature below has at least subtle differences from Java
<td align="middle">✅</td>
<td align="middle">✅</td>
<td>
✔ Unescaped <code>-</code> is literal char in some contexts (different than JS rules in any mode)<br>
✔ Unescaped <code>-</code> outside of range is literal in some contexts (different than JS rules in any mode)<br>
✔ Fewer chars require escaping than JS<br>
✔ Error for reversed range (same as JS)<br>
</td>
</tr>
<tr valign="top">
Expand Down
11 changes: 7 additions & 4 deletions spec/helpers/features.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ const duplicateCaptureNamesSupported = (() => {
}
return true;
})();
const maxTargetForDuplicateNames = duplicateCaptureNamesSupported ? null : 'ES2024';
const maxTestTargetForDuplicateNames = duplicateCaptureNamesSupported ? null : 'ES2024';

const patternModsSupported = (() => {
try {
Expand All @@ -16,9 +16,12 @@ const patternModsSupported = (() => {
}
return true;
})();
const maxTargetForPatternMods = patternModsSupported ? null : 'ES2024';
const maxTestTargetForPatternMods = patternModsSupported ? null : 'ES2024';

// Lowest target that specs relying on flag v are run against; specs pass it as `minTarget` so the
// matchers skip older targets (presumably because flag v is unavailable below ES2024 -- confirm)
const minTestTargetForFlagV = 'ES2024';

export {
maxTargetForDuplicateNames,
maxTargetForPatternMods,
maxTestTargetForDuplicateNames,
maxTestTargetForPatternMods,
minTestTargetForFlagV,
};
7 changes: 4 additions & 3 deletions spec/helpers/matchers.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,12 @@ function getArgs(actual, expected) {
pattern: typeof expected === 'string' ? expected : expected.pattern,
flags: expected.flags ?? '',
maxTarget: expected.maxTarget ?? null,
minTarget: expected.minTarget ?? null,
};
const targets = ['ES2018', 'ES2024', 'ESNext'];
const targeted = opts.maxTarget ?
targets.filter(target => EsVersion[target] <= EsVersion[opts.maxTarget]) :
targets;
const targeted = targets.
filter(target => !opts.maxTarget || (EsVersion[target] <= EsVersion[opts.maxTarget])).
filter(target => !opts.minTarget || (EsVersion[target] >= EsVersion[opts.minTarget]));
return {
pattern: opts.pattern,
flags: opts.flags,
Expand Down
20 changes: 10 additions & 10 deletions spec/match-backreference.spec.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import {compile} from '../dist/index.mjs';
import {cp, r} from '../src/utils.js';
import {maxTargetForDuplicateNames} from './helpers/features.js';
import {maxTestTargetForDuplicateNames} from './helpers/features.js';
import {matchers} from './helpers/matchers.js';

beforeEach(() => {
Expand Down Expand Up @@ -252,11 +252,11 @@ describe('Backreference', () => {
expect('').not.toFindMatch(r`(?<a>(?<a>\k<a>))`);
expect('aa').toExactlyMatch({
pattern: r`(?<n>a)\k<n>|(?<n>b\k<n>)`,
maxTarget: maxTargetForDuplicateNames,
maxTarget: maxTestTargetForDuplicateNames,
});
expect(['a', 'b', 'ba', 'bb']).not.toFindMatch({
pattern: r`(?<n>a)\k<n>|(?<n>b\k<n>)`,
maxTarget: maxTargetForDuplicateNames,
maxTarget: maxTestTargetForDuplicateNames,
});
});

Expand All @@ -265,11 +265,11 @@ describe('Backreference', () => {
expect('aba').toExactlyMatch(r`(?<n>a)(?<n>b\k<n>)`);
expect(['aa', 'bcb']).toExactlyMatch({
pattern: r`(?<n>a)\k<n>|(?<n>b)(?<n>c\k<n>)`,
maxTarget: maxTargetForDuplicateNames,
maxTarget: maxTestTargetForDuplicateNames,
});
expect(['a', 'bc', 'bca', 'bcc']).not.toFindMatch({
pattern: r`(?<n>a)\k<n>|(?<n>b)(?<n>c\k<n>)`,
maxTarget: maxTargetForDuplicateNames,
maxTarget: maxTestTargetForDuplicateNames,
});
});

Expand Down Expand Up @@ -297,7 +297,7 @@ describe('Backreference', () => {
expect('aab').toExactlyMatch(r`(?<n>a)\k<n>(?<n>b)`);
expect('aa').toExactlyMatch({
pattern: r`(?<n>a)\k<n>|(?<n>b)`,
maxTarget: maxTargetForDuplicateNames,
maxTarget: maxTestTargetForDuplicateNames,
});
});

Expand Down Expand Up @@ -345,19 +345,19 @@ describe('Backreference', () => {
// rather than JS logic where they match the empty string
expect(['aa', 'bb']).toExactlyMatch({
pattern: r`(?<n>a)\k<n>|(?<n>b)\k<n>`,
maxTarget: maxTargetForDuplicateNames,
maxTarget: maxTestTargetForDuplicateNames,
});
expect(['a', 'b', 'ba']).not.toFindMatch({
pattern: r`(?<n>a)\k<n>|(?<n>b)\k<n>`,
maxTarget: maxTargetForDuplicateNames,
maxTarget: maxTestTargetForDuplicateNames,
});
expect(['aa', 'bcb', 'bcc']).toExactlyMatch({
pattern: r`(?<n>a)\k<n>|(?<n>b)(?<n>c)\k<n>`,
maxTarget: maxTargetForDuplicateNames,
maxTarget: maxTestTargetForDuplicateNames,
});
expect(['a', 'bc', 'bca']).not.toFindMatch({
pattern: r`(?<n>a)\k<n>|(?<n>b)(?<n>c)\k<n>`,
maxTarget: maxTargetForDuplicateNames,
maxTarget: maxTestTargetForDuplicateNames,
});
});

Expand Down
1 change: 1 addition & 0 deletions spec/match-char-class-intersection.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ beforeEach(() => {
});

// TODO: Add me
// TODO: Test that it throws for target ES2018

describe('CharacterClassIntersection', () => {
it('should', () => {
Expand Down
62 changes: 58 additions & 4 deletions spec/match-char-class-range.spec.js
Original file line number Diff line number Diff line change
@@ -1,14 +1,68 @@
import {r} from '../src/utils.js';
import {minTestTargetForFlagV} from './helpers/features.js';
import {matchers} from './helpers/matchers.js';

beforeEach(() => {
jasmine.addMatchers(matchers);
});

// TODO: Add me

describe('CharacterClassRange', () => {
it('should', () => {
expect('').toExactlyMatch(r``);
// Baseline: a simple range matches each char within its bounds and nothing just outside them
it('should match any char from range', () => {
expect(['a', 'b', 'c']).toExactlyMatch(r`[a-c]`);
expect('d').not.toFindMatch(r`[a-c]`);
});

// A hyphen with nothing before it in its class cannot start a range, so it's a literal
it('should match unescaped hyphen as literal at start of class', () => {
expect('-').toExactlyMatch(r`[-a]`);
expect('-').toExactlyMatch(r`[-\w]`);
// Negated class that lists the hyphen must not match it
expect('-').not.toFindMatch(r`[^-a]`);
// Double negation via a nested class: the hyphen matches again
expect('-').toExactlyMatch(r`[^[^-a]]`);
// Hyphen at the start of a nested class
expect('-').toExactlyMatch(r`[a[-b]]`);
// Hyphen at the start of the outer class, directly followed by a nested class
expect('-').toExactlyMatch(r`[-[ab]]`);
});

// A hyphen with nothing after it in its class cannot complete a range, so it's a literal
it('should match unescaped hyphen as literal at end of class', () => {
expect('-').toExactlyMatch(r`[a-]`);
expect('-').toExactlyMatch(r`[\w-]`);
// Hyphen at the end of a nested class
expect('-').toExactlyMatch(r`[a[b-]]`);
// Hyphen at the end of the outer class, directly after a nested class
expect('-').toExactlyMatch(r`[a[bc]-]`);
});

// A hyphen directly before or after `&&` is at the edge of its operand, so it's a literal.
// Each case sets `minTarget` so the matchers only run it for targets at or above
// `minTestTargetForFlagV` (presumably because intersection output needs flag v -- confirm).
it('should match unescaped hyphen as literal at intersection boundary', () => {
// Hyphen at the end of the left operand
expect('-').toExactlyMatch({
pattern: r`[a-&&\p{Any}]`,
minTarget: minTestTargetForFlagV,
});
expect('-').toExactlyMatch({
pattern: r`[\w-&&\p{Any}]`,
minTarget: minTestTargetForFlagV,
});
// Hyphen at the start of the right operand
expect('-').toExactlyMatch({
pattern: r`[\p{Any}&&-a]`,
minTarget: minTestTargetForFlagV,
});
expect('-').toExactlyMatch({
pattern: r`[\p{Any}&&-\w]`,
minTarget: minTestTargetForFlagV,
});
});

// The edge case this commit addresses: a hyphen that directly follows a completed range (`a-z`)
// is a literal rather than the start of another range. This corresponds to the parser no longer
// building a range when the previous sibling node is a CharacterClassRange.
it('should match unescaped hyphen as literal at right of range', () => {
expect('-').toExactlyMatch(r`[a-z-0]`);
expect('-').toExactlyMatch(r`[a-z-\w]`);
// The literal hyphen sits between two ranges (`a-z` and `0-9`)
expect('-').toExactlyMatch(r`[a-z-0-9]`);
});

// A range whose start is greater than its end must be rejected.
// NOTE(review): `compile` is not among the imports shown for this spec file (only `r`,
// `minTestTargetForFlagV`, and `matchers` are imported). If it is not in scope, calling it throws
// a ReferenceError, which also satisfies `toThrow()`, so these expectations would pass without
// exercising the library. Confirm `compile` is imported (e.g., from '../dist/index.mjs').
it('should throw for reversed ranges', () => {
expect(() => compile(r`[z-a]`)).toThrow();
expect(() => compile(r`[\u{1}-\0]`)).toThrow();
});

// A character set such as `\w` or `\s` cannot be used as either end of a range.
// NOTE(review): `compile` is not among the imports shown for this spec file (only `r`,
// `minTestTargetForFlagV`, and `matchers` are imported). If it is not in scope, calling it throws
// a ReferenceError, which also satisfies `toThrow()`, so these expectations would pass without
// exercising the library. Confirm `compile` is imported (e.g., from '../dist/index.mjs').
it('should throw for range with set', () => {
expect(() => compile(r`[a-\w]`)).toThrow();
expect(() => compile(r`[\w-a]`)).toThrow();
expect(() => compile(r`[\w-a-z]`)).toThrow();
// NOTE(review): this same pattern is expected to match '-' in 'should match unescaped hyphen as
// literal at right of range'; the two expectations appear to conflict -- verify which is intended
expect(() => compile(r`[a-z-\w]`)).toThrow();
expect(() => compile(r`[\w-\s]`)).toThrow();
});
});
1 change: 1 addition & 0 deletions spec/match-char-class.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -66,4 +66,5 @@ describe('CharacterClass', () => {
});

// TODO: Add remaining
// TODO: Test that nested negated classes throw for target ES2018
});
4 changes: 2 additions & 2 deletions spec/match-char-set.spec.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import {r} from '../src/utils.js';
import {maxTargetForPatternMods} from './helpers/features.js';
import {maxTestTargetForPatternMods} from './helpers/features.js';
import {matchers} from './helpers/matchers.js';

beforeEach(() => {
Expand All @@ -17,7 +17,7 @@ describe('CharacterSet', () => {
it('should match line feed with flag m disabled', () => {
expect('\n').toExactlyMatch({
pattern: r`(?-m)\O`,
maxTarget: maxTargetForPatternMods,
maxTarget: maxTestTargetForPatternMods,
});
});

Expand Down
4 changes: 3 additions & 1 deletion src/parse.js
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@ function parseCharacterClassHyphen(context, state) {
if (
prevSiblingNode &&
prevSiblingNode.type !== AstTypes.CharacterClass &&
prevSiblingNode.type !== AstTypes.CharacterClassRange &&
nextToken &&
nextToken.type !== TokenTypes.CharacterClassOpen &&
nextToken.type !== TokenTypes.CharacterClassClose &&
Expand Down Expand Up @@ -558,7 +559,8 @@ function createPattern() {
}

function createQuantifier(element, min, max, greedy, possessive) {
// TODO: Move validation to tokenizer?
// Could be checked in the tokenizer, but done here to parallel char class range validation and
// to prevent manually creating invalid quantifiers
if (max < min) {
throw new Error('Quantifier range out of order');
}
Expand Down
2 changes: 1 addition & 1 deletion src/tokenize.js
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,7 @@ function createTokenForSharedEscape(raw, {inCharClass}) {
}).decode(new Uint8Array(bytes));
const encoder = new TextEncoder();
const tokens = [...decoded].map(char => {
// Might have different casing for hex A-F than the input
// Since this regenerates `raw`, it might have different casing for hex A-F than the input
const raw = [...encoder.encode(char)].map(byte => `\\x${byte.toString(16)}`).join('');
return createToken(TokenTypes.Character, raw, {
value: char.codePointAt(0),
Expand Down

0 comments on commit 8e5d94c

Please sign in to comment.