Skip to content

Commit

Permalink
Implements IRI validation using RFC 3987 grammar
Browse files Browse the repository at this point in the history
  • Loading branch information
Tpt authored and rubensworks committed Aug 12, 2022
1 parent b2e327a commit 25da3ed
Show file tree
Hide file tree
Showing 3 changed files with 265 additions and 5 deletions.
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,11 @@ This package also works out-of-the-box in browsers via tools such as [webpack](h

## Usage

TODO
```
import { validateIri } from 'validate-iri'
const yourIri = 'https://example.com/john-doe'
validateIri(yourIri) // Returns an Error if the IRI is invalid, or undefined if it is valid.
```

## License

Expand Down
68 changes: 65 additions & 3 deletions lib/Validate.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,70 @@
/**
 * Build the regular expression that recognises absolute IRIs (with an optional
 * fragment), following the `IRI` production of RFC 3987, section 2.2.
 *
 * Each local constant is the regex source for the ABNF rule of the same name,
 * so the code can be compared with the RFC rule by rule.
 *
 * @return {RegExp} An anchored (`^…$`) regular expression using the `u` flag,
 * so that character ranges beyond the BMP are interpreted as code points.
 */
function buildAbsoluteIriRegex(): RegExp {
  // The syntax is defined in https://www.rfc-editor.org/rfc/rfc3987#section-2.2
  // Rules are defined in reversed order (leaf rules first), because every
  // rule is a string interpolated into the rules that use it.

  const sub_delims_raw = `!$&'()*+,;=`;
  const sub_delims = `[${sub_delims_raw}]`;

  const pct_encoded = `%[a-fA-F0-9]{2}`;

  const dec_octet = '([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])';

  const ipv4address = `${dec_octet}\\.${dec_octet}\\.${dec_octet}\\.${dec_octet}`;

  const h16 = `[a-fA-F0-9]{1,4}`;
  const ls32 = `(${h16}:${h16}|${ipv4address})`;
  // One alternative per line of the IPv6address rule of RFC 3986, section 3.2.2.
  const ipv6address = `(${[
    `(${h16}:){6}${ls32}`,
    `::(${h16}:){5}${ls32}`,
    `(${h16})?::(${h16}:){4}${ls32}`,
    `((${h16}:){0,1}${h16})?::(${h16}:){3}${ls32}`,
    `((${h16}:){0,2}${h16})?::(${h16}:){2}${ls32}`,
    `((${h16}:){0,3}${h16})?::${h16}:${ls32}`,
    `((${h16}:){0,4}${h16})?::${ls32}`,
    `((${h16}:){0,5}${h16})?::${h16}`,
    `((${h16}:){0,6}${h16})?::`,
  ].join('|')})`;

  // ASCII-only `unreserved` of RFC 3986 (IP literals never contain ucschar).
  const unreserved_raw = `a-zA-Z0-9\\-._~`;

  // IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
  const ipvfuture = `v[a-fA-F0-9]+\\.[${unreserved_raw}${sub_delims_raw}:]+`;

  const ip_literal = `\\[(${ipv6address}|${ipvfuture})\\]`;

  const port = `[0-9]*`;

  const scheme = `[a-zA-Z][a-zA-Z0-9+\\-.]*`;

  const iprivate_raw = `\u{E000}-\u{F8FF}\u{F0000}-\u{FFFFD}\u{100000}-\u{10FFFD}`;
  const iprivate = `[${iprivate_raw}]`;

  const ucschar_raw = `\u{A0}-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFEF}\u{10000}-\u{1FFFD}\u{20000}-\u{2FFFD}\u{30000}-\u{3FFFD}\u{40000}-\u{4FFFD}\u{50000}-\u{5FFFD}\u{60000}-\u{6FFFD}\u{70000}-\u{7FFFD}\u{80000}-\u{8FFFD}\u{90000}-\u{9FFFD}\u{A0000}-\u{AFFFD}\u{B0000}-\u{BFFFD}\u{C0000}-\u{CFFFD}\u{D0000}-\u{DFFFD}\u{E1000}-\u{EFFFD}`;

  const iunreserved_raw = `${unreserved_raw}${ucschar_raw}`;
  const iunreserved = `[${iunreserved_raw}]`;

  // `ipchar` matches exactly ONE path character. Repetition is applied only by
  // the rules that use it: a quantifier here would nest inside theirs, which
  // both lets `isegment_nz` match the empty string and causes exponential
  // backtracking on invalid input.
  const ipchar = `(${iunreserved}|${pct_encoded}|${sub_delims}|[:@])`;

  const ifragment = `(${ipchar}|[\\/?])*`;

  const iquery = `(${ipchar}|${iprivate}|[\\/?])*`;

  const isegment_nz = `${ipchar}+`;
  const isegment = `${ipchar}*`;

  const ipath_empty = '';
  const ipath_rootless = `${isegment_nz}(\\/${isegment})*`;
  // The first segment must be non-empty so that this rule never matches a
  // string starting with "//" (that prefix always introduces an authority).
  const ipath_absolute = `\\/(${isegment_nz}(\\/${isegment})*)?`;
  const ipath_abempty = `(\\/${isegment})*`;

  const ireg_name = `(${iunreserved}|${pct_encoded}|${sub_delims})*`;

  const ihost = `(${ip_literal}|${ipv4address}|${ireg_name})`;
  const iuserinfo = `(${iunreserved}|${pct_encoded}|${sub_delims}|:)*`;
  const iauthority = `(${iuserinfo}@)?${ihost}(:${port})?`;

  const ihier_part = `(\\/\\/${iauthority}${ipath_abempty}|${ipath_absolute}|${ipath_rootless}|${ipath_empty})`;

  const iri = `^${scheme}:${ihier_part}(\\?${iquery})?(#${ifragment})?$`;

  return new RegExp(iri, 'u');
}

// Compiled once at module load and shared by every validateIri call.
const IRI_REGEX: RegExp = buildAbsoluteIriRegex();

/**
 * Validate a given IRI according to RFC 3987.
 *
 * The outcome is reported through the return value; an invalid IRI does not
 * make this function throw.
 *
 * @param {string} iri a string that may be an IRI.
 * @return {Error | undefined} An error if the IRI is invalid, or undefined if it is valid.
 */
export function validateIri(iri: string): Error | undefined {
  return IRI_REGEX.test(iri) ? undefined : new Error(`Invalid IRI according to RFC 3987: '${iri}'`);
}
196 changes: 195 additions & 1 deletion test/Validate-test.ts
Original file line number Diff line number Diff line change
@@ -1 +1,195 @@
// TODO
import { validateIri } from '../lib/Validate';

// Absolute IRIs that the validator must accept.
const VALID_ABSOLUTE_IRIS = [
  'file://foo',
  'ftp://ftp.is.co.za/rfc/rfc1808.txt',
  'http://www.ietf.org/rfc/rfc2396.txt',
  'ldap://[2001:db8::7]/c=GB?objectClass?one',
  'mailto:John.Doe@example.com',
  'news:comp.infosystems.www.servers.unix',
  'tel:+1-816-555-1212',
  'telnet://192.0.2.16:80/',
  'urn:oasis:names:specification:docbook:dtd:xml:4.1.2',
  'http://example.com',
  'http://example.com/',
  'http://example.com/foo',
  'http://example.com/foo/bar',
  'http://example.com/foo/bar/',
  'http://example.com/foo/bar?q=1&r=2',
  'http://example.com/foo/bar/?q=1&r=2',
  'http://example.com#toto',
  'http://example.com/#toto',
  'http://example.com/foo#toto',
  'http://example.com/foo/bar#toto',
  'http://example.com/foo/bar/#toto',
  'http://example.com/foo/bar?q=1&r=2#toto',
  'http://example.com/foo/bar/?q=1&r=2#toto',
  'http://example.com/foo/bar/.././baz',
  // Boundary code points of the ranges allowed outside ASCII
  'http://a.example/AZaz\u{00C0}\u{00D6}\u{00D8}\u{00F6}\u{00F8}\u{02FF}\u{0370}\u{037D}\u{037F}\u{1FFF}',
  'http://a.example/\u{200C}\u{200D}\u{2070}\u{218F}\u{2C00}\u{2FEF}\u{3001}\u{D7FF}\u{FA0E}\u{FDCF}',
  'http://a.example/\u{FDF0}\u{FFEF}\u{10000}\u{EFFFD}',
  'http://a.example/?AZaz\u{E000}\u{F8FF}\u{F0000}\u{FFFFD}\u{100000}\u{10FFFD}\u{00C0}\u{00D6}\u{00D8}',
  'http://a.example/\u{00F6}\u{00F8}\u{02FF}\u{0370}\u{037D}\u{037F}\u{1FFF}\u{200C}\u{200D}\u{2070}\u{218F}\u{2C00}',
  'http://a.example/\u{2FEF}\u{3001}\u{D7FF}\u{FA0E}\u{FDCF}\u{FDF0}\u{FFEF}\u{10000}\u{EFFFD}',
  // From https://sourceforge.net/projects/foursuite/ under Apache License
  'file:///foo/bar',
  'mailto:user@host?subject=blah',
  'dav:', // Empty opaque part / rel-path allowed by RFC 2396bis
  'about:', // Empty opaque part / rel-path allowed by RFC 2396bis
  // The following test cases are from a Perl script by David A. Wheeler
  // at http://www.dwheeler.com/secure-programs/url.pl
  'http://www.yahoo.com',
  'http://www.yahoo.com/',
  'http://1.2.3.4/',
  'http://www.yahoo.com/stuff',
  'http://www.yahoo.com/stuff/',
  'http://www.yahoo.com/hello%20world/',
  'http://www.yahoo.com?name=obi',
  'http://www.yahoo.com?name=obi+wan&status=jedi',
  'http://www.yahoo.com?onery',
  'http://www.yahoo.com#bottom',
  'http://www.yahoo.com/yelp.html#bottom',
  'https://www.yahoo.com/',
  'ftp://www.yahoo.com/',
  'ftp://www.yahoo.com/hello',
  // Wheeler's script says these are invalid, but they aren't
  'http://www.yahoo.com?name=%00%01',
  'http://www.yaho%6f.com',
  'http://www.yahoo.com/hello%00world/',
  'http://www.yahoo.com/hello+world/',
  'http://www.yahoo.com?name=obi&',
  'http://www.yahoo.com?name=obi&type=',
  'http://www.yahoo.com/yelp.html#',
  // The following test cases are from a Haskell program by Graham Klyne
  // at http://www.ninebynine.org/Software/HaskellUtils/Network/URITest.hs
  'http://example.org/aaa/bbb#ccc',
  'mailto:local@domain.org',
  'mailto:local@domain.org#frag',
  'HTTP://EXAMPLE.ORG/AAA/BBB#CCC',
  // -- escapes
  'http://example.org/aaa%2fbbb#ccc',
  'http://example.org/aaa%2Fbbb#ccc',
  'http://example.com/%2F',
  'http://example.com/?%2F',
  'http://example.com/#?%2F',
  'http://example.com/aaa%2Fbbb',
  // -- ports
  'http://example.org:80/aaa/bbb#ccc',
  'http://example.org:/aaa/bbb#ccc',
  'http://example.org./aaa/bbb#ccc',
  'http://example.123./aaa/bbb#ccc',
  // -- bare authority
  'http://example.org',
  // -- IPv6 literals (from RFC2732):
  'http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80/index.html',
  'http://[1080:0:0:0:8:800:200C:417A]/index.html',
  'http://[3ffe:2a00:100:7031::1]',
  'http://[1080::8:800:200C:417A]/foo',
  'http://[::192.9.5.5]/ipng',
  'http://[::FFFF:129.144.52.38]:80/index.html',
  'http://[2010:836B:4179::836B:4179]',
  // -- Random other things that crop up
  // NOTE(review): the last character below is U+0237; the upstream fixture may
  // have been the literal text 'Andr&#567;' - confirm against the cited source.
  'http://example/Andrȷ',
  'file:///C:/DEV/Haskell/lib/HXmlToolbox-3.01/examples/',
  // Iprivate characters are allowed in query
  'http://a/?\u{E000}',
  'http://example.com/?\u{E000}',
];

// Strings that the validator must reject. Every entry is listed once so that
// each one yields a uniquely titled test case.
const INVALID_ABSOLUTE_IRIS = [
  '',
  'foo',
  'http://example.com/beepbeep\u0007\u0007',
  'http://example.com/\n',
  // "::", // not OK, per Roy Fielding on the W3C uri list on 2004-04-01
  //
  // The following test cases are from a Perl script by David A. Wheeler
  // at http://www.dwheeler.com/secure-programs/url.pl
  'http://www yahoo.com',
  'http://www.yahoo.com/hello world/',
  'http://www.yahoo.com/yelp.html#"',
  //
  // The following test cases are from a Haskell program by Graham Klyne
  // at http://www.ninebynine.org/Software/HaskellUtils/Network/URITest.hs
  // 'http://[2010:836B:4179::836B:4179]',
  'http://example.com/ ',
  'http://example.com/%',
  'http://example.com/A%Z',
  'http://example.com/%ZZ',
  'http://example.com/%AZ',
  'http://example.com/A C',
  // "A'C",
  'http://example.com/A`C',
  'http://example.com/A<C',
  'http://example.com/A>C',
  'http://example.com/A^C',
  'http://example.com/A\\C',
  'http://example.com/A{C',
  'http://example.com/A|C',
  'http://example.com/A}C',
  'http://example.com/A[C',
  'http://example.com/A]C',
  'http://example.com/A[**]C',
  'http://[xyz]/',
  'http://]/',
  'http://example.org/[2010:836B:4179::836B:4179]',
  'http://example.org/abc#[2010:836B:4179::836B:4179]',
  'http://example.org/xxx/[qwerty]#a[b]',
  // From a post to the W3C uri list on 2004-02-17
  // NOTE(review): left disabled as in the original; this should be invalid per
  // RFC 3986 (after an authority the path must be empty or start with "/"), so
  // re-enable it once the validator is confirmed to reject it.
  // 'http://w3c.org:80path1/path2',
  // Iprivate characters are allowed neither in the path nor in the fragment
  'http://example.com/\u{E000}',
  'http://example.com/#\u{E000}',
  // Bad characters
  'http://\u{FFFF}',
  'http://example.com/?\u{FFFF}',
  'http://example.com/\u{0000}',
  'http://example.com/?\u{0000}',
  'http://example.com/#\u{0000}',
  'http://example.com/\u{F8FF}',
  'http://example.com/\u{F0000}',
  'http://example.com/\u{FFFFD}',
  'http://example.com/\u{100000}',
  'http://example.com/\u{10FFFD}',
  'http://example.com/?\u{FDEF}',
  'http://example.com/\u{FDEF}',
  'http://example.com/\u{FFFF}',
  'http://example.com/\u{1FFFF}',
  'http://example.com/\u{2FFFF}',
  'http://example.com/\u{3FFFF}',
  'http://example.com/\u{4FFFF}',
  'http://example.com/\u{5FFFF}',
  'http://example.com/\u{6FFFF}',
  'http://example.com/\u{7FFFF}',
  'http://example.com/\u{8FFFF}',
  'http://example.com/\u{9FFFF}',
  'http://example.com/\u{AFFFF}',
  'http://example.com/\u{BFFFF}',
  'http://example.com/\u{CFFFF}',
  'http://example.com/\u{DFFFF}',
  'http://example.com/\u{EFFFF}',
  'http://example.com/\u{FFFFF}',
  // Bad host
  'http://[/',
  'http://[::1]a/',
  // Fuzzing bugs
  // The user-info part is the invisible U+034F (combining grapheme joiner),
  // written as an escape so that it cannot be lost when editing.
  'http://\u{034F}@[]',
];

// Registers one test case per fixture: valid IRIs must validate to undefined,
// invalid ones must validate to an Error instance.
describe('Validate', () => {
  VALID_ABSOLUTE_IRIS.forEach(iri => {
    test(`the IRI '${iri}' should be valid`, () => {
      const outcome = validateIri(iri);
      expect(outcome).toBeUndefined();
    });
  });

  INVALID_ABSOLUTE_IRIS.forEach(iri => {
    test(`the IRI '${iri}' should be invalid`, () => {
      const outcome = validateIri(iri);
      expect(outcome).toBeInstanceOf(Error);
    });
  });
});

0 comments on commit 25da3ed

Please sign in to comment.