-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implements IRI validation using RFC 3987 grammar
- Loading branch information
1 parent
b2e327a
commit 25da3ed
Showing
3 changed files
with
265 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,70 @@ | ||
/**
 * Build the regular expression that recognises an absolute IRI, i.e. the
 * `IRI` production of RFC 3987, section 2.2:
 *
 *   IRI = scheme ":" ihier-part [ "?" iquery ] [ "#" ifragment ]
 *
 * The pattern is assembled bottom-up from string fragments, one per grammar
 * rule, so each constant below can be compared directly with the RFC.
 * Every quantified group repeats alternatives that cannot match the empty
 * string, which keeps matching free of exponential backtracking.
 *
 * @return A regular expression anchored at both ends, compiled with the `u`
 * flag so that code points above U+FFFF are handled as single characters.
 */
function buildAbsoluteIriRegex(): RegExp {
  // --- Character sets, written without brackets so they can be combined. ---
  const subDelimsRaw = `!$&'()*+,;=`;
  const unreservedRaw = `a-zA-Z0-9\\-._~`;
  const ucscharRaw = `\u{A0}-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFEF}\u{10000}-\u{1FFFD}\u{20000}-\u{2FFFD}\u{30000}-\u{3FFFD}\u{40000}-\u{4FFFD}\u{50000}-\u{5FFFD}\u{60000}-\u{6FFFD}\u{70000}-\u{7FFFD}\u{80000}-\u{8FFFD}\u{90000}-\u{9FFFD}\u{A0000}-\u{AFFFD}\u{B0000}-\u{BFFFD}\u{C0000}-\u{CFFFD}\u{D0000}-\u{DFFFD}\u{E1000}-\u{EFFFD}`;
  const iprivateRaw = `\u{E000}-\u{F8FF}\u{F0000}-\u{FFFFD}\u{100000}-\u{10FFFD}`;
  const iunreservedRaw = `${unreservedRaw}${ucscharRaw}`;

  const subDelims = `[${subDelimsRaw}]`;
  const iunreserved = `[${iunreservedRaw}]`;
  const iprivate = `[${iprivateRaw}]`;
  const pctEncoded = `%[a-fA-F0-9]{2}`;

  // --- IP addresses. ---
  const decOctet = `(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])`;
  const ipv4address = `${decOctet}\\.${decOctet}\\.${decOctet}\\.${decOctet}`;

  const h16 = `[a-fA-F0-9]{1,4}`;
  const ls32 = `(?:${h16}:${h16}|${ipv4address})`;
  // One alternative per line of the IPv6address rule, in RFC order.
  const ipv6Alternatives = [
    `(?:${h16}:){6}${ls32}`,
    `::(?:${h16}:){5}${ls32}`,
    `(?:${h16})?::(?:${h16}:){4}${ls32}`,
    `(?:(?:${h16}:){0,1}${h16})?::(?:${h16}:){3}${ls32}`,
    `(?:(?:${h16}:){0,2}${h16})?::(?:${h16}:){2}${ls32}`,
    `(?:(?:${h16}:){0,3}${h16})?::${h16}:${ls32}`,
    `(?:(?:${h16}:){0,4}${h16})?::${ls32}`,
    `(?:(?:${h16}:){0,5}${h16})?::${h16}`,
    `(?:(?:${h16}:){0,6}${h16})?::`,
  ];
  const ipv6address = `(?:${ipv6Alternatives.join('|')})`;

  // IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
  // ABNF string literals are case-insensitive, hence [vV].
  const ipvfuture = `[vV][a-fA-F0-9]+\\.[${unreservedRaw}${subDelimsRaw}:]+`;

  const ipLiteral = `\\[(?:${ipv6address}|${ipvfuture})\\]`;

  const port = `[0-9]*`;

  const scheme = `[a-zA-Z][a-zA-Z0-9+\\-.]*`;

  // --- Path, query and fragment. ---
  // ipchar matches exactly ONE path character (or one pct-encoded triplet).
  // Repetition is applied only by the rules that use it; quantifying it here
  // as well would nest quantifiers and let isegment-nz match the empty string.
  const ipchar = `(?:${iunreserved}|${pctEncoded}|${subDelims}|[:@])`;

  const ifragment = `(?:${ipchar}|[\\/?])*`;

  const iquery = `(?:${ipchar}|${iprivate}|[\\/?])*`;

  const isegmentNz = `${ipchar}+`;
  const isegment = `${ipchar}*`;

  const ipathEmpty = '';
  const ipathRootless = `${isegmentNz}(?:\\/${isegment})*`;
  const ipathAbsolute = `\\/(?:${isegmentNz}(?:\\/${isegment})*)?`;
  const ipathAbempty = `(?:\\/${isegment})*`;

  // --- Authority. ---
  const iregName = `(?:${iunreserved}|${pctEncoded}|${subDelims})*`;

  const ihost = `(?:${ipLiteral}|${ipv4address}|${iregName})`;
  const iuserinfo = `(?:${iunreserved}|${pctEncoded}|${subDelims}|:)*`;
  const iauthority = `(?:${iuserinfo}@)?${ihost}(?::${port})?`;

  const ihierPart = `(?:\\/\\/${iauthority}${ipathAbempty}|${ipathAbsolute}|${ipathRootless}|${ipathEmpty})`;

  const iri = `^${scheme}:${ihierPart}(?:\\?${iquery})?(?:#${ifragment})?$`;

  return new RegExp(iri, 'u');
}
|
||
// Compiled once at module load. The expression carries no `g`/`y` flag, so
// `test` keeps no state between calls and the instance can be shared safely.
const IRI_REGEX: RegExp = buildAbsoluteIriRegex();

/**
 * Validate a given IRI according to RFC 3987.
 * @param {string} iri a string that may be an IRI.
 * @return {Error | undefined} An error if the IRI is invalid, or undefined if it is valid.
 */
export function validateIri(iri: string): Error | undefined {
  return IRI_REGEX.test(iri) ? undefined : new Error(`Invalid IRI according to RFC 3987: '${iri}'`);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,195 @@ | ||
// TODO | ||
import { validateIri } from '../lib/Validate'; | ||
|
||
// Absolute IRIs that validateIri is expected to accept.
const VALID_ABSOLUTE_IRIS: readonly string[] = [
  'file://foo',
  'ftp://ftp.is.co.za/rfc/rfc1808.txt',
  'http://www.ietf.org/rfc/rfc2396.txt',
  'ldap://[2001:db8::7]/c=GB?objectClass?one',
  'mailto:John.Doe@example.com',
  'news:comp.infosystems.www.servers.unix',
  'tel:+1-816-555-1212',
  'telnet://192.0.2.16:80/',
  'urn:oasis:names:specification:docbook:dtd:xml:4.1.2',
  'http://example.com',
  'http://example.com/',
  'http://example.com/foo',
  'http://example.com/foo/bar',
  'http://example.com/foo/bar/',
  'http://example.com/foo/bar?q=1&r=2',
  'http://example.com/foo/bar/?q=1&r=2',
  'http://example.com#toto',
  'http://example.com/#toto',
  'http://example.com/foo#toto',
  'http://example.com/foo/bar#toto',
  'http://example.com/foo/bar/#toto',
  'http://example.com/foo/bar?q=1&r=2#toto',
  'http://example.com/foo/bar/?q=1&r=2#toto',
  'http://example.com/foo/bar/.././baz',
  'http://a.example/AZaz\u{00C0}\u{00D6}\u{00D8}\u{00F6}\u{00F8}\u{02FF}\u{0370}\u{037D}\u{037F}\u{1FFF}',
  'http://a.example/\u{200C}\u{200D}\u{2070}\u{218F}\u{2C00}\u{2FEF}\u{3001}\u{D7FF}\u{FA0E}\u{FDCF}',
  'http://a.example/\u{FDF0}\u{FFEF}\u{10000}\u{EFFFD}',
  'http://a.example/?AZaz\u{E000}\u{F8FF}\u{F0000}\u{FFFFD}\u{100000}\u{10FFFD}\u{00C0}\u{00D6}\u{00D8}',
  'http://a.example/\u{00F6}\u{00F8}\u{02FF}\u{0370}\u{037D}\u{037F}\u{1FFF}\u{200C}\u{200D}\u{2070}\u{218F}\u{2C00}',
  'http://a.example/\u{2FEF}\u{3001}\u{D7FF}\u{FA0E}\u{FDCF}\u{FDF0}\u{FFEF}\u{10000}\u{EFFFD}',
  // From https://sourceforge.net/projects/foursuite/ under Apache License
  'file:///foo/bar',
  'mailto:user@host?subject=blah',
  'dav:', // Empty opaque part / rel-path allowed by RFC 2396bis
  'about:', // Empty opaque part / rel-path allowed by RFC 2396bis
  // The following test cases are from a Perl script by David A. Wheeler
  // at http://www.dwheeler.com/secure-programs/url.pl
  'http://www.yahoo.com',
  'http://www.yahoo.com/',
  'http://1.2.3.4/',
  'http://www.yahoo.com/stuff',
  'http://www.yahoo.com/stuff/',
  'http://www.yahoo.com/hello%20world/',
  'http://www.yahoo.com?name=obi',
  'http://www.yahoo.com?name=obi+wan&status=jedi',
  'http://www.yahoo.com?onery',
  'http://www.yahoo.com#bottom',
  'http://www.yahoo.com/yelp.html#bottom',
  'https://www.yahoo.com/',
  'ftp://www.yahoo.com/',
  'ftp://www.yahoo.com/hello',
  // Wheeler's script says these are invalid, but they aren't
  'http://www.yahoo.com?name=%00%01',
  'http://www.yaho%6f.com',
  'http://www.yahoo.com/hello%00world/',
  'http://www.yahoo.com/hello+world/',
  'http://www.yahoo.com?name=obi&',
  'http://www.yahoo.com?name=obi&type=',
  'http://www.yahoo.com/yelp.html#',
  // The following test cases are from a Haskell program by Graham Klyne
  // at http://www.ninebynine.org/Software/HaskellUtils/Network/URITest.hs
  'http://example.org/aaa/bbb#ccc',
  'mailto:local@domain.org',
  'mailto:local@domain.org#frag',
  'HTTP://EXAMPLE.ORG/AAA/BBB#CCC',
  // -- escapes
  'http://example.org/aaa%2fbbb#ccc',
  'http://example.org/aaa%2Fbbb#ccc',
  'http://example.com/%2F',
  'http://example.com/?%2F',
  'http://example.com/#?%2F',
  'http://example.com/aaa%2Fbbb',
  // -- ports
  'http://example.org:80/aaa/bbb#ccc',
  'http://example.org:/aaa/bbb#ccc',
  'http://example.org./aaa/bbb#ccc',
  'http://example.123./aaa/bbb#ccc',
  // -- bare authority
  'http://example.org',
  // -- IPv6 literals (from RFC2732):
  'http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80/index.html',
  'http://[1080:0:0:0:8:800:200C:417A]/index.html',
  'http://[3ffe:2a00:100:7031::1]',
  'http://[1080::8:800:200C:417A]/foo',
  'http://[::192.9.5.5]/ipng',
  'http://[::FFFF:129.144.52.38]:80/index.html',
  'http://[2010:836B:4179::836B:4179]',
  // -- Random other things that crop up
  'http://example/Andrȷ',
  'file:///C:/DEV/Haskell/lib/HXmlToolbox-3.01/examples/',
  // Iprivate characters are allowed in query
  'http://a/?\u{E000}',
  'http://example.com/?\u{E000}',
];
|
||
// Strings that validateIri is expected to reject. Each entry appears once so
// that every generated test has a distinct title.
const INVALID_ABSOLUTE_IRIS: readonly string[] = [
  '',
  'foo',
  'http://example.com/beepbeep\u0007\u0007',
  'http://example.com/\n',
  // "::", // not OK, per Roy Fielding on the W3C uri list on 2004-04-01
  //
  // The following test cases are from a Perl script by David A. Wheeler
  // at http://www.dwheeler.com/secure-programs/url.pl
  'http://www yahoo.com',
  'http://www.yahoo.com/hello world/',
  'http://www.yahoo.com/yelp.html#"',
  //
  // The following test cases are from a Haskell program by Graham Klyne
  // at http://www.ninebynine.org/Software/HaskellUtils/Network/URITest.hs
  // 'http://[2010:836B:4179::836B:4179]',
  'http://example.com/ ',
  'http://example.com/%',
  'http://example.com/A%Z',
  'http://example.com/%ZZ',
  'http://example.com/%AZ',
  'http://example.com/A C',
  // "A'C",
  'http://example.com/A`C',
  'http://example.com/A<C',
  'http://example.com/A>C',
  'http://example.com/A^C',
  'http://example.com/A\\C',
  'http://example.com/A{C',
  'http://example.com/A|C',
  'http://example.com/A}C',
  'http://example.com/A[C',
  'http://example.com/A]C',
  'http://example.com/A[**]C',
  'http://[xyz]/',
  'http://]/',
  'http://example.org/[2010:836B:4179::836B:4179]',
  'http://example.org/abc#[2010:836B:4179::836B:4179]',
  'http://example.org/xxx/[qwerty]#a[b]',
  // From a post to the W3C uri list on 2004-02-17
  // 'http://w3c.org:80path1/path2',
  // Iprivate characters are allowed neither in the path nor in the fragment
  'http://example.com/\u{E000}',
  'http://example.com/#\u{E000}',
  // Bad characters
  'http://\u{FFFF}',
  'http://example.com/?\u{FFFF}',
  'http://example.com/\u{0000}',
  'http://example.com/?\u{0000}',
  'http://example.com/#\u{0000}',
  // Remaining iprivate range boundaries in the path (U+E000 is listed above)
  'http://example.com/\u{F8FF}',
  'http://example.com/\u{F0000}',
  'http://example.com/\u{FFFFD}',
  'http://example.com/\u{100000}',
  'http://example.com/\u{10FFFD}',
  // Code points just outside the ucschar ranges
  'http://example.com/?\u{FDEF}',
  'http://example.com/\u{FDEF}',
  'http://example.com/\u{FFFF}',
  'http://example.com/\u{1FFFF}',
  'http://example.com/\u{2FFFF}',
  'http://example.com/\u{3FFFF}',
  'http://example.com/\u{4FFFF}',
  'http://example.com/\u{5FFFF}',
  'http://example.com/\u{6FFFF}',
  'http://example.com/\u{7FFFF}',
  'http://example.com/\u{8FFFF}',
  'http://example.com/\u{9FFFF}',
  'http://example.com/\u{AFFFF}',
  'http://example.com/\u{BFFFF}',
  'http://example.com/\u{CFFFF}',
  'http://example.com/\u{DFFFF}',
  'http://example.com/\u{EFFFF}',
  'http://example.com/\u{FFFFF}',
  // Bad host
  'http://[/',
  'http://[::1]a/',
  // Fuzzing bugs. The user-info is the invisible U+034F (COMBINING GRAPHEME
  // JOINER), written as an escape so it cannot be lost or overlooked.
  'http://\u{034F}@[]',
];
|
||
// Registers one test per fixture: every entry of VALID_ABSOLUTE_IRIS must
// validate without an error, every entry of INVALID_ABSOLUTE_IRIS must
// produce an Error.
describe('Validate', () => {
  VALID_ABSOLUTE_IRIS.forEach((candidate) => {
    test(`the IRI '${candidate}' should be valid`, () => {
      const outcome = validateIri(candidate);
      expect(outcome).toBeUndefined();
    });
  });

  INVALID_ABSOLUTE_IRIS.forEach((candidate) => {
    test(`the IRI '${candidate}' should be invalid`, () => {
      const outcome = validateIri(candidate);
      expect(outcome).toBeInstanceOf(Error);
    });
  });
});