Skip to content

Commit

Permalink
Add more special handling for URI File scheme in general, and some wi…
Browse files Browse the repository at this point in the history
…ndows-specific code to disambiguate the drive letter from a scheme.

Fixes #443.
  • Loading branch information
gkellogg committed Oct 7, 2024
1 parent e478a7c commit 1db066a
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 7 deletions.
35 changes: 28 additions & 7 deletions lib/rdf/model/uri.rb
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,10 @@ class URI
# scheme, authority, path, query, fragment
IRI_PARTS = /^(?:([^:\/?#]+):)?(?:\/\/([^\/?#]*))?([^?#]*)(\?[^#]*)?(#.*)?$/.freeze

# Special version for file-scheme on Windows (path SHOULD begin with /, but may not)
# authority, path, query, fragment (the scheme is always the literal "file" and is not captured)
FILE_PARTS = /^file:(?:\/\/(#{IHOST}))?(\/?[^?#]*)(\?[^#]*)?(#.*)?$/.freeze

# Remove dot expressions regular expressions
RDS_2A = /^\.?\.\/(.*)$/.freeze
RDS_2B1 = /^\/\.$/.freeze
Expand Down Expand Up @@ -851,8 +855,7 @@ def inspect
# lexical representation of URI, either absolute or relative
# @return [String]
def value
return @value if @value
@value = [
@value ||= [
("#{scheme}:" if absolute?),
("//#{authority}" if authority),
path,
Expand Down Expand Up @@ -883,17 +886,35 @@ def object
#
# @param [String, to_s] value
# @return [Object{Symbol => String}]
# @see https://datatracker.ietf.org/doc/html/rfc8089
def parse(value)
value = value.to_s.dup.force_encoding(Encoding::ASCII_8BIT)
value = value.to_s.dup.force_encoding(Encoding::UTF_8) unless value && value.encoding == Encoding::UTF_8
parts = {}
if matchdata = IRI_PARTS.match(value)
if matchdata = FILE_PARTS.match(value)
# A file-based URI is always in one of the following forms:
# * file:/path - absolute path, no host name
# * file:///path - absolute path, empty host name
# * file://hostname/path - absolute path with authority.
# * file://path – is invalid, but treated as file:///path
scheme = 'file'
authority, path, query, fragment = matchdata[1..-1]
if authority && authority.match?(/^[A-Za-z]$/) && Gem.win_platform?
# In this case, the authority is really a drive letter, so fold it back into the path
authority, path = nil, "#{authority}#{path}"
end
# We accept paths that aren't absolute, but coerce them to be absolute
path = "/#{path}" unless path.start_with?('/')
elsif matchdata = IRI_PARTS.match(value)
scheme, authority, path, query, fragment = matchdata[1..-1]
authority = nil if authority && authority.empty?

if Gem.win_platform? && scheme && !authority && scheme.match?(/^[a-zA-Z]$/)
# A drive letter, not a scheme
scheme, path = nil, "#{scheme}:#{path}"
if scheme && scheme.match?(/^[A-Za-z]$/) && Gem.win_platform?
# On Windows treat D:/foo/bar as a path, not a scheme
scheme, authority, path = 'file', nil, "/#{scheme}:#{path}"
end
end

if matchdata
userinfo, hostport = authority.to_s.split('@', 2)
hostport, userinfo = userinfo, nil unless hostport
user, password = userinfo.to_s.split(':', 2)
Expand Down
1 change: 1 addition & 0 deletions rdf.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ Gem::Specification.new do |gem|
gem.add_runtime_dependency 'logger', '~> 1.5'
gem.add_runtime_dependency 'ostruct', '~> 0.6'
gem.add_development_dependency 'base64', '~> 0.2'
gem.add_development_dependency 'fiddle', '~> 1.1'
gem.add_development_dependency 'rdf-spec', '~> 3.3'
gem.add_development_dependency 'rdf-turtle', '~> 3.3'
gem.add_development_dependency 'rdf-vocab', '~> 3.3'
Expand Down
38 changes: 38 additions & 0 deletions spec/model_uri_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -524,6 +524,44 @@
expect(u1.canonicalize.hash).to eq u2.hash
end
end

# Canonicalization examples for file URIs and drive-letter paths.
# The whole context is skipped (with the reason 'only windows') unless
# Gem.win_platform? is true.
context "Windows specific canonicalization", skip: ('only windows' unless Gem.win_platform?) do
# Each entry maps an example description to a pair: [input, expected output].
{
"no scheme, relative path starting with drive letter" => [
"D:a/b",
"file:/D:a/b"
],
"no authority and relative path" => [
"file:D:a/b",
"file:/D:a/b"
],
"no authority and absolute path" => [
"file:/D:a/b",
"file:/D:a/b"
],
"scheme with //, no authority and absolute path" => [
"file://D:a/b",
"file:/D:a/b"
],
"empty authority and absolute path" => [
"file:///D:a/b",
"file:///D:a/b"
],
"authority and absolute path" => [
"file://host/D:a/b",
"file://host/D:a/b"
],
}.each do |name, (input, output)|
# One example per table entry: the canonicalized input must agree with the
# URI built from the expected output, both as a string and by hash.
it name do
u1 = RDF::URI(input)
u2 = RDF::URI(output)
expect(u1.canonicalize.to_s).to eq u2.to_s
# NOTE(review): this compares u1.canonicalize with itself, so it does not
# involve u2 at all; possibly intended to be `eq u2` -- confirm.
expect(u1.canonicalize).to eq u1.canonicalize
expect(u1.canonicalize.hash).to eq u2.hash
end
end
end

it "#canonicalize! alters resource" do
u1 = RDF::URI("eXAMPLE:example.com/foo")
u2 = RDF::URI("example:example.com/foo")
Expand Down

0 comments on commit 1db066a

Please sign in to comment.