diff --git a/pom.xml b/pom.xml
index 78894989ac9..fb774a518cc 100644
--- a/pom.xml
+++ b/pom.xml
@@ -219,6 +219,7 @@
**/org/rascalmpl/test/AllSuiteParallel.java
+ **/org/rascalmpl/test/library/LibraryLangPaths.java
**/org/rascalmpl/test/value/AllTests.java
**/org/rascalmpl/*Test.java
diff --git a/src/org/rascalmpl/library/Location.rsc b/src/org/rascalmpl/library/Location.rsc
index 1da26a7ec69..50b82dab347 100644
--- a/src/org/rascalmpl/library/Location.rsc
+++ b/src/org/rascalmpl/library/Location.rsc
@@ -24,6 +24,8 @@ import Set;
import String;
import Exception;
+import lang::paths::Windows;
+import lang::paths::Unix;
@synopsis{Extracts a path relative to a parent location.}
@description{
@@ -43,6 +45,28 @@ loc relativize(list[loc] haystack, loc needle) {
}
}
+@synopsis{Convert Windows path syntax to a `loc` value}
+@description{
+This conversion supports generic Windows path syntax, including:
+* Absolute drive-specific: `C:\Program Files`
+* Relative drive-specific: `C:hello.txt`
+* Relative: `hello.txt`
+* Directory-relative: `\hello.txt`
+* UNC format: `\\system07\C$\`
+
+Windows paths, contrary to popular belief, support both `/` and `\` as path separators.
+}
+loc locFromWindowsPath(str path) = parseWindowsPath(path);
+
+@synopsis{Convert Unix path syntax to a `loc` value}
+@description{
+This conversion supports generic Unix path syntax, including:
+* Absolute: `/usr/local/bin`
+* Relative: `hello.txt`
+* Home: `~/hello.txt`
+* User: `~userName/hello.txt`
+}
+loc locFromUnixPath(str path) = parseUnixPath(path);
@synopsis{Check that two locations refer to the same file.}
bool isSameFile(loc l, loc r) = l.top[fragment=""] == r.top[fragment=""];
diff --git a/src/org/rascalmpl/library/Prelude.java b/src/org/rascalmpl/library/Prelude.java
index 8bb4bbb4de1..33036ee01ab 100644
--- a/src/org/rascalmpl/library/Prelude.java
+++ b/src/org/rascalmpl/library/Prelude.java
@@ -3615,7 +3615,7 @@ public ISet findResources(IString fileName) {
public ISourceLocation relativize(ISourceLocation outside, ISourceLocation inside) {
return URIUtil.relativize(outside, inside);
}
-
+
public IValue readBinaryValueFile(IValue type, ISourceLocation loc){
if(trackIO) System.err.println("readBinaryValueFile: " + loc);
diff --git a/src/org/rascalmpl/library/String.rsc b/src/org/rascalmpl/library/String.rsc
index d670d23668b..3e2cff8ab46 100644
--- a/src/org/rascalmpl/library/String.rsc
+++ b/src/org/rascalmpl/library/String.rsc
@@ -577,6 +577,7 @@ toLocation("http://grammarware.net");
toLocation("document.xml");
```
}
+@deprecated{Use ((Location::locFromWindowsPath)) for example. The current function does not handle all the different intricacies of path notation.}
public loc toLocation(str s) = (/\:\/\// := s) ? |://| : |cwd:///|;
diff --git a/src/org/rascalmpl/library/lang/paths/Unix.rsc b/src/org/rascalmpl/library/lang/paths/Unix.rsc
new file mode 100644
index 00000000000..4e89ac113e6
--- /dev/null
+++ b/src/org/rascalmpl/library/lang/paths/Unix.rsc
@@ -0,0 +1,108 @@
+@synopsis{Defines the syntax of filesystem paths on Unix (POSIX) systems.}
+@description{
+This is a syntax definition of POSIX paths and file names, including some of the conventions
+with `~` for the home folder and `.` and `..` for relative directories.
+
+The main function of this module, ((parseUnixPath)):
+* faithfully maps any syntactically correct Unix path to a syntactically correct `loc` value.
+* throws a ParseError if the path does not comply.
+* ensures that if the file exists on system A, then the `loc` representation
+resolves to the same file on system A via any ((Library:module:IO)) function.
+* and nothing more. No normalization, no interpretation of `.` and `..`, no changing of cases.
+This is left to downstream processors of `loc` values, if necessary. The current transformation
+is purely syntactical, and tries to preserve the semantics of the path as much as possible.
+}
+@pitfalls{
+* the `~` notation is typically a feature of the shell and not of system paths. However it is so commonly
+used to refer to the home directories of users that we've added an interpretation here with the `home:///` scheme.
+* otherwise, the path syntax may be different from what you have to type in _bash_ or _zsh_. This is because shells
+need to reserve characters, like spaces, for different purposes (commandline argument separation). The
+current definition is about the path notation that shells like _zsh_ and _bash_, and other programs, have to pass into the string arguments of
+OS features, after their own concatenation, splicing, variable expansion, de-escaping and unquoting routines have finished.
+}
+module lang::paths::Unix
+
+lexical UnixPath
+ = absolute: Slashes UnixFilePath?
+ | relative: UnixFilePath
+ | home : "~" (Slashes UnixFilePath)?
+ | user : "~" UserName name (Slashes UnixFilePath)?
+ ;
+
+lexical UserName = ![/~]+;
+
+lexical PathChar = ![/];
+
+lexical PathSegment
+ = current: "."
+ | parent : ".."
+ | name : (PathChar \ "~" PathChar*) \ ".." \ "." \ "~"
+ ;
+
+lexical Slashes = Slash+ !>> [/];
+
+lexical Slash = [/];
+
+lexical UnixFilePath = {PathSegment Slashes}+ segments Slashes?;
+
+import ParseTree;
+
+@synopsis{Convert a Unix path literal to a source location URI}
+@description{
+1. parses the path using the grammar for ((UnixPath))
+2. takes the _literal_ name components using string interpolation `""`. This means no decoding/encoding happens at all while extracting
+user names and path segment names. Also all superfluous path separators are skipped.
+3. uses `loc + str` path concatenation with its builtin character encoding to construct the URI. Also
+the right path separators are introduced.
+}
+loc parseUnixPath(str input, loc src=|unknown:///|) = mapPathToLoc(parse(#UnixPath, input, src));
+
+@synopsis{Root is a special case}
+private loc mapPathToLoc((UnixPath) ``)
+ = |file:///|;
+
+@synopsis{Absolute: relative to the root of the file system.}
+private loc mapPathToLoc((UnixPath) ``)
+ = appendPath(|file:///|, path);
+
+@synopsis{Relative: relative to the current working directory.}
+private loc mapPathToLoc((UnixPath) ``)
+ = appendPath(|cwd:///|, path);
+
+@synopsis{Home relative: relative to the current user's home directory}
+private loc mapPathToLoc((UnixPath) `~`)
+ = appendPath(|home:///|, path);
+
+@synopsis{Home: the current user's home directory itself}
+private loc mapPathToLoc((UnixPath) `~`)
+ = |home:///|;
+
+@synopsis{User relative: relative to any specific user's home directory}
+private loc mapPathToLoc((UnixPath) `~`)
+ = appendPath(|home:///..//|, path);
+
+@synopsis{User home: a specific user's home directory itself}
+private loc mapPathToLoc((UnixPath) `~`)
+ = |home:///..//|;
+
+private loc appendPath(loc root, UnixFilePath path)
+ = (root | it + "" | segment <- path.segments);
+
+test bool root()
+ = parseUnixPath("/") == |file:///|;
+
+test bool absolutePath()
+ = parseUnixPath("/usr/local/bin")
+ == |file:///usr/local/bin|;
+
+test bool relativePath()
+ = parseUnixPath(".bash_rc")
+ == |cwd:///.bash_rc|;
+
+test bool homePath()
+ = parseUnixPath("~/.bash_profile")
+ == |home:///.bash_profile|;
+
+test bool userPath()
+ = parseUnixPath("~root/.bash_profile")
+ == |home:///../root/.bash_profile|;
diff --git a/src/org/rascalmpl/library/lang/paths/Windows.rsc b/src/org/rascalmpl/library/lang/paths/Windows.rsc
new file mode 100644
index 00000000000..37d63c00a76
--- /dev/null
+++ b/src/org/rascalmpl/library/lang/paths/Windows.rsc
@@ -0,0 +1,191 @@
+@synopsis{Defines the syntax of filesystem and network drive paths on DOS and Windows Systems.}
+@description{
+This syntax definition of file paths and file names in Windows formalizes open-source implementations
+manually written in Java, C++ and C# code. These are parsers for Windows syntax of file and directory names,
+as well as shares on local networks (UNC notation). It also derives from openly available documentation
+sources on Windows and the .NET platform for confirmation and test examples.
+
+The main function of this module, ((parseWindowsPath)):
+* faithfully maps any syntactically correct Windows path to a syntactically correct `loc` value.
+* throws a ParseError if the path does not comply. Typically file names ending in spaces do not comply.
+* ensures that if the file exists on system A, then the `loc` representation
+resolves to the same file on system A via any ((Library:module:IO)) function.
+* and nothing more. No normalization, no interpretation of `.` and `..`, no changing of cases.
+This is left to downstream processors of `loc` values, if necessary. The current transformation
+is purely syntactical, and tries to preserve the semantics of the path as much as possible.
+}
+@pitfalls{
+* Length limitations are not implemented by this parser. This means that overly long names will lead
+to IO exceptions when they are finally used.
+* The names of drives, files and devices are mapped as-is, without normalization. This means that
+the resulting `loc` value may not be a _canonical_ representation of the identified resource.
+Normalization of `loc` values is for a different function TBD.
+}
+module lang::paths::Windows
+
+import IO;
+import util::SystemAPI;
+
+lexical WindowsPath
+ = unc : Slash Slash Slashes? PathChar* \ "." Slashes PathChar* Slashes WindowsFilePath
+ | uncDOSDrive : Slash Slash Slashes? DOSDevice Slashes Drive ":" OptionalWindowsFilePath
+ | uncDOSPath : Slash Slash Slashes? DOSDevice Slashes PathChar* Slashes WindowsFilePath
+ | absolute : Drive ":" Slashes WindowsFilePath
+ | driveRelative : Drive ":" WindowsFilePath
+ | directoryRelative: Slash WindowsFilePath
+ | relative : WindowsFilePath
+ ;
+
+lexical OptionalWindowsFilePath
+ = ()
+ | Slashes WindowsFilePath
+ ;
+
+lexical DOSDevice = [.?];
+
+lexical PathChar = !([\a00-\a20\< \> : \" | ? * \\ /] - [\ ]);
+
+lexical PathSegment
+ = current: "."
+ | parent : ".."
+ | name : PathChar+ \ ".." \ "."
+ ;
+
+lexical Drive = [A-Za-z];
+
+lexical Slashes = Slash+ !>> [\\/];
+
+lexical Slash = [\\/];
+
+lexical WindowsFilePath = {PathSegment Slashes}* segments Slashes? [\ .] !<< (); // only the last segment must not end in spaces.
+
+import ParseTree;
+
+@synopsis{Convert a windows path literal to a source location URI}
+@description{
+1. parses the path using the grammar for ((WindowsPath))
+2. takes the _literal_ name components using string interpolation `""`. This means no decoding/encoding happens at all while extracting
+hostname, share name and path segment names. Also all superfluous path separators are skipped.
+3. uses `loc + str` path concatenation with its builtin character encoding to construct the URI. Also
+the right path separators are introduced.
+}
+loc parseWindowsPath(str input, loc src=|unknown:///|) = mapPathToLoc(parse(#WindowsPath, input, src));
+
+@synopsis{UNC}
+private loc mapPathToLoc((WindowsPath) ``)
+ = appendPath(|unc:///| + "", path);
+
+@synopsis{DOS UNC Device Drive}
+private loc mapPathToLoc((WindowsPath) `:`)
+ = appendPath(|unc:///| + ":", path);
+
+@synopsis{DOS UNC Device Path}
+private loc mapPathToLoc((WindowsPath) ``)
+ = appendPath(|unc:///| + "", path);
+
+private str deviceIndicator((DOSDevice) `?`) = "%3F";
+private str deviceIndicator((DOSDevice) `.`) = ".";
+
+@synopsis{DOS UNCPath}
+private loc mapPathToLoc((WindowsPath) `?`)
+ = appendPath(|unc://%3F/| + "", path);
+
+
+@synopsis{Absolute: given the drive and relative to its root.}
+private loc mapPathToLoc((WindowsPath) `:`)
+ = appendPath(|file:///:/|, path);
+
+@synopsis{Drive relative: relative to the current working directory on the given drive.}
+private loc mapPathToLoc((WindowsPath) `:`)
+ = appendPath(|file:///:.|, path);
+
+@synopsis{Directory relative: relative to the root of the current drive.}
+private loc mapPathToLoc((WindowsPath) ``)
+ = appendPath(|cwdrive:///|, path);
+
+@synopsis{Relative to the current working directory on the current drive.}
+private loc mapPathToLoc((WindowsPath) ``)
+ = appendPath(|cwd:///|, path);
+
+private loc appendPath(loc root, WindowsFilePath path)
+ = (root | it + "" | segment <- path.segments);
+
+private loc appendPath(loc root, (OptionalWindowsFilePath) ``) = root;
+
+private loc appendPath(loc root, (OptionalWindowsFilePath) ``)
+ = appendPath(root, path);
+
+private bool IS_WINDOWS = /win/i := getSystemProperty("os.name");
+
+test bool uncSharePath()
+ = parseWindowsPath("\\\\Server2\\Share\\Test\\Foo.txt")
+ == |unc://Server2/Share/Test/Foo.txt|;
+
+test bool uncDrivePath()
+ = parseWindowsPath("\\\\system07\\C$\\")
+ == |unc://system07/C$|;
+
+
+test bool uncDOSDevicePathLocalFileQuestion() {
+ loc l = parseWindowsPath("\\\\?\\c:\\windows\\system32\\cmd.exe");
+
+ if (IS_WINDOWS) {
+ assert exists(l);
+ }
+
+ return l == |unc://%3F/c:/windows/system32/cmd.exe|;
+}
+
+test bool uncDOSDevicePathLocalFileDot() {
+ loc l = parseWindowsPath("\\\\.\\C:\\Test\\Foo.txt");
+
+ return l == |unc://./C:/Test/Foo.txt|;
+}
+
+test bool uncDOSDeviceUNCSharePath() {
+ // the entire UNC namespace is looped back into the DOS Device UNC encoding via
+ // the reserved name "UNC":
+ loc m1 = parseWindowsPath("\\\\?\\UNC\\Server\\Share\\Test\\Foo.txt");
+ loc m2 = parseWindowsPath("\\\\.\\UNC\\Server\\Share\\Test\\Foo.txt");
+
+ return m1 == |unc://%3F/UNC/Server/Share/Test/Foo.txt|
+ && m2 == |unc://./UNC/Server/Share/Test/Foo.txt|;
+}
+
+test bool uncDOSDeviceVolumeGUIDReference() {
+ loc l = parseWindowsPath("\\\\.\\Volume{b75e2c83-0000-0000-0000-602f00000000}\\Test\\Foo.txt");
+
+ return l == |unc://./Volume%7Bb75e2c83-0000-0000-0000-602f00000000%7D/Test/Foo.txt|;
+}
+
+test bool uncDOSDeviceBootPartition() {
+ loc l = parseWindowsPath("\\\\.\\BootPartition\\");
+ return l == |unc://./BootPartition|;
+}
+
+test bool simpleDrivePathC()
+ = parseWindowsPath("C:\\Program Files\\Rascal")
+ == |file:///C:/Program%20Files/Rascal|;
+
+test bool mixedSlashesDrivePathC()
+ = parseWindowsPath("C:\\Program Files/Rascal")
+ == |file:///C:/Program%20Files/Rascal|;
+
+test bool trailingSlashesDrivePathC()
+ = parseWindowsPath("C:\\Program Files\\Rascal\\\\")
+ == |file:///C:/Program%20Files/Rascal|;
+
+test bool simpleDrivePathD()
+ = parseWindowsPath("D:\\Program Files\\Rascal")
+ == |file:///D:/Program%20Files/Rascal|;
+
+test bool uncNetworkShareOk() {
+ loc l = parseWindowsPath("\\\\localhost\\ADMIN$\\System32\\cmd.exe");
+
+ if (IS_WINDOWS) {
+ return exists(l);
+ }
+ else {
+ return |unc://localhost/ADMIN$/System32/cmd.exe| == l;
+ }
+}
diff --git a/src/org/rascalmpl/uri/file/CurrentWorkingDriveResolver.java b/src/org/rascalmpl/uri/file/CurrentWorkingDriveResolver.java
new file mode 100644
index 00000000000..84bd45d893f
--- /dev/null
+++ b/src/org/rascalmpl/uri/file/CurrentWorkingDriveResolver.java
@@ -0,0 +1,29 @@
+/*******************************************************************************
+ * Copyright (c) 2009-2024 CWI
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ * * Jurgen J. Vinju - Jurgen.Vinju@cwi.nl - CWI
+ ******************************************************************************/
+package org.rascalmpl.uri.file;
+
+import java.nio.file.Paths;
+
+/**
+ * Resolver for the {@code cwdrive} scheme: files are read and written relative
+ * to the root of the current working directory (the "current working drive"),
+ * not relative to the current working directory itself.
+ */
+public class CurrentWorkingDriveResolver extends AliasedFileResolver {
+
+    private static String rootOf(String directory) {
+        return Paths.get(directory).toAbsolutePath().getRoot().toString();
+    }
+
+    public CurrentWorkingDriveResolver() {
+        super("cwdrive", rootOf(System.getProperty("user.dir")));
+    }
+}
diff --git a/src/org/rascalmpl/uri/file/FileURIResolver.java b/src/org/rascalmpl/uri/file/FileURIResolver.java
index bfa8c358228..9358c1804e7 100644
--- a/src/org/rascalmpl/uri/file/FileURIResolver.java
+++ b/src/org/rascalmpl/uri/file/FileURIResolver.java
@@ -143,6 +143,7 @@ public boolean exists(ISourceLocation uri) {
* To override to build resolvers to specific locations using a prefix for example.
*/
protected String getPath(ISourceLocation uri) {
+ assert !uri.hasAuthority();
return uri.getPath();
}
diff --git a/src/org/rascalmpl/uri/file/UNCResolver.java b/src/org/rascalmpl/uri/file/UNCResolver.java
new file mode 100644
index 00000000000..b4ea21b4c6b
--- /dev/null
+++ b/src/org/rascalmpl/uri/file/UNCResolver.java
@@ -0,0 +1,50 @@
+package org.rascalmpl.uri.file;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.regex.Pattern;
+
+import io.usethesource.vallang.ISourceLocation;
+
+/**
+ * Resolves `unc` locations to Windows UNC paths (|unc://host/share| maps to \\host\share); on other systems path resolution throws.
+ */
+public class UNCResolver extends FileURIResolver {
+    private final boolean onWindows = System.getProperty("os.name", "").toLowerCase(java.util.Locale.ROOT).startsWith("win"); // locale-independent
+
+    public UNCResolver() throws IOException {
+        super();
+    }
+
+    @Override
+    protected String getPath(ISourceLocation uri) {
+        if (!onWindows) {
+            throw new RuntimeException(new FileNotFoundException(uri.toString() + "; UNC is only available on Windows"));
+        }
+
+        if (uri.hasAuthority()) {
+            String path = uri.getPath();
+
+            if (path.startsWith("/")) {
+                // drop the leading slash; a backslash separator is inserted after the authority below
+                path = path.substring(1);
+            }
+
+            if (path.endsWith(":")) {
+                // current folder on drive not supported in UNC notation, this becomes the root of the drive
+                path = path + "\\";
+            }
+
+            return "\\\\" + uri.getAuthority() + "\\" + path;
+        }
+        else {
+            // no authority: just a normal absolute path
+            return uri.getPath();
+        }
+    }
+
+    @Override
+    public String scheme() {
+        return "unc";
+    }
+}
diff --git a/src/org/rascalmpl/uri/resolvers.config b/src/org/rascalmpl/uri/resolvers.config
index acfc7ce38cd..5e35fb9c326 100644
--- a/src/org/rascalmpl/uri/resolvers.config
+++ b/src/org/rascalmpl/uri/resolvers.config
@@ -9,6 +9,8 @@ org.rascalmpl.uri.jar.JarURIResolver
org.rascalmpl.uri.zip.ZipURIResolver
org.rascalmpl.uri.file.HomeURIResolver
org.rascalmpl.uri.file.CWDURIResolver
+org.rascalmpl.uri.file.CurrentWorkingDriveResolver
+org.rascalmpl.uri.file.UNCResolver
org.rascalmpl.uri.file.SystemPathURIResolver
org.rascalmpl.uri.libraries.MemoryResolver
org.rascalmpl.uri.libraries.RascalLibraryURIResolver
diff --git a/test/org/rascalmpl/test/library/LibraryLangPaths.java b/test/org/rascalmpl/test/library/LibraryLangPaths.java
new file mode 100644
index 00000000000..da795fc2edc
--- /dev/null
+++ b/test/org/rascalmpl/test/library/LibraryLangPaths.java
@@ -0,0 +1,11 @@
+package org.rascalmpl.test.library;
+
+import org.junit.runner.RunWith;
+import org.rascalmpl.test.infrastructure.RascalJUnitTestPrefix;
+import org.rascalmpl.test.infrastructure.RascalJUnitTestRunner;
+
+@RunWith(RascalJUnitTestRunner.class)
+@RascalJUnitTestPrefix("lang::paths")
+public class LibraryLangPaths {
+    // Intentionally empty. NOTE(review): presumably the runner collects the Rascal tests under the "lang::paths" prefix - confirm against RascalJUnitTestRunner.
+}