From 532d5f7e3a9583ecc3034fe38fcf183fb9950e0b Mon Sep 17 00:00:00 2001 From: "Jurgen J. Vinju" Date: Fri, 10 May 2024 13:49:15 +0200 Subject: [PATCH 01/46] added new function for converting string paths to loc values, depending on each different OS filesystem type, including documentation --- src/org/rascalmpl/library/Location.rsc | 57 ++++++++++++++++++++++ src/org/rascalmpl/library/Prelude.java | 67 ++++++++++++++++++++++++++ src/org/rascalmpl/library/String.rsc | 1 + 3 files changed, 125 insertions(+) diff --git a/src/org/rascalmpl/library/Location.rsc b/src/org/rascalmpl/library/Location.rsc index 1da26a7ec69..c0744eda32b 100644 --- a/src/org/rascalmpl/library/Location.rsc +++ b/src/org/rascalmpl/library/Location.rsc @@ -43,6 +43,63 @@ loc relativize(list[loc] haystack, loc needle) { } } +@synopsis{Names of file system path syntaxes that can be found in the wild.} +@description{ +* `generic` captures all unix-like systems like the BSD family, SunOs, Solaris, Irix, etc. +* `mac_osx` captures modern Mac's that also have a unix-like filesystem but with different exceptions to permissable characters and case-sensitivity. +* `windows` is for all DOS-based filesystems and what came after, with the slashes in the other direction. +* `linux` is for all the filesystems in Linux distributions. + +Use this as a parameter to ((locFromFileSystem)) to choose how to parse a string as a filesystem path. +} +data FileSystemSyntax + = generic() + | mac_osx() + | windows() + | linux() + ; + +@javaClass{org.rascalmpl.library.Prelude} +@synopsis{Utility to retrieve the current notation for file system paths.} +@description{ +This is uses to configure the default syntax parameter of ((locFromFileSystem)). 
+} +java FileSystemSyntax currentFileSystem(); + +@synopsis{Converts the OS-specific string representation of a file or directory PATH to a `loc`.} +@description{ +This converts a string that hold a path to a file, written in a specific notation for paths, to +a canonical `loc` in URI format. + +* if `legalize` is true, then every path segment is legalized by replacing offending characters to `_` +* if `legalize` is false, and offending characters appear between the path separators, then an IO exception is thrown. +* if the requested file system syntax is either case insensitive or not case preserving (or both), then all uppercase characters will be replaced by lowercase characters. +* on windows systems the drive letter `C:` is added if a drive letter is missing. +* on all the other systems, if the path starts with a path separator, it is taken as absolute in `file:///`. Otherwise the root will be `cwd:///`. +} +// Wait for bootstrap +// @examples{ +// ```rascal-shell +// import Location; +// locFromFileSystem("C:\\Documents\\Newsletters\\Summer2018.pdf", \syntax=windows()) +// ``` +// } +@benefits{ +* After conversion there are many utility functions that operate safely and portably on `loc` values. See ((module:IO)) and ((module:util::FileSystem)) for examples. +* ((module:util::ShellExec)) features `loc`-based versions for passing the names of binaries and the names of file parameters on the commandlines as `loc` values. +* The file names identified by the path strings do not need to exist. They could typically be names in a CSV file or a spreadsheet data source, for which +no reflection exists in the mounted drives of the current (virtual) computer. Consider them "data", until passed in the functions of ((module:IO)), for example. +} +@pitfalls{ +* Delaying this conversion until _just before_ file ((module:IO)), misses out on: + * efficiency; loc values have internal sharing and memoization features. 
+ * portability: OS-specific string paths can break on other machines. + * equational reasoning: ((locFromFileSystem)) has canonicalizing features to remove common cases of aliases (such as uppercase vs lowercase). +* Path syntax does not support `..` or `.` notation, simply because this conversion does not require +the paths to even exist on the current system. The `..` notation remains part of the name of a file accordingly, without being interpreted against a mounted file system. +} +@javaClass{org.rascalmpl.library.Prelude} +java loc locFromFileSystem(str pathString, FileSystemSyntax \syntax=currentFileSystem(), bool legalize=false); @synopsis{Check that two locations refer to the same file.} bool isSameFile(loc l, loc r) = l.top[fragment=""] == r.top[fragment=""]; diff --git a/src/org/rascalmpl/library/Prelude.java b/src/org/rascalmpl/library/Prelude.java index 8bb4bbb4de1..5759869938f 100644 --- a/src/org/rascalmpl/library/Prelude.java +++ b/src/org/rascalmpl/library/Prelude.java @@ -46,6 +46,7 @@ import java.nio.charset.CharsetEncoder; import java.nio.charset.CodingErrorAction; import java.nio.charset.StandardCharsets; +import java.nio.file.Paths; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.text.ParseException; @@ -69,6 +70,8 @@ import java.util.function.Consumer; import java.util.regex.Pattern; +import org.apache.commons.io.FileSystem; +import org.apache.commons.io.FilenameUtils; import org.apache.commons.lang.CharSetUtils; import org.rascalmpl.debug.IRascalMonitor; import org.rascalmpl.exceptions.JavaCompilation; @@ -3615,6 +3618,70 @@ public ISet findResources(IString fileName) { public ISourceLocation relativize(ISourceLocation outside, ISourceLocation inside) { return URIUtil.relativize(outside, inside); } + + public IConstructor currentFileSystem() { + var tf = TypeFactory.getInstance(); + var ts = new TypeStore(); + var fs = tf.abstractDataType(ts, "FileSystemSyntax"); + Type cons = tf.constructor(ts, 
fs, org.apache.commons.io.FileSystem.getCurrent().name().toLowerCase()); + return values.constructor(cons); + } + + public ISourceLocation locFromFileSystem(IString pathString, IConstructor syntax, IBool legalize) { + var fs = FileSystem.valueOf(syntax.getName().toUpperCase()); + var sep = String.valueOf(fs.getNameSeparator()); + var path = pathString.getValue(); + var drive = fs.supportsDriveLetter() && path.length() >= 2 && path.charAt(1) == ':' + ? path.substring(0, 2) + : ""; + + if (drive.length() > 0) { + // cut off the drive letter + path = path.substring(2); + } + + if (FileSystem.WINDOWS == fs && drive.isEmpty()) { + // supply missing drive letter + drive = "C:"; + } + + if (legalize.getValue()) { + // replace illegals by _ + String[] legal = Arrays.stream(path.split(Pattern.quote(sep))) + .map(segment -> fs.toLegalFileName(segment, '_')) + .toArray(String[]::new); + path = String.join(sep, legal); + } + + // throw IO when illegal char occur + Arrays.stream(path.split(Pattern.quote(sep))) + .forEach(segment -> { + if (!segment.isEmpty() && !fs.isLegalFileName(segment)) { + throw RuntimeExceptionFactory.io("\""+ segment + "\" is not a legal " + syntax.getName() + " filename."); + } + }); + + // now switch to forward slashes + if (!sep.equals("/")) { + path = FilenameUtils.separatorsToUnix(path); + } + + // canonicalize if required or necessary + if (!fs.isCasePreserving() || !fs.isCaseSensitive()) { + // to try and avoid having different loc values point to the same file + // the fewer aliases, the better, even though due to symlinks, + // mounts and such we can not avoid them alltogether. + path = path.toLowerCase(); + } + + // generate either a root file loc or a current working directory loc. 
+ if (path.startsWith(sep) && drive.isEmpty()) { + return URIUtil.correctLocation("file", "", drive + path); + } + else { + return URIUtil.correctLocation("cwd", "", drive + path); + } + } public IValue readBinaryValueFile(IValue type, ISourceLocation loc){ if(trackIO) System.err.println("readBinaryValueFile: " + loc); diff --git a/src/org/rascalmpl/library/String.rsc b/src/org/rascalmpl/library/String.rsc index d670d23668b..66ba683f9a2 100644 --- a/src/org/rascalmpl/library/String.rsc +++ b/src/org/rascalmpl/library/String.rsc @@ -577,6 +577,7 @@ toLocation("http://grammarware.net"); toLocation("document.xml"); ``` } +@deprecated{Use ((Locations::fromOSPath))} public loc toLocation(str s) = (/\:\/\// := s) ? |://| : |cwd:///|; From ff02684a0b5393d15779602627de07279bf51927 Mon Sep 17 00:00:00 2001 From: "Jurgen J. Vinju" Date: Fri, 10 May 2024 13:57:19 +0200 Subject: [PATCH 02/46] fixed link in deprecation message --- src/org/rascalmpl/library/String.rsc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/org/rascalmpl/library/String.rsc b/src/org/rascalmpl/library/String.rsc index 66ba683f9a2..bf47acba07d 100644 --- a/src/org/rascalmpl/library/String.rsc +++ b/src/org/rascalmpl/library/String.rsc @@ -577,7 +577,7 @@ toLocation("http://grammarware.net"); toLocation("document.xml"); ``` } -@deprecated{Use ((Locations::fromOSPath))} +@deprecated{Use ((Location::fromOSlocFromFileSystem))} public loc toLocation(str s) = (/\:\/\// := s) ? |://| : |cwd:///|; From 847ca051d11cc44d78f3b27cbbc2911499caa137 Mon Sep 17 00:00:00 2001 From: "Jurgen J. 
Vinju" Date: Sun, 12 May 2024 15:26:22 +0200 Subject: [PATCH 03/46] added a grammar for all kinds of Window's path and an initial converter to source locations --- .../rascalmpl/library/lang/paths/Windows.rsc | 63 +++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 src/org/rascalmpl/library/lang/paths/Windows.rsc diff --git a/src/org/rascalmpl/library/lang/paths/Windows.rsc b/src/org/rascalmpl/library/lang/paths/Windows.rsc new file mode 100644 index 00000000000..2a23a982901 --- /dev/null +++ b/src/org/rascalmpl/library/lang/paths/Windows.rsc @@ -0,0 +1,63 @@ +@synopsis{Defines the syntax of filesystem and network drive paths on DOS and Windows Systems.} +@description{ +This syntax definition ports open-source manually written Java, C++ and C# code parsers for Windows +syntax of file and directory names, as well as shares on local networks (UNC notation). + +Instead of following the strict definitions of what is admissable and what is not, all implementations +try to be a as lenient as possible. And so this is reflected in the grammar below as well. +} +module lang::paths::Windows + +start syntax WindowsPathString = WindowsPath; + +lexical WindowsPath + = unc : "\\\\" PathSep* PathChar* hostName PathSep+ PathChar* shareName PathSep+ WindowsFilePath path + | absolute : [A-Za-z] drive ":" PathSep WindowsFilePath path + | driveRelative : [A-Za-z] drive ":" WindowsFilePath path + | directoryRelative: PathSep WindowsFilePath + | relative : WindowsFilePath path + ; + +lexical PathChar = + @synopsis{This is the most admissable we can be. Note that some paths will be incorrect for older versions of DOS and Windows} + ![\a00-\a20 \< \> : \" | ? * \\ /]; + +lexical PathSegment + = current: "." + | parent : ".." + | name : PathChar+ \ ".." \ "." + ; + +lexical PathSep = [\\/]; + +lexical WindowsFilePath = {PathSegment PathSep+}* segments [\ ] !<< (); // only the last segment must not end in spaces. 
+ +@synopsis{Convert a windows path literal to a source location URI} +@description{ +1. parses the path using the grammar for ((WindowsPath)) +2. takes the _literal_ name components using string interpolation `""`. This means no decoding/encoding happens at all while extracting +hostname, share name and path segment names. Also all superfluous path separators are skipped. +3. uses `loc + str` path concatenation with its builtin character encoding to construct the URI. Also +the right path separators are introduced. +} +loc parseWindowsPath(str input) = mapPathToLoc([WindowsPath] input); + +@synopsis{UNC} +loc mapPathToLoc((WindowsPath) `\\\\`) + = (|file:///| + "" | it + "" | segment <- path.segments ); + +@synopsis{Absolute} +loc mapPathToLoc((WindowsPath) `<[A-Za-z] drive>:`) + = (|file:///:/| | it + "" | segment <- path.segments); + +@synopsis{Drive relative} +loc mapPathToLoc((WindowsPath) `<[A-Za-z] drive>:`) + = (|file:///:| | it + "" | segment <- path.segments); + +@synopsis{Directory relative} +loc mapPathToLoc((WindowsPath) ``) + = (|file:///| | it + "" | segment <- path.segments); + +@synopsis{Relative} +loc mapPathToLoc((WindowsPath) ``) + = (|cwd:///| | it + "" | segment <- path.segments); \ No newline at end of file From 173437da16281e5478b5274055633e4921d1c4c2 Mon Sep 17 00:00:00 2001 From: "Jurgen J. Vinju" Date: Sun, 12 May 2024 15:41:28 +0200 Subject: [PATCH 04/46] working on minor issues of first version --- src/org/rascalmpl/library/lang/paths/Windows.rsc | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/org/rascalmpl/library/lang/paths/Windows.rsc b/src/org/rascalmpl/library/lang/paths/Windows.rsc index 2a23a982901..cf0c4a893f9 100644 --- a/src/org/rascalmpl/library/lang/paths/Windows.rsc +++ b/src/org/rascalmpl/library/lang/paths/Windows.rsc @@ -20,7 +20,7 @@ lexical WindowsPath lexical PathChar = @synopsis{This is the most admissable we can be. 
Note that some paths will be incorrect for older versions of DOS and Windows} - ![\a00-\a20 \< \> : \" | ? * \\ /]; + !([\a00-\a20\< \> : \" | ? * \\ /] - [\ ]); lexical PathSegment = current: "." @@ -47,12 +47,12 @@ loc mapPathToLoc((WindowsPath) `\\\\ = (|file:///| + "" | it + "" | segment <- path.segments ); @synopsis{Absolute} -loc mapPathToLoc((WindowsPath) `<[A-Za-z] drive>:`) - = (|file:///:/| | it + "" | segment <- path.segments); +loc mapPathToLoc((WindowsPath) `C:`) + = (|file:///C:/| | it + "" | segment <- path.segments); @synopsis{Drive relative} -loc mapPathToLoc((WindowsPath) `<[A-Za-z] drive>:`) - = (|file:///:| | it + "" | segment <- path.segments); +loc mapPathToLoc((WindowsPath) `C:`) + = (|file:///C:| | it + "" | segment <- path.segments); @synopsis{Directory relative} loc mapPathToLoc((WindowsPath) ``) @@ -60,4 +60,8 @@ loc mapPathToLoc((WindowsPath) ``) @synopsis{Relative} loc mapPathToLoc((WindowsPath) ``) - = (|cwd:///| | it + "" | segment <- path.segments); \ No newline at end of file + = (|cwd:///| | it + "" | segment <- path.segments); + +test bool simpleDrivePath() + = parseWindowsPath("C:\\Program Files\\Rascal") + == |file:///C:/Program%20Files/Rascal|; \ No newline at end of file From e435789cf86a6068910a6778a8c4ab5efb42af74 Mon Sep 17 00:00:00 2001 From: "Jurgen J. 
Vinju" Date: Sun, 12 May 2024 16:04:22 +0200 Subject: [PATCH 05/46] resolved ambiguity --- .../rascalmpl/library/lang/paths/Windows.rsc | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/org/rascalmpl/library/lang/paths/Windows.rsc b/src/org/rascalmpl/library/lang/paths/Windows.rsc index cf0c4a893f9..4f53af6a069 100644 --- a/src/org/rascalmpl/library/lang/paths/Windows.rsc +++ b/src/org/rascalmpl/library/lang/paths/Windows.rsc @@ -11,16 +11,13 @@ module lang::paths::Windows start syntax WindowsPathString = WindowsPath; lexical WindowsPath - = unc : "\\\\" PathSep* PathChar* hostName PathSep+ PathChar* shareName PathSep+ WindowsFilePath path + = unc : "\\\\" PathSep? PathChar* hostName PathSep PathChar* shareName PathSep WindowsFilePath path | absolute : [A-Za-z] drive ":" PathSep WindowsFilePath path | driveRelative : [A-Za-z] drive ":" WindowsFilePath path - | directoryRelative: PathSep WindowsFilePath + | directoryRelative: [\\/] WindowsFilePath | relative : WindowsFilePath path ; - -lexical PathChar = - @synopsis{This is the most admissable we can be. Note that some paths will be incorrect for older versions of DOS and Windows} - !([\a00-\a20\< \> : \" | ? * \\ /] - [\ ]); +lexical PathChar = !([\a00-\a20\< \> : \" | ? * \\ /] - [\ ]); lexical PathSegment = current: "." @@ -28,7 +25,9 @@ lexical PathSegment | name : PathChar+ \ ".." \ "." ; -lexical PathSep = [\\/]; +lexical PathSep = [\\/]+ !>> [\\/]; + +lexical Drive = [A-Za-z]; lexical WindowsFilePath = {PathSegment PathSep+}* segments [\ ] !<< (); // only the last segment must not end in spaces. @@ -43,10 +42,11 @@ the right path separators are introduced. 
loc parseWindowsPath(str input) = mapPathToLoc([WindowsPath] input); @synopsis{UNC} -loc mapPathToLoc((WindowsPath) `\\\\`) +loc mapPathToLoc((WindowsPath) `\\\\`) = (|file:///| + "" | it + "" | segment <- path.segments ); @synopsis{Absolute} +// loc mapPathToLoc((WindowsPath) `<[A-Za-z] drive>:`) loc mapPathToLoc((WindowsPath) `C:`) = (|file:///C:/| | it + "" | segment <- path.segments); @@ -62,6 +62,10 @@ loc mapPathToLoc((WindowsPath) ``) loc mapPathToLoc((WindowsPath) ``) = (|cwd:///| | it + "" | segment <- path.segments); +test bool uncPath() + = parseWindowsPath("\\\\Server2\\Share\\Test\\Foo.txt") + == |file://Server2/Share/Test/Foo.txt|; + test bool simpleDrivePath() = parseWindowsPath("C:\\Program Files\\Rascal") == |file:///C:/Program%20Files/Rascal|; \ No newline at end of file From 2f584a175e1e01c8e3f7f3d452e8fb874215578e Mon Sep 17 00:00:00 2001 From: "Jurgen J. Vinju" Date: Sun, 12 May 2024 16:10:36 +0200 Subject: [PATCH 06/46] more fixes and more tests --- src/org/rascalmpl/library/lang/paths/Windows.rsc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/org/rascalmpl/library/lang/paths/Windows.rsc b/src/org/rascalmpl/library/lang/paths/Windows.rsc index 4f53af6a069..9114bf0ca1b 100644 --- a/src/org/rascalmpl/library/lang/paths/Windows.rsc +++ b/src/org/rascalmpl/library/lang/paths/Windows.rsc @@ -14,7 +14,7 @@ lexical WindowsPath = unc : "\\\\" PathSep? PathChar* hostName PathSep PathChar* shareName PathSep WindowsFilePath path | absolute : [A-Za-z] drive ":" PathSep WindowsFilePath path | driveRelative : [A-Za-z] drive ":" WindowsFilePath path - | directoryRelative: [\\/] WindowsFilePath + | directoryRelative: PathSep \ "\\\\" WindowsFilePath | relative : WindowsFilePath path ; lexical PathChar = !([\a00-\a20\< \> : \" | ? 
* \\ /] - [\ ]); @@ -62,10 +62,14 @@ loc mapPathToLoc((WindowsPath) ``) loc mapPathToLoc((WindowsPath) ``) = (|cwd:///| | it + "" | segment <- path.segments); -test bool uncPath() +test bool uncSharePath() = parseWindowsPath("\\\\Server2\\Share\\Test\\Foo.txt") == |file://Server2/Share/Test/Foo.txt|; +test bool uncDrivePath() + = parseWindowsPath("\\\\system07\\C$\\") + == |file://system07/C$|; + test bool simpleDrivePath() = parseWindowsPath("C:\\Program Files\\Rascal") == |file:///C:/Program%20Files/Rascal|; \ No newline at end of file From aed9b54f886c536b5f61bc6aee87e52d9ce0e6bc Mon Sep 17 00:00:00 2001 From: "Jurgen J. Vinju" Date: Mon, 13 May 2024 10:40:58 +0200 Subject: [PATCH 07/46] generalized to any drive --- .../rascalmpl/library/lang/paths/Windows.rsc | 54 +++++++++++-------- 1 file changed, 33 insertions(+), 21 deletions(-) diff --git a/src/org/rascalmpl/library/lang/paths/Windows.rsc b/src/org/rascalmpl/library/lang/paths/Windows.rsc index 9114bf0ca1b..4db00852680 100644 --- a/src/org/rascalmpl/library/lang/paths/Windows.rsc +++ b/src/org/rascalmpl/library/lang/paths/Windows.rsc @@ -1,22 +1,26 @@ @synopsis{Defines the syntax of filesystem and network drive paths on DOS and Windows Systems.} @description{ -This syntax definition ports open-source manually written Java, C++ and C# code parsers for Windows -syntax of file and directory names, as well as shares on local networks (UNC notation). - -Instead of following the strict definitions of what is admissable and what is not, all implementations -try to be a as lenient as possible. And so this is reflected in the grammar below as well. +This syntax definition of file paths and file names in Windows fprmalizes open-source implementations +manually written Java, C++ and C# code. These are parsers for Windows syntax of file and directory names, +as well as shares on local networks (UNC notation). 
It also derives from openly available documentation +sources on Windows and the .NET platform for confirmation and test examples. + +The main function of this module, ((parseWindowsPath)): +* faithfully maps any syntactically correctly Windows paths to syntactically correct `loc` values. +* ensures that if the file exists on system A, then the `loc` representation +resolves to the same file on system A via any ((module::IO)) function. +* and nothing more. } module lang::paths::Windows -start syntax WindowsPathString = WindowsPath; - lexical WindowsPath - = unc : "\\\\" PathSep? PathChar* hostName PathSep PathChar* shareName PathSep WindowsFilePath path - | absolute : [A-Za-z] drive ":" PathSep WindowsFilePath path - | driveRelative : [A-Za-z] drive ":" WindowsFilePath path - | directoryRelative: PathSep \ "\\\\" WindowsFilePath + = unc : Slash Slash Slashes? PathChar* hostName Slashes PathChar* shareName Slashes WindowsFilePath path + | absolute : Drive drive ":" Slashes WindowsFilePath path + | driveRelative : Drive drive ":" WindowsFilePath path + | directoryRelative: Slash WindowsFilePath | relative : WindowsFilePath path ; + lexical PathChar = !([\a00-\a20\< \> : \" | ? * \\ /] - [\ ]); lexical PathSegment @@ -25,11 +29,15 @@ lexical PathSegment | name : PathChar+ \ ".." \ "." ; -lexical PathSep = [\\/]+ !>> [\\/]; +lexical Drive = [A-Za-z]; + +lexical Slashes = Slash+ !>> [\\/]; + +lexical Slash = [\\/]; lexical Drive = [A-Za-z]; -lexical WindowsFilePath = {PathSegment PathSep+}* segments [\ ] !<< (); // only the last segment must not end in spaces. +lexical WindowsFilePath = {PathSegment Slashes}* segments [\ ] !<< (); // only the last segment must not end in spaces. @synopsis{Convert a windows path literal to a source location URI} @description{ @@ -42,20 +50,20 @@ the right path separators are introduced. 
loc parseWindowsPath(str input) = mapPathToLoc([WindowsPath] input); @synopsis{UNC} -loc mapPathToLoc((WindowsPath) `\\\\`) +loc mapPathToLoc((WindowsPath) ``) = (|file:///| + "" | it + "" | segment <- path.segments ); @synopsis{Absolute} // loc mapPathToLoc((WindowsPath) `<[A-Za-z] drive>:`) -loc mapPathToLoc((WindowsPath) `C:`) - = (|file:///C:/| | it + "" | segment <- path.segments); +loc mapPathToLoc((WindowsPath) `:`) + = (|file:///:/| | it + "" | segment <- path.segments); @synopsis{Drive relative} -loc mapPathToLoc((WindowsPath) `C:`) - = (|file:///C:| | it + "" | segment <- path.segments); +loc mapPathToLoc((WindowsPath) `:`) + = (|file:///:| | it + "" | segment <- path.segments); @synopsis{Directory relative} -loc mapPathToLoc((WindowsPath) ``) +loc mapPathToLoc((WindowsPath) ``) = (|file:///| | it + "" | segment <- path.segments); @synopsis{Relative} @@ -70,6 +78,10 @@ test bool uncDrivePath() = parseWindowsPath("\\\\system07\\C$\\") == |file://system07/C$|; -test bool simpleDrivePath() +test bool simpleDrivePathC() = parseWindowsPath("C:\\Program Files\\Rascal") - == |file:///C:/Program%20Files/Rascal|; \ No newline at end of file + == |file:///C:/Program%20Files/Rascal|; + +test bool simpleDrivePathD() + = parseWindowsPath("D:\\Program Files\\Rascal") + == |file:///D:/Program%20Files/Rascal|; \ No newline at end of file From 437e8d96730165c042242288b37125c86999b187 Mon Sep 17 00:00:00 2001 From: "Jurgen J. 
Vinju" Date: Mon, 13 May 2024 10:47:37 +0200 Subject: [PATCH 08/46] removed previous experiment in favor of full syntax definition for Windows paths --- src/org/rascalmpl/library/Location.rsc | 64 ++++---------------------- src/org/rascalmpl/library/Prelude.java | 56 ---------------------- 2 files changed, 10 insertions(+), 110 deletions(-) diff --git a/src/org/rascalmpl/library/Location.rsc b/src/org/rascalmpl/library/Location.rsc index c0744eda32b..003f1b0c627 100644 --- a/src/org/rascalmpl/library/Location.rsc +++ b/src/org/rascalmpl/library/Location.rsc @@ -24,6 +24,7 @@ import Set; import String; import Exception; +import lang::paths::Windows; @synopsis{Extracts a path relative to a parent location.} @description{ @@ -43,63 +44,18 @@ loc relativize(list[loc] haystack, loc needle) { } } -@synopsis{Names of file system path syntaxes that can be found in the wild.} +@synopsis{Convert Windows path syntax to a `loc` value} @description{ -* `generic` captures all unix-like systems like the BSD family, SunOs, Solaris, Irix, etc. -* `mac_osx` captures modern Mac's that also have a unix-like filesystem but with different exceptions to permissable characters and case-sensitivity. -* `windows` is for all DOS-based filesystems and what came after, with the slashes in the other direction. -* `linux` is for all the filesystems in Linux distributions. +This conversion supports generic Windows path syntax, including: +* Absolute drive-specific: `C:\Program Files` +* Relative drive-specific: `C:hello.txt` +* Relative: `hello.txt` +* Directory-relative: `\hello.txt` +* UNC format: `\\\\system07\\C$\\` -Use this as a parameter to ((locFromFileSystem)) to choose how to parse a string as a filesystem path. +Windows paths, againat popular believe, support both `/` and `\` as path separators. 
} -data FileSystemSyntax - = generic() - | mac_osx() - | windows() - | linux() - ; - -@javaClass{org.rascalmpl.library.Prelude} -@synopsis{Utility to retrieve the current notation for file system paths.} -@description{ -This is uses to configure the default syntax parameter of ((locFromFileSystem)). -} -java FileSystemSyntax currentFileSystem(); - -@synopsis{Converts the OS-specific string representation of a file or directory PATH to a `loc`.} -@description{ -This converts a string that hold a path to a file, written in a specific notation for paths, to -a canonical `loc` in URI format. - -* if `legalize` is true, then every path segment is legalized by replacing offending characters to `_` -* if `legalize` is false, and offending characters appear between the path separators, then an IO exception is thrown. -* if the requested file system syntax is either case insensitive or not case preserving (or both), then all uppercase characters will be replaced by lowercase characters. -* on windows systems the drive letter `C:` is added if a drive letter is missing. -* on all the other systems, if the path starts with a path separator, it is taken as absolute in `file:///`. Otherwise the root will be `cwd:///`. -} -// Wait for bootstrap -// @examples{ -// ```rascal-shell -// import Location; -// locFromFileSystem("C:\\Documents\\Newsletters\\Summer2018.pdf", \syntax=windows()) -// ``` -// } -@benefits{ -* After conversion there are many utility functions that operate safely and portably on `loc` values. See ((module:IO)) and ((module:util::FileSystem)) for examples. -* ((module:util::ShellExec)) features `loc`-based versions for passing the names of binaries and the names of file parameters on the commandlines as `loc` values. -* The file names identified by the path strings do not need to exist. They could typically be names in a CSV file or a spreadsheet data source, for which -no reflection exists in the mounted drives of the current (virtual) computer. 
Consider them "data", until passed in the functions of ((module:IO)), for example. -} -@pitfalls{ -* Delaying this conversion until _just before_ file ((module:IO)), misses out on: - * efficiency; loc values have internal sharing and memoization features. - * portability: OS-specific string paths can break on other machines. - * equational reasoning: ((locFromFileSystem)) has canonicalizing features to remove common cases of aliases (such as uppercase vs lowercase). -* Path syntax does not support `..` or `.` notation, simply because this conversion does not require -the paths to even exist on the current system. The `..` notation remains part of the name of a file accordingly, without being interpreted against a mounted file system. -} -@javaClass{org.rascalmpl.library.Prelude} -java loc locFromFileSystem(str pathString, FileSystemSyntax \syntax=currentFileSystem(), bool legalize=false); +loc locFromWindowsPath(str path) = parseWindowsPath(path); @synopsis{Check that two locations refer to the same file.} bool isSameFile(loc l, loc r) = l.top[fragment=""] == r.top[fragment=""]; diff --git a/src/org/rascalmpl/library/Prelude.java b/src/org/rascalmpl/library/Prelude.java index 5759869938f..b704c24ceb5 100644 --- a/src/org/rascalmpl/library/Prelude.java +++ b/src/org/rascalmpl/library/Prelude.java @@ -3627,62 +3627,6 @@ public IConstructor currentFileSystem() { return values.constructor(cons); } - public ISourceLocation locFromFileSystem(IString pathString, IConstructor syntax, IBool legalize) { - var fs = FileSystem.valueOf(syntax.getName().toUpperCase()); - var sep = String.valueOf(fs.getNameSeparator()); - var path = pathString.getValue(); - var drive = fs.supportsDriveLetter() && path.length() >= 2 && path.charAt(1) == ':' - ? 
path.substring(0, 2) - : ""; - - if (drive.length() > 0) { - // cut off the drive letter - path = path.substring(2); - } - - if (FileSystem.WINDOWS == fs && drive.isEmpty()) { - // supply missing drive letter - drive = "C:"; - } - - if (legalize.getValue()) { - // replace illegals by _ - String[] legal = Arrays.stream(path.split(Pattern.quote(sep))) - .map(segment -> fs.toLegalFileName(segment, '_')) - .toArray(String[]::new); - path = String.join(sep, legal); - } - - // throw IO when illegal char occur - Arrays.stream(path.split(Pattern.quote(sep))) - .forEach(segment -> { - if (!segment.isEmpty() && !fs.isLegalFileName(segment)) { - throw RuntimeExceptionFactory.io("\""+ segment + "\" is not a legal " + syntax.getName() + " filename."); - } - }); - - // now switch to forward slashes - if (!sep.equals("/")) { - path = FilenameUtils.separatorsToUnix(path); - } - - // canonicalize if required or necessary - if (!fs.isCasePreserving() || !fs.isCaseSensitive()) { - // to try and avoid having different loc values point to the same file - // the fewer aliases, the better, even though due to symlinks, - // mounts and such we can not avoid them alltogether. - path = path.toLowerCase(); - } - - // generate either a root file loc or a current working directory loc. - if (path.startsWith(sep) && drive.isEmpty()) { - return URIUtil.correctLocation("file", "", drive + path); - } - else { - return URIUtil.correctLocation("cwd", "", drive + path); - } - } - public IValue readBinaryValueFile(IValue type, ISourceLocation loc){ if(trackIO) System.err.println("readBinaryValueFile: " + loc); From 981d614620b1eef8261fa30a83d6011346241d93 Mon Sep 17 00:00:00 2001 From: "Jurgen J. 
Vinju" Date: Mon, 13 May 2024 14:21:13 +0200 Subject: [PATCH 09/46] added |cwdrive:///| to be able to accommodate current-drive-relative absolute file names --- .../rascalmpl/library/lang/paths/Windows.rsc | 12 ++++---- .../uri/file/CurrentWorkingDriveResolver.java | 30 +++++++++++++++++++ src/org/rascalmpl/uri/resolvers.config | 1 + 3 files changed, 37 insertions(+), 6 deletions(-) create mode 100644 src/org/rascalmpl/uri/file/CurrentWorkingDriveResolver.java diff --git a/src/org/rascalmpl/library/lang/paths/Windows.rsc b/src/org/rascalmpl/library/lang/paths/Windows.rsc index 4db00852680..062956ba5cd 100644 --- a/src/org/rascalmpl/library/lang/paths/Windows.rsc +++ b/src/org/rascalmpl/library/lang/paths/Windows.rsc @@ -53,20 +53,20 @@ loc parseWindowsPath(str input) = mapPathToLoc([WindowsPath] input); loc mapPathToLoc((WindowsPath) ``) = (|file:///| + "" | it + "" | segment <- path.segments ); -@synopsis{Absolute} +@synopsis{Absolute: given the drive and relative to its root.} // loc mapPathToLoc((WindowsPath) `<[A-Za-z] drive>:`) loc mapPathToLoc((WindowsPath) `:`) = (|file:///:/| | it + "" | segment <- path.segments); -@synopsis{Drive relative} +@synopsis{Drive relative: relative to the current working directory on the given drive.} loc mapPathToLoc((WindowsPath) `:`) - = (|file:///:| | it + "" | segment <- path.segments); + = (|file:///:.| | it + "" | segment <- path.segments); -@synopsis{Directory relative} +@synopsis{Directory relative: relative to the root of the current drive.} loc mapPathToLoc((WindowsPath) ``) - = (|file:///| | it + "" | segment <- path.segments); + = (|cwdrive:///| | it + "" | segment <- path.segments); -@synopsis{Relative} +@synopsis{Relative to the current working directory on the current drive.} loc mapPathToLoc((WindowsPath) ``) = (|cwd:///| | it + "" | segment <- path.segments); diff --git a/src/org/rascalmpl/uri/file/CurrentWorkingDriveResolver.java b/src/org/rascalmpl/uri/file/CurrentWorkingDriveResolver.java new file mode 100644 
index 00000000000..10d4e528ab7 --- /dev/null +++ b/src/org/rascalmpl/uri/file/CurrentWorkingDriveResolver.java @@ -0,0 +1,30 @@ +/******************************************************************************* + * Copyright (c) 2009-2024 CWI + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v10.html + * + * Contributors: + * * Jurgen J. Vinju - Jurgen.Vinju@cwi.nl - CWI + ******************************************************************************/ +package org.rascalmpl.uri.file; + +import java.io.File; +import java.nio.file.Paths; + +/** + * For reading and writing files relative to the current working drive. + * This is different from the current working directory, namely it is the + * root of the current working directory. + */ +public class CurrentWorkingDriveResolver extends AliasedFileResolver { + + public CurrentWorkingDriveResolver() { + super("cwdrive", deriveCurrentWorkingDrive(System.getProperty("user.dir"))); + } + + private static String deriveCurrentWorkingDrive(String property) { + return Paths.get(property).getRoot().toString(); + } +} diff --git a/src/org/rascalmpl/uri/resolvers.config b/src/org/rascalmpl/uri/resolvers.config index acfc7ce38cd..a162f42e49f 100644 --- a/src/org/rascalmpl/uri/resolvers.config +++ b/src/org/rascalmpl/uri/resolvers.config @@ -9,6 +9,7 @@ org.rascalmpl.uri.jar.JarURIResolver org.rascalmpl.uri.zip.ZipURIResolver org.rascalmpl.uri.file.HomeURIResolver org.rascalmpl.uri.file.CWDURIResolver +org.rascalmpl.uri.file.CurrentWorkingDriveResolver org.rascalmpl.uri.file.SystemPathURIResolver org.rascalmpl.uri.libraries.MemoryResolver org.rascalmpl.uri.libraries.RascalLibraryURIResolver From 8552be60d591cce4c3e2080ab28f56570b7b475f Mon Sep 17 00:00:00 2001 From: "Jurgen J. 
Vinju" Date: Mon, 13 May 2024 15:12:53 +0200 Subject: [PATCH 10/46] cleanup --- src/org/rascalmpl/library/Prelude.java | 3 --- src/org/rascalmpl/library/lang/paths/Windows.rsc | 11 +++++++---- .../uri/file/CurrentWorkingDriveResolver.java | 5 ++--- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/src/org/rascalmpl/library/Prelude.java b/src/org/rascalmpl/library/Prelude.java index b704c24ceb5..94808851804 100644 --- a/src/org/rascalmpl/library/Prelude.java +++ b/src/org/rascalmpl/library/Prelude.java @@ -46,7 +46,6 @@ import java.nio.charset.CharsetEncoder; import java.nio.charset.CodingErrorAction; import java.nio.charset.StandardCharsets; -import java.nio.file.Paths; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.text.ParseException; @@ -70,8 +69,6 @@ import java.util.function.Consumer; import java.util.regex.Pattern; -import org.apache.commons.io.FileSystem; -import org.apache.commons.io.FilenameUtils; import org.apache.commons.lang.CharSetUtils; import org.rascalmpl.debug.IRascalMonitor; import org.rascalmpl.exceptions.JavaCompilation; diff --git a/src/org/rascalmpl/library/lang/paths/Windows.rsc b/src/org/rascalmpl/library/lang/paths/Windows.rsc index 062956ba5cd..2bcd2cc7d37 100644 --- a/src/org/rascalmpl/library/lang/paths/Windows.rsc +++ b/src/org/rascalmpl/library/lang/paths/Windows.rsc @@ -1,15 +1,18 @@ @synopsis{Defines the syntax of filesystem and network drive paths on DOS and Windows Systems.} @description{ -This syntax definition of file paths and file names in Windows fprmalizes open-source implementations -manually written Java, C++ and C# code. These are parsers for Windows syntax of file and directory names, +This syntax definition of file paths and file names in Windows formalizes open-source implementations +manually written in Java, C++ and C# code. These are parsers for Windows syntax of file and directory names, as well as shares on local networks (UNC notation). 
It also derives from openly available documentation sources on Windows and the .NET platform for confirmation and test examples. The main function of this module, ((parseWindowsPath)): * faithfully maps any syntactically correctly Windows paths to syntactically correct `loc` values. +* throws a ParseError if the path does not comply. Typically file names ending in spaces do not comply. * ensures that if the file exists on system A, then the `loc` representation resolves to the same file on system A via any ((module::IO)) function. -* and nothing more. +* and nothing more. No normalization, no interpretatioon of `.` and `..`, no changing of cases. +This is left to downstream processors of `loc` values, if necessary. The current transformation +is purely syntactical, and tries to preserve the semantics of the path as much as possible. } module lang::paths::Windows @@ -37,7 +40,7 @@ lexical Slash = [\\/]; lexical Drive = [A-Za-z]; -lexical WindowsFilePath = {PathSegment Slashes}* segments [\ ] !<< (); // only the last segment must not end in spaces. +lexical WindowsFilePath = {PathSegment Slashes}* segments Slashes? [\ ] !<< (); // only the last segment must not end in spaces. 
@synopsis{Convert a windows path literal to a source location URI} @description{ diff --git a/src/org/rascalmpl/uri/file/CurrentWorkingDriveResolver.java b/src/org/rascalmpl/uri/file/CurrentWorkingDriveResolver.java index 10d4e528ab7..84bd45d893f 100644 --- a/src/org/rascalmpl/uri/file/CurrentWorkingDriveResolver.java +++ b/src/org/rascalmpl/uri/file/CurrentWorkingDriveResolver.java @@ -10,7 +10,6 @@ ******************************************************************************/ package org.rascalmpl.uri.file; -import java.io.File; import java.nio.file.Paths; /** @@ -24,7 +23,7 @@ public CurrentWorkingDriveResolver() { super("cwdrive", deriveCurrentWorkingDrive(System.getProperty("user.dir"))); } - private static String deriveCurrentWorkingDrive(String property) { - return Paths.get(property).getRoot().toString(); + private static String deriveCurrentWorkingDrive(String userDir) { + return Paths.get(userDir).toAbsolutePath().getRoot().toString(); } } From ae8e3d3f49b34ac1ef0268a246d27531003d7673 Mon Sep 17 00:00:00 2001 From: "Jurgen J. Vinju" Date: Mon, 13 May 2024 15:27:40 +0200 Subject: [PATCH 11/46] last segment may not end in a dot either --- src/org/rascalmpl/library/lang/paths/Windows.rsc | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/org/rascalmpl/library/lang/paths/Windows.rsc b/src/org/rascalmpl/library/lang/paths/Windows.rsc index 2bcd2cc7d37..0d353f26f8d 100644 --- a/src/org/rascalmpl/library/lang/paths/Windows.rsc +++ b/src/org/rascalmpl/library/lang/paths/Windows.rsc @@ -40,7 +40,9 @@ lexical Slash = [\\/]; lexical Drive = [A-Za-z]; -lexical WindowsFilePath = {PathSegment Slashes}* segments Slashes? [\ ] !<< (); // only the last segment must not end in spaces. +lexical WindowsFilePath = {PathSegment Slashes}* segments Slashes? [\ .] !<< (); // only the last segment must not end in spaces. 
+ +import ParseTree; @synopsis{Convert a windows path literal to a source location URI} @description{ @@ -50,14 +52,13 @@ hostname, share name and path segment names. Also all superfluous path separator 3. uses `loc + str` path concatenation with its builtin character encoding to construct the URI. Also the right path separators are introduced. } -loc parseWindowsPath(str input) = mapPathToLoc([WindowsPath] input); +loc parseWindowsPath(str input, loc src=|unknown:///|) = mapPathToLoc(parse(#WindowsPath, input, src)); @synopsis{UNC} loc mapPathToLoc((WindowsPath) ``) = (|file:///| + "" | it + "" | segment <- path.segments ); @synopsis{Absolute: given the drive and relative to its root.} -// loc mapPathToLoc((WindowsPath) `<[A-Za-z] drive>:`) loc mapPathToLoc((WindowsPath) `:`) = (|file:///:/| | it + "" | segment <- path.segments); @@ -85,6 +86,14 @@ test bool simpleDrivePathC() = parseWindowsPath("C:\\Program Files\\Rascal") == |file:///C:/Program%20Files/Rascal|; +test bool mixedSlashesDrivePathC() + = parseWindowsPath("C:\\Program Files/Rascal") + == |file:///C:/Program%20Files/Rascal|; + +test bool trailingSlashesDrivePathC() + = parseWindowsPath("C:\\Program Files\\Rascal\\\\") + == |file:///C:/Program%20Files/Rascal|; + test bool simpleDrivePathD() = parseWindowsPath("D:\\Program Files\\Rascal") == |file:///D:/Program%20Files/Rascal|; \ No newline at end of file From a908cecd2294a7e74ecbe8a36311c8bfbd7adcb6 Mon Sep 17 00:00:00 2001 From: "Jurgen J. 
Vinju" Date: Mon, 13 May 2024 15:48:21 +0200 Subject: [PATCH 12/46] need to know some share names to be able to write good tests --- .github/workflows/build.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 57296457eb0..56b728738db 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -133,6 +133,12 @@ jobs: # single quotes to help windows deal with argument splitting run: mvn -B '-Drascal.compile.skip' '-Drascal.tutor.skip' '-Drascal.test.memory=3' test + - name: Little experiment + if: runner.os == 'Windows' + run: | + pushd //localhost/ + dir + - uses: codecov/codecov-action@v3 continue-on-error: true # sometimes this one fails, that shouldn't stop a build with: From 54deeccb8575690c39556852ed3624034f88a513 Mon Sep 17 00:00:00 2001 From: "Jurgen J. Vinju" Date: Mon, 13 May 2024 15:52:07 +0200 Subject: [PATCH 13/46] docs --- src/org/rascalmpl/library/String.rsc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/org/rascalmpl/library/String.rsc b/src/org/rascalmpl/library/String.rsc index bf47acba07d..3e2cff8ab46 100644 --- a/src/org/rascalmpl/library/String.rsc +++ b/src/org/rascalmpl/library/String.rsc @@ -577,7 +577,7 @@ toLocation("http://grammarware.net"); toLocation("document.xml"); ``` } -@deprecated{Use ((Location::fromOSlocFromFileSystem))} +@deprecated{Use ((Location::locFromWindowsPath)) for example. The current function does not handle all the different intricasies of path notation.} public loc toLocation(str s) = (/\:\/\// := s) ? |://| : |cwd:///|; From 4a2eed95dd1ab58557636e05c6f3b8ea51af7eef Mon Sep 17 00:00:00 2001 From: "Jurgen J. 
Vinju" Date: Mon, 13 May 2024 15:57:12 +0200 Subject: [PATCH 14/46] removed clone --- src/org/rascalmpl/library/lang/paths/Windows.rsc | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/org/rascalmpl/library/lang/paths/Windows.rsc b/src/org/rascalmpl/library/lang/paths/Windows.rsc index 0d353f26f8d..a542ac51aa7 100644 --- a/src/org/rascalmpl/library/lang/paths/Windows.rsc +++ b/src/org/rascalmpl/library/lang/paths/Windows.rsc @@ -38,8 +38,6 @@ lexical Slashes = Slash+ !>> [\\/]; lexical Slash = [\\/]; -lexical Drive = [A-Za-z]; - lexical WindowsFilePath = {PathSegment Slashes}* segments Slashes? [\ .] !<< (); // only the last segment must not end in spaces. import ParseTree; From 709693d7a9d780767f58407365b0d7c4fed3c86f Mon Sep 17 00:00:00 2001 From: "Jurgen J. Vinju" Date: Mon, 13 May 2024 15:59:20 +0200 Subject: [PATCH 15/46] better way to list shares --- .github/workflows/build.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 56b728738db..b37cf74522c 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -136,8 +136,7 @@ jobs: - name: Little experiment if: runner.os == 'Windows' run: | - pushd //localhost/ - dir + net share - uses: codecov/codecov-action@v3 continue-on-error: true # sometimes this one fails, that shouldn't stop a build From aeb920f04e79cd195f8215e5d6f2fd0f4e3701ab Mon Sep 17 00:00:00 2001 From: "Jurgen J. 
Vinju" Date: Mon, 13 May 2024 16:16:41 +0200 Subject: [PATCH 16/46] added failing test for windows share --- src/org/rascalmpl/library/lang/paths/Windows.rsc | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/org/rascalmpl/library/lang/paths/Windows.rsc b/src/org/rascalmpl/library/lang/paths/Windows.rsc index a542ac51aa7..a31607de924 100644 --- a/src/org/rascalmpl/library/lang/paths/Windows.rsc +++ b/src/org/rascalmpl/library/lang/paths/Windows.rsc @@ -16,6 +16,9 @@ is purely syntactical, and tries to preserve the semantics of the path as much a } module lang::paths::Windows +import IO; +import util::SystemAPI; + lexical WindowsPath = unc : Slash Slash Slashes? PathChar* hostName Slashes PathChar* shareName Slashes WindowsFilePath path | absolute : Drive drive ":" Slashes WindowsFilePath path @@ -94,4 +97,15 @@ test bool trailingSlashesDrivePathC() test bool simpleDrivePathD() = parseWindowsPath("D:\\Program Files\\Rascal") - == |file:///D:/Program%20Files/Rascal|; \ No newline at end of file + == |file:///D:/Program%20Files/Rascal|; + +test bool uncNetworkShareOk() { + loc l = parseWindowsPath("\\\\localhost\\ADMIN$\\System32\\cmd.exe"); + + if (/win/i := getSystemProperty("os.name")) { + return exists(l); + } + else { + return |file://localhost/ADMIN$/System32/cmd.exe| == l; + } +} \ No newline at end of file From 8756bcdead356de78c6c19837532c09fe16af0ec Mon Sep 17 00:00:00 2001 From: "Jurgen J. 
Vinju" Date: Mon, 13 May 2024 16:18:04 +0200 Subject: [PATCH 17/46] improved still failing test --- src/org/rascalmpl/library/lang/paths/Windows.rsc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/org/rascalmpl/library/lang/paths/Windows.rsc b/src/org/rascalmpl/library/lang/paths/Windows.rsc index a31607de924..4bfa6b1516a 100644 --- a/src/org/rascalmpl/library/lang/paths/Windows.rsc +++ b/src/org/rascalmpl/library/lang/paths/Windows.rsc @@ -75,6 +75,8 @@ loc mapPathToLoc((WindowsPath) ``) loc mapPathToLoc((WindowsPath) ``) = (|cwd:///| | it + "" | segment <- path.segments); +private bool IS_WINDOWS = /win/i := getSystemProperty("os.name"); + test bool uncSharePath() = parseWindowsPath("\\\\Server2\\Share\\Test\\Foo.txt") == |file://Server2/Share/Test/Foo.txt|; @@ -102,7 +104,7 @@ test bool simpleDrivePathD() test bool uncNetworkShareOk() { loc l = parseWindowsPath("\\\\localhost\\ADMIN$\\System32\\cmd.exe"); - if (/win/i := getSystemProperty("os.name")) { + if (IS_WINDOWS) { return exists(l); } else { From 8639410abd88288e119606c8495b0fad29e61f6e Mon Sep 17 00:00:00 2001 From: "Jurgen J. 
Vinju" Date: Mon, 13 May 2024 16:41:46 +0200 Subject: [PATCH 18/46] added UNC resolver to make sure we do not generate file:/// paths that seem to work but actually do not work outside of Windows systems --- .../rascalmpl/library/lang/paths/Windows.rsc | 4 +- .../rascalmpl/uri/file/FileURIResolver.java | 1 + src/org/rascalmpl/uri/file/UNCResolver.java | 39 +++++++++++++++++++ src/org/rascalmpl/uri/resolvers.config | 1 + 4 files changed, 43 insertions(+), 2 deletions(-) create mode 100644 src/org/rascalmpl/uri/file/UNCResolver.java diff --git a/src/org/rascalmpl/library/lang/paths/Windows.rsc b/src/org/rascalmpl/library/lang/paths/Windows.rsc index 4bfa6b1516a..fefd9e723a0 100644 --- a/src/org/rascalmpl/library/lang/paths/Windows.rsc +++ b/src/org/rascalmpl/library/lang/paths/Windows.rsc @@ -57,7 +57,7 @@ loc parseWindowsPath(str input, loc src=|unknown:///|) = mapPathToLoc(parse(#Win @synopsis{UNC} loc mapPathToLoc((WindowsPath) ``) - = (|file:///| + "" | it + "" | segment <- path.segments ); + = (|unc:///| + "" | it + "" | segment <- path.segments ); @synopsis{Absolute: given the drive and relative to its root.} loc mapPathToLoc((WindowsPath) `:`) @@ -108,6 +108,6 @@ test bool uncNetworkShareOk() { return exists(l); } else { - return |file://localhost/ADMIN$/System32/cmd.exe| == l; + return |unc://localhost/ADMIN$/System32/cmd.exe| == l; } } \ No newline at end of file diff --git a/src/org/rascalmpl/uri/file/FileURIResolver.java b/src/org/rascalmpl/uri/file/FileURIResolver.java index bfa8c358228..2c87f50a3e0 100644 --- a/src/org/rascalmpl/uri/file/FileURIResolver.java +++ b/src/org/rascalmpl/uri/file/FileURIResolver.java @@ -143,6 +143,7 @@ public boolean exists(ISourceLocation uri) { * To override to build resolvers to specific locations using a prefix for example. 
*/ protected String getPath(ISourceLocation uri) { + assert uri.hasAuthority(); return uri.getPath(); } diff --git a/src/org/rascalmpl/uri/file/UNCResolver.java b/src/org/rascalmpl/uri/file/UNCResolver.java new file mode 100644 index 00000000000..af2e30a36fd --- /dev/null +++ b/src/org/rascalmpl/uri/file/UNCResolver.java @@ -0,0 +1,39 @@ +package org.rascalmpl.uri.file; + +import java.io.FileNotFoundException; +import java.io.IOException; + +import io.usethesource.vallang.ISourceLocation; + +/** + * Implements the UNC-available network shares on Windows systems. + */ +public class UNCResolver extends FileURIResolver { + private boolean onWindows = System.getProperty("os.name").toLowerCase().startsWith("win"); + + public UNCResolver() throws IOException { + super(); + } + + @Override + protected String getPath(ISourceLocation uri) { + if (!onWindows) { + throw new RuntimeException(new FileNotFoundException(uri.toString() + "; UNC is only available on Windows")); + } + + if (uri.hasAuthority()) { + // downstream methods will use `new File` and `new FileInputStream` + // which are able to parse UNC's on Windows. + return "\\\\" + uri.getAuthority() + "\\" + uri.getPath(); + } + else { + // just a normal absolute path + return uri.getPath(); + } + } + + @Override + public String scheme() { + return "unc"; + } +} diff --git a/src/org/rascalmpl/uri/resolvers.config b/src/org/rascalmpl/uri/resolvers.config index a162f42e49f..5e35fb9c326 100644 --- a/src/org/rascalmpl/uri/resolvers.config +++ b/src/org/rascalmpl/uri/resolvers.config @@ -10,6 +10,7 @@ org.rascalmpl.uri.zip.ZipURIResolver org.rascalmpl.uri.file.HomeURIResolver org.rascalmpl.uri.file.CWDURIResolver org.rascalmpl.uri.file.CurrentWorkingDriveResolver +org.rascalmpl.uri.file.UNCResolver org.rascalmpl.uri.file.SystemPathURIResolver org.rascalmpl.uri.libraries.MemoryResolver org.rascalmpl.uri.libraries.RascalLibraryURIResolver From bb643fa8942912dad87030e9b929b60b6c1817bd Mon Sep 17 00:00:00 2001 From: "Jurgen J. 
Vinju" Date: Mon, 13 May 2024 17:41:24 +0200 Subject: [PATCH 19/46] test up-to-date with new unc scheme --- src/org/rascalmpl/library/lang/paths/Windows.rsc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/org/rascalmpl/library/lang/paths/Windows.rsc b/src/org/rascalmpl/library/lang/paths/Windows.rsc index fefd9e723a0..d50fb7b841b 100644 --- a/src/org/rascalmpl/library/lang/paths/Windows.rsc +++ b/src/org/rascalmpl/library/lang/paths/Windows.rsc @@ -79,11 +79,11 @@ private bool IS_WINDOWS = /win/i := getSystemProperty("os.name"); test bool uncSharePath() = parseWindowsPath("\\\\Server2\\Share\\Test\\Foo.txt") - == |file://Server2/Share/Test/Foo.txt|; + == |unc://Server2/Share/Test/Foo.txt|; test bool uncDrivePath() = parseWindowsPath("\\\\system07\\C$\\") - == |file://system07/C$|; + == |unc://system07/C$|; test bool simpleDrivePathC() = parseWindowsPath("C:\\Program Files\\Rascal") From 1d39bf0b568b62541fb5d1558b8a91e2f9c4593b Mon Sep 17 00:00:00 2001 From: "Jurgen J. 
Vinju" Date: Tue, 14 May 2024 18:51:36 +0200 Subject: [PATCH 20/46] fixed assert and refactored lang::paths::Windows to factor common appendPath function instead of repeating the reducer 5 times --- src/org/rascalmpl/library/Location.rsc | 4 ++-- src/org/rascalmpl/library/Prelude.java | 8 -------- src/org/rascalmpl/library/lang/paths/Windows.rsc | 15 +++++++++------ src/org/rascalmpl/uri/file/FileURIResolver.java | 2 +- 4 files changed, 12 insertions(+), 17 deletions(-) diff --git a/src/org/rascalmpl/library/Location.rsc b/src/org/rascalmpl/library/Location.rsc index 003f1b0c627..416ea2961e3 100644 --- a/src/org/rascalmpl/library/Location.rsc +++ b/src/org/rascalmpl/library/Location.rsc @@ -51,9 +51,9 @@ This conversion supports generic Windows path syntax, including: * Relative drive-specific: `C:hello.txt` * Relative: `hello.txt` * Directory-relative: `\hello.txt` -* UNC format: `\\\\system07\\C$\\` +* UNC format: `\\system07\C$\` -Windows paths, againat popular believe, support both `/` and `\` as path separators. +Windows paths, against popular believe, support both `/` and `\` as path separators. 
} loc locFromWindowsPath(str path) = parseWindowsPath(path); diff --git a/src/org/rascalmpl/library/Prelude.java b/src/org/rascalmpl/library/Prelude.java index 94808851804..33036ee01ab 100644 --- a/src/org/rascalmpl/library/Prelude.java +++ b/src/org/rascalmpl/library/Prelude.java @@ -3616,14 +3616,6 @@ public ISourceLocation relativize(ISourceLocation outside, ISourceLocation insid return URIUtil.relativize(outside, inside); } - public IConstructor currentFileSystem() { - var tf = TypeFactory.getInstance(); - var ts = new TypeStore(); - var fs = tf.abstractDataType(ts, "FileSystemSyntax"); - Type cons = tf.constructor(ts, fs, org.apache.commons.io.FileSystem.getCurrent().name().toLowerCase()); - return values.constructor(cons); - } - public IValue readBinaryValueFile(IValue type, ISourceLocation loc){ if(trackIO) System.err.println("readBinaryValueFile: " + loc); diff --git a/src/org/rascalmpl/library/lang/paths/Windows.rsc b/src/org/rascalmpl/library/lang/paths/Windows.rsc index d50fb7b841b..09c68a886a5 100644 --- a/src/org/rascalmpl/library/lang/paths/Windows.rsc +++ b/src/org/rascalmpl/library/lang/paths/Windows.rsc @@ -9,7 +9,7 @@ The main function of this module, ((parseWindowsPath)): * faithfully maps any syntactically correctly Windows paths to syntactically correct `loc` values. * throws a ParseError if the path does not comply. Typically file names ending in spaces do not comply. * ensures that if the file exists on system A, then the `loc` representation -resolves to the same file on system A via any ((module::IO)) function. +resolves to the same file on system A via any ((Library:module:IO)) function. * and nothing more. No normalization, no interpretatioon of `.` and `..`, no changing of cases. This is left to downstream processors of `loc` values, if necessary. The current transformation is purely syntactical, and tries to preserve the semantics of the path as much as possible. 
@@ -57,23 +57,26 @@ loc parseWindowsPath(str input, loc src=|unknown:///|) = mapPathToLoc(parse(#Win @synopsis{UNC} loc mapPathToLoc((WindowsPath) ``) - = (|unc:///| + "" | it + "" | segment <- path.segments ); + = appendPath(|unc:///| + "", path); @synopsis{Absolute: given the drive and relative to its root.} loc mapPathToLoc((WindowsPath) `:`) - = (|file:///:/| | it + "" | segment <- path.segments); + = appendPath(|file:///:/|, path); @synopsis{Drive relative: relative to the current working directory on the given drive.} loc mapPathToLoc((WindowsPath) `:`) - = (|file:///:.| | it + "" | segment <- path.segments); + = appendPath(|file:///:.|, path); @synopsis{Directory relative: relative to the root of the current drive.} loc mapPathToLoc((WindowsPath) ``) - = (|cwdrive:///| | it + "" | segment <- path.segments); + = appendPath(|cwdrive:///|, path); @synopsis{Relative to the current working directory on the current drive.} loc mapPathToLoc((WindowsPath) ``) - = (|cwd:///| | it + "" | segment <- path.segments); + = appendPath(|cwd:///|, path); + +loc appendPath(loc root, WindowsFilePath path) + = (root | it + "" | segment <- path.segments); private bool IS_WINDOWS = /win/i := getSystemProperty("os.name"); diff --git a/src/org/rascalmpl/uri/file/FileURIResolver.java b/src/org/rascalmpl/uri/file/FileURIResolver.java index 2c87f50a3e0..9358c1804e7 100644 --- a/src/org/rascalmpl/uri/file/FileURIResolver.java +++ b/src/org/rascalmpl/uri/file/FileURIResolver.java @@ -143,7 +143,7 @@ public boolean exists(ISourceLocation uri) { * To override to build resolvers to specific locations using a prefix for example. */ protected String getPath(ISourceLocation uri) { - assert uri.hasAuthority(); + assert !uri.hasAuthority(); return uri.getPath(); } From 66d44db2437c3c2fc4ca831c5e7eadf1b24ca96f Mon Sep 17 00:00:00 2001 From: "Jurgen J. 
Vinju" Date: Tue, 14 May 2024 19:06:05 +0200 Subject: [PATCH 21/46] added failing test for DOS UNC drive letter C and added a rule to the grammar to cover it --- src/org/rascalmpl/library/lang/paths/Windows.rsc | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/org/rascalmpl/library/lang/paths/Windows.rsc b/src/org/rascalmpl/library/lang/paths/Windows.rsc index 09c68a886a5..0d647073f79 100644 --- a/src/org/rascalmpl/library/lang/paths/Windows.rsc +++ b/src/org/rascalmpl/library/lang/paths/Windows.rsc @@ -21,6 +21,7 @@ import util::SystemAPI; lexical WindowsPath = unc : Slash Slash Slashes? PathChar* hostName Slashes PathChar* shareName Slashes WindowsFilePath path + | uncDOS : Slash Slash Slashes? "?" Slashes PathChar* shareName Slashes WindowsFilePath path | absolute : Drive drive ":" Slashes WindowsFilePath path | driveRelative : Drive drive ":" WindowsFilePath path | directoryRelative: Slash WindowsFilePath @@ -59,6 +60,10 @@ loc parseWindowsPath(str input, loc src=|unknown:///|) = mapPathToLoc(parse(#Win loc mapPathToLoc((WindowsPath) ``) = appendPath(|unc:///| + "", path); +@synopsis{DOC UNC} +loc mapPathToLoc((WindowsPath) `?`) + = appendPath(|unc://%3F/| + "", path); + @synopsis{Absolute: given the drive and relative to its root.} loc mapPathToLoc((WindowsPath) `:`) = appendPath(|file:///:/|, path); @@ -88,6 +93,16 @@ test bool uncDrivePath() = parseWindowsPath("\\\\system07\\C$\\") == |unc://system07/C$|; +test bool uncDOSDrive() { + loc l = parseWindowsPath("\\\\?\\C$\\"); + + if (IS_WINDOWS) { + assert exists(l); + } + + return l == |unc://%3F/C$|; +} + test bool simpleDrivePathC() = parseWindowsPath("C:\\Program Files\\Rascal") == |file:///C:/Program%20Files/Rascal|; From c19b825f2f4636cf1fcdfe1b10fe4d8a6be8f917 Mon Sep 17 00:00:00 2001 From: "Jurgen J. 
Vinju" Date: Tue, 14 May 2024 19:07:16 +0200 Subject: [PATCH 22/46] forgot to add test activator --- src/org/rascalmpl/library/lang/paths/Windows.rsc | 2 +- test/org/rascalmpl/test/library/LibraryLangPaths.java | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) create mode 100644 test/org/rascalmpl/test/library/LibraryLangPaths.java diff --git a/src/org/rascalmpl/library/lang/paths/Windows.rsc b/src/org/rascalmpl/library/lang/paths/Windows.rsc index 0d647073f79..84ccb785b12 100644 --- a/src/org/rascalmpl/library/lang/paths/Windows.rsc +++ b/src/org/rascalmpl/library/lang/paths/Windows.rsc @@ -97,7 +97,7 @@ test bool uncDOSDrive() { loc l = parseWindowsPath("\\\\?\\C$\\"); if (IS_WINDOWS) { - assert exists(l); + assert exists(l + "Program Files"); } return l == |unc://%3F/C$|; diff --git a/test/org/rascalmpl/test/library/LibraryLangPaths.java b/test/org/rascalmpl/test/library/LibraryLangPaths.java new file mode 100644 index 00000000000..da795fc2edc --- /dev/null +++ b/test/org/rascalmpl/test/library/LibraryLangPaths.java @@ -0,0 +1,11 @@ +package org.rascalmpl.test.library; + +import org.junit.runner.RunWith; +import org.rascalmpl.test.infrastructure.RascalJUnitTestPrefix; +import org.rascalmpl.test.infrastructure.RascalJUnitTestRunner; + +@RunWith(RascalJUnitTestRunner.class) +@RascalJUnitTestPrefix("lang::paths") +public class LibraryLangPaths { + +} From f7ba7f11f858d6fc33881ff239c18299e0947c21 Mon Sep 17 00:00:00 2001 From: "Jurgen J. 
Vinju" Date: Tue, 14 May 2024 19:09:33 +0200 Subject: [PATCH 23/46] helper functions are now private to keep namespaces clean --- src/org/rascalmpl/library/lang/paths/Windows.rsc | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/org/rascalmpl/library/lang/paths/Windows.rsc b/src/org/rascalmpl/library/lang/paths/Windows.rsc index 84ccb785b12..eef9d20db0c 100644 --- a/src/org/rascalmpl/library/lang/paths/Windows.rsc +++ b/src/org/rascalmpl/library/lang/paths/Windows.rsc @@ -57,30 +57,30 @@ the right path separators are introduced. loc parseWindowsPath(str input, loc src=|unknown:///|) = mapPathToLoc(parse(#WindowsPath, input, src)); @synopsis{UNC} -loc mapPathToLoc((WindowsPath) ``) +private loc mapPathToLoc((WindowsPath) ``) = appendPath(|unc:///| + "", path); @synopsis{DOC UNC} -loc mapPathToLoc((WindowsPath) `?`) +private loc mapPathToLoc((WindowsPath) `?`) = appendPath(|unc://%3F/| + "", path); @synopsis{Absolute: given the drive and relative to its root.} -loc mapPathToLoc((WindowsPath) `:`) +private loc mapPathToLoc((WindowsPath) `:`) = appendPath(|file:///:/|, path); @synopsis{Drive relative: relative to the current working directory on the given drive.} -loc mapPathToLoc((WindowsPath) `:`) +private loc mapPathToLoc((WindowsPath) `:`) = appendPath(|file:///:.|, path); @synopsis{Directory relative: relative to the root of the current drive.} -loc mapPathToLoc((WindowsPath) ``) +private loc mapPathToLoc((WindowsPath) ``) = appendPath(|cwdrive:///|, path); @synopsis{Relative to the current working directory on the current drive.} -loc mapPathToLoc((WindowsPath) ``) +private loc mapPathToLoc((WindowsPath) ``) = appendPath(|cwd:///|, path); -loc appendPath(loc root, WindowsFilePath path) +private loc appendPath(loc root, WindowsFilePath path) = (root | it + "" | segment <- path.segments); private bool IS_WINDOWS = /win/i := getSystemProperty("os.name"); From 66e68aace0e9d35d468c4abcbc6a80b6be7664d9 Mon Sep 17 00:00:00 2001 From: 
"Jurgen J. Vinju" Date: Tue, 14 May 2024 19:19:51 +0200 Subject: [PATCH 24/46] activated extra tests --- pom.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/pom.xml b/pom.xml index 6cf51d5ff36..7cca6eef9fa 100644 --- a/pom.xml +++ b/pom.xml @@ -218,6 +218,7 @@ **/org/rascalmpl/test/AllSuiteParallel.java + **/org/rascalmpl/test/library/LibraryLangPaths.java **/org/rascalmpl/test/value/AllTests.java **/org/rascalmpl/*Test.java From a5f14a7492078d3757aef9aeb4812382cceecf14 Mon Sep 17 00:00:00 2001 From: "Jurgen J. Vinju" Date: Tue, 14 May 2024 20:10:53 +0200 Subject: [PATCH 25/46] added Posix paths --- .../rascalmpl/library/lang/paths/Posix.rsc | 104 ++++++++++++++++++ .../rascalmpl/library/lang/paths/Windows.rsc | 4 +- 2 files changed, 106 insertions(+), 2 deletions(-) create mode 100644 src/org/rascalmpl/library/lang/paths/Posix.rsc diff --git a/src/org/rascalmpl/library/lang/paths/Posix.rsc b/src/org/rascalmpl/library/lang/paths/Posix.rsc new file mode 100644 index 00000000000..3747813df7a --- /dev/null +++ b/src/org/rascalmpl/library/lang/paths/Posix.rsc @@ -0,0 +1,104 @@ +@synopsis{Defines the syntax of filesystem and network drive paths on DOS and Windows Systems.} +@description{ +This syntax definition of POSIX paths and file names, including some of the conventions +with ~ for the home folder and . and .. for relative directories. + +The main function of this module, ((parsePosixPath)): +* faithfully maps any syntactically correctly Posix paths to syntactically correct `loc` values. +* throws a ParseError if the path does not comply. +* ensures that if the file exists on system A, then the `loc` representation +resolves to the same file on system A via any ((Library:module:IO)) function. +* and nothing more. No normalization, no interpretatioon of `.` and `..`, no changing of cases. +This is left to downstream processors of `loc` values, if necessary. 
The current transformation +is purely syntactical, and tries to preserve the semantics of the path as much as possible. +} +@pitfalls{ +* the `~` notation is typically a feature of the shell and not of system paths. However it is so commonly +used to refer to the home directories of users that we've added an interpretation here with the `home:///` scheme. +} +module lang::paths::Posix + +lexical PosixPath + = absolute: Slashes PosixFilePath? + | relative: PosixFilePath + | home : "~" (Slashes PosixFilePath)? + | user : "~" UserName name (Slashes PosixFilePath)? + ; + +lexical UserName = ![/~]+; + +lexical PathChar = ![/]; + +lexical PathSegment + = current: "." + | parent : ".." + | name : (PathChar \ "~" PathChar*) \ ".." \ "." \ "~" + ; + +lexical Slashes = Slash+ !>> [/]; + +lexical Slash = [/]; + +lexical PosixFilePath = {PathSegment Slashes}+ segments Slashes?; + +import ParseTree; + +@synopsis{Convert a Posix path literal to a source location URI} +@description{ +1. parses the path using the grammar for ((PosixPath)) +2. takes the _literal_ name components using string interpolation `""`. This means no decoding/encoding happens at all while extracting +hostname, share name and path segment names. Also all superfluous path separators are skipped. +3. uses `loc + str` path concatenation with its builtin character encoding to construct the URI. Also +the right path separators are introduced. 
+} +loc parsePosixPath(str input, loc src=|unknown:///|) = mapPathToLoc(parse(#PosixPath, input, src)); + +@synopsis{Root is a special case} +private loc mapPathToLoc((PosixPath) ``) + = |file:///|; + +@synopsis{Absolute: given the drive and relative to its root.} +private loc mapPathToLoc((PosixPath) ``) + = appendPath(|file:///|, path); + +@synopsis{Relative: relative to the current working directory.} +private loc mapPathToLoc((PosixPath) ``) + = appendPath(|cwd:///|, path); + +@synopsis{Home relative: relative to the current users home directory} +private loc mapPathToLoc((PosixPath) `~`) + = appendPath(|home:///|, path); + +@synopsis{Home relative: relative to the current users home directory} +private loc mapPathToLoc((PosixPath) `~`) + = |home:///|; + +@synopsis{User relative: relative to any specific user's home directory} +private loc mapPathToLoc((PosixPath) `~`) + = appendPath(|home:///..//|, path); + +@synopsis{User relative: relative to any specific user's home directory} +private loc mapPathToLoc((PosixPath) `~`) + = |home:///..//|; + +private loc appendPath(loc root, PosixFilePath path) + = (root | it + "" | segment <- path.segments); + +test bool root() + = parsePosixPath("/") == |file:///|; + +test bool absolutePath() + = parsePosixPath("/usr/local/bin") + == |file:///usr/local/bin|; + +test bool relativePath() + = parsePosixPath(".bash_rc") + == |cwd:///.bash_rc|; + +test bool homePath() + = parsePosixPath("~/.bash_profile") + == |home:///.bash_profile|; + +test bool userPath() + = parsePosixPath("~root/.bash_profile") + == |home:///../root/.bash_profile|; diff --git a/src/org/rascalmpl/library/lang/paths/Windows.rsc b/src/org/rascalmpl/library/lang/paths/Windows.rsc index eef9d20db0c..5c54855c0b6 100644 --- a/src/org/rascalmpl/library/lang/paths/Windows.rsc +++ b/src/org/rascalmpl/library/lang/paths/Windows.rsc @@ -18,7 +18,7 @@ module lang::paths::Windows import IO; import util::SystemAPI; - + lexical WindowsPath = unc : Slash Slash Slashes? 
PathChar* hostName Slashes PathChar* shareName Slashes WindowsFilePath path | uncDOS : Slash Slash Slashes? "?" Slashes PathChar* shareName Slashes WindowsFilePath path @@ -128,4 +128,4 @@ test bool uncNetworkShareOk() { else { return |unc://localhost/ADMIN$/System32/cmd.exe| == l; } -} \ No newline at end of file +} From 8b9d45cc30a0715d669500b5bb38bcc7ea369f05 Mon Sep 17 00:00:00 2001 From: "Jurgen J. Vinju" Date: Tue, 14 May 2024 20:13:56 +0200 Subject: [PATCH 26/46] renamed Posix to Unix --- src/org/rascalmpl/library/Location.rsc | 10 ++++ .../lang/paths/{Posix.rsc => Unix.rsc} | 50 +++++++++---------- 2 files changed, 35 insertions(+), 25 deletions(-) rename src/org/rascalmpl/library/lang/paths/{Posix.rsc => Unix.rsc} (66%) diff --git a/src/org/rascalmpl/library/Location.rsc b/src/org/rascalmpl/library/Location.rsc index 416ea2961e3..b38d612b708 100644 --- a/src/org/rascalmpl/library/Location.rsc +++ b/src/org/rascalmpl/library/Location.rsc @@ -57,6 +57,16 @@ Windows paths, against popular believe, support both `/` and `\` as path separat } loc locFromWindowsPath(str path) = parseWindowsPath(path); +@synopsis{Convert Unix path syntax to a `loc` value} +@description{ +This conversion supports generic Unix path syntax, including: +* Absolute: `/usr/local/bin` +* Relative: `hello.txt` +* Home: `~/hello.txt` +* User: `~userName/hello.txt` +} +loc locFromUnixPath(str path) = parseUnixPath(path); + @synopsis{Check that two locations refer to the same file.} bool isSameFile(loc l, loc r) = l.top[fragment=""] == r.top[fragment=""]; diff --git a/src/org/rascalmpl/library/lang/paths/Posix.rsc b/src/org/rascalmpl/library/lang/paths/Unix.rsc similarity index 66% rename from src/org/rascalmpl/library/lang/paths/Posix.rsc rename to src/org/rascalmpl/library/lang/paths/Unix.rsc index 3747813df7a..da6afe44b32 100644 --- a/src/org/rascalmpl/library/lang/paths/Posix.rsc +++ b/src/org/rascalmpl/library/lang/paths/Unix.rsc @@ -3,8 +3,8 @@ This syntax definition of POSIX paths and 
file names, including some of the conventions with ~ for the home folder and . and .. for relative directories. -The main function of this module, ((parsePosixPath)): -* faithfully maps any syntactically correctly Posix paths to syntactically correct `loc` values. +The main function of this module, ((parseUnixPath)): +* faithfully maps any syntactically correctly Unix paths to syntactically correct `loc` values. * throws a ParseError if the path does not comply. * ensures that if the file exists on system A, then the `loc` representation resolves to the same file on system A via any ((Library:module:IO)) function. @@ -16,13 +16,13 @@ is purely syntactical, and tries to preserve the semantics of the path as much a * the `~` notation is typically a feature of the shell and not of system paths. However it is so commonly used to refer to the home directories of users that we've added an interpretation here with the `home:///` scheme. } -module lang::paths::Posix +module lang::paths::Unix -lexical PosixPath - = absolute: Slashes PosixFilePath? - | relative: PosixFilePath - | home : "~" (Slashes PosixFilePath)? - | user : "~" UserName name (Slashes PosixFilePath)? +lexical UnixPath + = absolute: Slashes UnixFilePath? + | relative: UnixFilePath + | home : "~" (Slashes UnixFilePath)? + | user : "~" UserName name (Slashes UnixFilePath)? ; lexical UserName = ![/~]+; @@ -39,66 +39,66 @@ lexical Slashes = Slash+ !>> [/]; lexical Slash = [/]; -lexical PosixFilePath = {PathSegment Slashes}+ segments Slashes?; +lexical UnixFilePath = {PathSegment Slashes}+ segments Slashes?; import ParseTree; -@synopsis{Convert a Posix path literal to a source location URI} +@synopsis{Convert a Unix path literal to a source location URI} @description{ -1. parses the path using the grammar for ((PosixPath)) +1. parses the path using the grammar for ((UnixPath)) 2. takes the _literal_ name components using string interpolation `""`. 
This means no decoding/encoding happens at all while extracting hostname, share name and path segment names. Also all superfluous path separators are skipped. 3. uses `loc + str` path concatenation with its builtin character encoding to construct the URI. Also the right path separators are introduced. } -loc parsePosixPath(str input, loc src=|unknown:///|) = mapPathToLoc(parse(#PosixPath, input, src)); +loc parseUnixPath(str input, loc src=|unknown:///|) = mapPathToLoc(parse(#UnixPath, input, src)); @synopsis{Root is a special case} -private loc mapPathToLoc((PosixPath) ``) +private loc mapPathToLoc((UnixPath) ``) = |file:///|; @synopsis{Absolute: given the drive and relative to its root.} -private loc mapPathToLoc((PosixPath) ``) +private loc mapPathToLoc((UnixPath) ``) = appendPath(|file:///|, path); @synopsis{Relative: relative to the current working directory.} -private loc mapPathToLoc((PosixPath) ``) +private loc mapPathToLoc((UnixPath) ``) = appendPath(|cwd:///|, path); @synopsis{Home relative: relative to the current users home directory} -private loc mapPathToLoc((PosixPath) `~`) +private loc mapPathToLoc((UnixPath) `~`) = appendPath(|home:///|, path); @synopsis{Home relative: relative to the current users home directory} -private loc mapPathToLoc((PosixPath) `~`) +private loc mapPathToLoc((UnixPath) `~`) = |home:///|; @synopsis{User relative: relative to any specific user's home directory} -private loc mapPathToLoc((PosixPath) `~`) +private loc mapPathToLoc((UnixPath) `~`) = appendPath(|home:///..//|, path); @synopsis{User relative: relative to any specific user's home directory} -private loc mapPathToLoc((PosixPath) `~`) +private loc mapPathToLoc((UnixPath) `~`) = |home:///..//|; -private loc appendPath(loc root, PosixFilePath path) +private loc appendPath(loc root, UnixFilePath path) = (root | it + "" | segment <- path.segments); test bool root() - = parsePosixPath("/") == |file:///|; + = parseUnixPath("/") == |file:///|; test bool absolutePath() - = 
parsePosixPath("/usr/local/bin") + = parseUnixPath("/usr/local/bin") == |file:///usr/local/bin|; test bool relativePath() - = parsePosixPath(".bash_rc") + = parseUnixPath(".bash_rc") == |cwd:///.bash_rc|; test bool homePath() - = parsePosixPath("~/.bash_profile") + = parseUnixPath("~/.bash_profile") == |home:///.bash_profile|; test bool userPath() - = parsePosixPath("~root/.bash_profile") + = parseUnixPath("~root/.bash_profile") == |home:///../root/.bash_profile|; From b858f18eca9026a8e98c1445a979878ba49319e6 Mon Sep 17 00:00:00 2001 From: "Jurgen J. Vinju" Date: Tue, 14 May 2024 21:19:27 +0200 Subject: [PATCH 27/46] added import --- src/org/rascalmpl/library/Location.rsc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/org/rascalmpl/library/Location.rsc b/src/org/rascalmpl/library/Location.rsc index b38d612b708..50b82dab347 100644 --- a/src/org/rascalmpl/library/Location.rsc +++ b/src/org/rascalmpl/library/Location.rsc @@ -25,6 +25,7 @@ import String; import Exception; import lang::paths::Windows; +import lang::paths::Unix; @synopsis{Extracts a path relative to a parent location.} @description{ From 1d0034c604102d80228264144aa50c16627ee168 Mon Sep 17 00:00:00 2001 From: "Jurgen J. 
Vinju" Date: Tue, 14 May 2024 21:20:39 +0200 Subject: [PATCH 28/46] removed net command experiment --- .github/workflows/build.yaml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index b37cf74522c..57296457eb0 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -133,11 +133,6 @@ jobs: # single quotes to help windows deal with argument splitting run: mvn -B '-Drascal.compile.skip' '-Drascal.tutor.skip' '-Drascal.test.memory=3' test - - name: Little experiment - if: runner.os == 'Windows' - run: | - net share - - uses: codecov/codecov-action@v3 continue-on-error: true # sometimes this one fails, that shouldn't stop a build with: From de6061a36afaa057536fbbb06a1572cb50005038 Mon Sep 17 00:00:00 2001 From: "Jurgen J. Vinju" Date: Tue, 14 May 2024 22:52:33 +0200 Subject: [PATCH 29/46] made test easier --- src/org/rascalmpl/library/lang/paths/Windows.rsc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/org/rascalmpl/library/lang/paths/Windows.rsc b/src/org/rascalmpl/library/lang/paths/Windows.rsc index 5c54855c0b6..904da101957 100644 --- a/src/org/rascalmpl/library/lang/paths/Windows.rsc +++ b/src/org/rascalmpl/library/lang/paths/Windows.rsc @@ -97,7 +97,7 @@ test bool uncDOSDrive() { loc l = parseWindowsPath("\\\\?\\C$\\"); if (IS_WINDOWS) { - assert exists(l + "Program Files"); + assert exists(l); } return l == |unc://%3F/C$|; From 27a116348fb4fd078513e22afc63aa806643c1a9 Mon Sep 17 00:00:00 2001 From: Davy Landman Date: Wed, 15 May 2024 11:32:04 +0200 Subject: [PATCH 30/46] Added some more UNC related tests --- .../rascalmpl/library/lang/paths/Windows.rsc | 27 ++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/src/org/rascalmpl/library/lang/paths/Windows.rsc b/src/org/rascalmpl/library/lang/paths/Windows.rsc index 904da101957..2b659b24e9c 100644 --- a/src/org/rascalmpl/library/lang/paths/Windows.rsc +++ 
b/src/org/rascalmpl/library/lang/paths/Windows.rsc @@ -93,14 +93,25 @@ test bool uncDrivePath() = parseWindowsPath("\\\\system07\\C$\\") == |unc://system07/C$|; -test bool uncDOSDrive() { - loc l = parseWindowsPath("\\\\?\\C$\\"); + +test bool uncDOSDevicePathLocalFileNonNormalized() { + loc l = parseWindowsPath("\\\\?\\c:"); + + if (IS_WINDOWS) { + assert exists(l); + } + + return l == |unc://%3F/C:|; +} + +test bool uncDOSDevicePathLocalFileNormalized() { + loc l = parseWindowsPath("\\\\.\\c:"); if (IS_WINDOWS) { assert exists(l); } - return l == |unc://%3F/C$|; + return l == |unc://./C:|; } test bool simpleDrivePathC() @@ -129,3 +140,13 @@ test bool uncNetworkShareOk() { return |unc://localhost/ADMIN$/System32/cmd.exe| == l; } } + +test bool uncDOSDevicePathShare() { + loc l = parseWindowsPath("\\\\?\\UNC\\localhost\\ADMIN$\\System32\\cmd.exe"); + + if (IS_WINDOWS) { + assert exists(l); + } + + return |unc://%3F/UNC/localhost/ADMIN$/System32/cmd.exe| == l; +} From 11a3da8464f2cbe4650ea34058ad47d07adcd849 Mon Sep 17 00:00:00 2001 From: "Jurgen J. Vinju" Date: Tue, 14 May 2024 23:29:02 +0200 Subject: [PATCH 31/46] typo --- src/org/rascalmpl/library/lang/paths/Windows.rsc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/org/rascalmpl/library/lang/paths/Windows.rsc b/src/org/rascalmpl/library/lang/paths/Windows.rsc index 2b659b24e9c..90d6feb021b 100644 --- a/src/org/rascalmpl/library/lang/paths/Windows.rsc +++ b/src/org/rascalmpl/library/lang/paths/Windows.rsc @@ -60,7 +60,7 @@ loc parseWindowsPath(str input, loc src=|unknown:///|) = mapPathToLoc(parse(#Win private loc mapPathToLoc((WindowsPath) ``) = appendPath(|unc:///| + "", path); -@synopsis{DOC UNC} +@synopsis{DOS UNC} private loc mapPathToLoc((WindowsPath) `?`) = appendPath(|unc://%3F/| + "", path); From 9979e48b5c6532d0daa788f333f76aacbbf647cd Mon Sep 17 00:00:00 2001 From: "Jurgen J. 
Vinju" Date: Wed, 15 May 2024 14:54:15 +0200 Subject: [PATCH 32/46] docs --- src/org/rascalmpl/library/lang/paths/Unix.rsc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/org/rascalmpl/library/lang/paths/Unix.rsc b/src/org/rascalmpl/library/lang/paths/Unix.rsc index da6afe44b32..4e89ac113e6 100644 --- a/src/org/rascalmpl/library/lang/paths/Unix.rsc +++ b/src/org/rascalmpl/library/lang/paths/Unix.rsc @@ -15,6 +15,10 @@ is purely syntactical, and tries to preserve the semantics of the path as much a @pitfalls{ * the `~` notation is typically a feature of the shell and not of system paths. However it is so commonly used to refer to the home directories of users that we've added an interpretation here with the `home:///` scheme. +* otherwise, the path syntax may be different from what you have to type in _bash_ or _zsh_. This is because shells +need to reserve characters, like spaces, for different purposes (commandline argument separation). The +current definition is about the path notation that shells like _zsh_ and _bash_, and other programs, have to pass into the string arguments of +OS features, after their own concatenation, splicing, variable expansion, de-escaping and unquoting routines have finished. } module lang::paths::Unix From b760b47b64f475e06f78ee60688aa733cd7f72f0 Mon Sep 17 00:00:00 2001 From: "Jurgen J. Vinju" Date: Wed, 15 May 2024 15:29:07 +0200 Subject: [PATCH 33/46] fixed DOS Device syntax by allowing colons after drive letters and disallowing . and .. as the first path of a DOS Device UNC path. Also removed normalization test since the parser is not going to normalize anything. Too many ifs and buts that need to be dealt with carefully for all kinds of filesystems. 
--- .../rascalmpl/library/lang/paths/Windows.rsc | 51 ++++++++++++------- 1 file changed, 33 insertions(+), 18 deletions(-) diff --git a/src/org/rascalmpl/library/lang/paths/Windows.rsc b/src/org/rascalmpl/library/lang/paths/Windows.rsc index 90d6feb021b..42c66ce18bb 100644 --- a/src/org/rascalmpl/library/lang/paths/Windows.rsc +++ b/src/org/rascalmpl/library/lang/paths/Windows.rsc @@ -20,14 +20,22 @@ import IO; import util::SystemAPI; lexical WindowsPath - = unc : Slash Slash Slashes? PathChar* hostName Slashes PathChar* shareName Slashes WindowsFilePath path - | uncDOS : Slash Slash Slashes? "?" Slashes PathChar* shareName Slashes WindowsFilePath path - | absolute : Drive drive ":" Slashes WindowsFilePath path - | driveRelative : Drive drive ":" WindowsFilePath path + = unc : Slash Slash Slashes? PathChar* Slashes PathChar* Slashes WindowsFilePath + | uncDOSDrive : Slash Slash Slashes? DOSDevice Slashes Drive ":" OptionalWindowsFilePath + | uncDOSPath : Slash Slash Slashes? DOSDevice Slashes PathChar* Slashes WindowsFilePath + | absolute : Drive ":" Slashes WindowsFilePath + | driveRelative : Drive ":" WindowsFilePath | directoryRelative: Slash WindowsFilePath - | relative : WindowsFilePath path + | relative : WindowsFilePath ; +lexical OptionalWindowsFilePath + = () + | Slashes WindowsFilePath + ; + +lexical DOSDevice = [.?]; + lexical PathChar = !([\a00-\a20\< \> : \" | ? 
* \\ /] - [\ ]); lexical PathSegment @@ -60,10 +68,22 @@ loc parseWindowsPath(str input, loc src=|unknown:///|) = mapPathToLoc(parse(#Win private loc mapPathToLoc((WindowsPath) ``) = appendPath(|unc:///| + "", path); -@synopsis{DOS UNC} +@synopsis{DOS UNC Device Drive} +private loc mapPathToLoc((WindowsPath) `:`) + = appendPath(|unc:///| + ":", path); + +@synopsis{DOS UNC Device Path} +private loc mapPathToLoc((WindowsPath) ``) + = appendPath(|unc:///| + "", path); + +private str deviceIndicator((DOSDevice) `?`) = "%3F"; +private str deviceIndicator((DOSDevice) `.`) = "%2E"; + +@synopsis{DOS UNCPath} private loc mapPathToLoc((WindowsPath) `?`) = appendPath(|unc://%3F/| + "", path); + @synopsis{Absolute: given the drive and relative to its root.} private loc mapPathToLoc((WindowsPath) `:`) = appendPath(|file:///:/|, path); @@ -83,6 +103,11 @@ private loc mapPathToLoc((WindowsPath) ``) private loc appendPath(loc root, WindowsFilePath path) = (root | it + "" | segment <- path.segments); +private loc appendPath(loc root, (OptionalWindowsFilePath) ``) = root; + +private loc appendPath(loc root, (OptionalWindowsFilePath) ``) + = appendPath(root, path); + private bool IS_WINDOWS = /win/i := getSystemProperty("os.name"); test bool uncSharePath() @@ -94,24 +119,14 @@ test bool uncDrivePath() == |unc://system07/C$|; -test bool uncDOSDevicePathLocalFileNonNormalized() { +test bool uncDOSDevicePathLocalFile() { loc l = parseWindowsPath("\\\\?\\c:"); if (IS_WINDOWS) { assert exists(l); } - return l == |unc://%3F/C:|; -} - -test bool uncDOSDevicePathLocalFileNormalized() { - loc l = parseWindowsPath("\\\\.\\c:"); - - if (IS_WINDOWS) { - assert exists(l); - } - - return l == |unc://./C:|; + return l == |unc://%3F/c:|; } test bool simpleDrivePathC() From 6252e058b7dd125675f85529a19e4ec63324149e Mon Sep 17 00:00:00 2001 From: "Jurgen J. 
Vinju" Date: Wed, 15 May 2024 15:36:09 +0200 Subject: [PATCH 34/46] added docs --- src/org/rascalmpl/library/lang/paths/Windows.rsc | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/org/rascalmpl/library/lang/paths/Windows.rsc b/src/org/rascalmpl/library/lang/paths/Windows.rsc index 42c66ce18bb..c4bb44312dd 100644 --- a/src/org/rascalmpl/library/lang/paths/Windows.rsc +++ b/src/org/rascalmpl/library/lang/paths/Windows.rsc @@ -14,6 +14,13 @@ resolves to the same file on system A via any ((Library:module:IO)) function. This is left to downstream processors of `loc` values, if necessary. The current transformation is purely syntactical, and tries to preserve the semantics of the path as much as possible. } +@pitfalls{ +* Length limitations are not implemented by this parser. This means that overly long names will lead +to IO exceptions when they are finally used. +* The names of drives, files and devices are mapped as-is, without normalization. This means that +the resulting `loc` value may not be a _canonical_ representation of the identified resource. +Normalization of `loc` values is for a different function TBD. +} module lang::paths::Windows import IO; From 9419843580cc05162503e47999f41d2d94db98a0 Mon Sep 17 00:00:00 2001 From: "Jurgen J. Vinju" Date: Wed, 15 May 2024 16:16:04 +0200 Subject: [PATCH 35/46] added test with a dot DOS Device path instead of ? 
--- src/org/rascalmpl/library/lang/paths/Windows.rsc | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/org/rascalmpl/library/lang/paths/Windows.rsc b/src/org/rascalmpl/library/lang/paths/Windows.rsc index c4bb44312dd..92b1fc74589 100644 --- a/src/org/rascalmpl/library/lang/paths/Windows.rsc +++ b/src/org/rascalmpl/library/lang/paths/Windows.rsc @@ -126,7 +126,7 @@ test bool uncDrivePath() == |unc://system07/C$|; -test bool uncDOSDevicePathLocalFile() { +test bool uncDOSDevicePathLocalFileQuestion() { loc l = parseWindowsPath("\\\\?\\c:"); if (IS_WINDOWS) { @@ -136,6 +136,16 @@ test bool uncDOSDevicePathLocalFile() { return l == |unc://%3F/c:|; } +test bool uncDOSDevicePathLocalFileDot() { + loc l = parseWindowsPath("\\\\.\\C:\\Test\\Foo.txt"); + + if (IS_WINDOWS) { + assert exists(l); + } + + return l == |unc://%2E/C:/Test/Foo.txt|; +} + test bool simpleDrivePathC() = parseWindowsPath("C:\\Program Files\\Rascal") == |file:///C:/Program%20Files/Rascal|; From 21bffec617096414e4187496b31cf104534f48fe Mon Sep 17 00:00:00 2001 From: "Jurgen J. 
Vinju" Date: Wed, 15 May 2024 16:19:23 +0200 Subject: [PATCH 36/46] added a failing test for DOS Device UNCs --- src/org/rascalmpl/library/lang/paths/Windows.rsc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/org/rascalmpl/library/lang/paths/Windows.rsc b/src/org/rascalmpl/library/lang/paths/Windows.rsc index 92b1fc74589..4988d2da920 100644 --- a/src/org/rascalmpl/library/lang/paths/Windows.rsc +++ b/src/org/rascalmpl/library/lang/paths/Windows.rsc @@ -146,6 +146,12 @@ test bool uncDOSDevicePathLocalFileDot() { return l == |unc://%2E/C:/Test/Foo.txt|; } +test bool uncDOSDeviceVolumeGUIDReference() { + loc l = parseWindowsPath("\\\\.\\Volume{b75e2c83-0000-0000-0000-602f00000000}\\Test\\Foo.txt"); + + return l == |unc://%2E/Volume%7Bb75e2c83-0000-0000-0000-602f00000000%7D/Test/Foo.txt|; +} + test bool simpleDrivePathC() = parseWindowsPath("C:\\Program Files\\Rascal") == |file:///C:/Program%20Files/Rascal|; From 1a147131c35c3069a003c69e4ff3c15ea58f2c74 Mon Sep 17 00:00:00 2001 From: "Jurgen J. Vinju" Date: Wed, 15 May 2024 16:27:46 +0200 Subject: [PATCH 37/46] fixed ambiguity --- src/org/rascalmpl/library/lang/paths/Windows.rsc | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/org/rascalmpl/library/lang/paths/Windows.rsc b/src/org/rascalmpl/library/lang/paths/Windows.rsc index 4988d2da920..ba6c804ea87 100644 --- a/src/org/rascalmpl/library/lang/paths/Windows.rsc +++ b/src/org/rascalmpl/library/lang/paths/Windows.rsc @@ -27,7 +27,7 @@ import IO; import util::SystemAPI; lexical WindowsPath - = unc : Slash Slash Slashes? PathChar* Slashes PathChar* Slashes WindowsFilePath + = unc : Slash Slash Slashes? PathChar* \ "." Slashes PathChar* Slashes WindowsFilePath | uncDOSDrive : Slash Slash Slashes? DOSDevice Slashes Drive ":" OptionalWindowsFilePath | uncDOSPath : Slash Slash Slashes? 
DOSDevice Slashes PathChar* Slashes WindowsFilePath | absolute : Drive ":" Slashes WindowsFilePath @@ -152,6 +152,11 @@ test bool uncDOSDeviceVolumeGUIDReference() { return l == |unc://%2E/Volume%7Bb75e2c83-0000-0000-0000-602f00000000%7D/Test/Foo.txt|; } +test bool uncDOSDeviceBootPartition() { + loc l = parseWindowsPath("\\\\.\\BootPartition\\"); + return l == |unc://%2E/BootPartition|; +} + test bool simpleDrivePathC() = parseWindowsPath("C:\\Program Files\\Rascal") == |file:///C:/Program%20Files/Rascal|; From 241b68bb48857e068574cc4383302ebb8e231c67 Mon Sep 17 00:00:00 2001 From: "Jurgen J. Vinju" Date: Wed, 15 May 2024 16:31:56 +0200 Subject: [PATCH 38/46] added test for the UNC loopback DOS Device UNC path --- src/org/rascalmpl/library/lang/paths/Windows.rsc | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/org/rascalmpl/library/lang/paths/Windows.rsc b/src/org/rascalmpl/library/lang/paths/Windows.rsc index ba6c804ea87..833a2631121 100644 --- a/src/org/rascalmpl/library/lang/paths/Windows.rsc +++ b/src/org/rascalmpl/library/lang/paths/Windows.rsc @@ -138,6 +138,7 @@ test bool uncDOSDevicePathLocalFileQuestion() { test bool uncDOSDevicePathLocalFileDot() { loc l = parseWindowsPath("\\\\.\\C:\\Test\\Foo.txt"); + if (IS_WINDOWS) { assert exists(l); @@ -146,6 +147,16 @@ test bool uncDOSDevicePathLocalFileDot() { return l == |unc://%2E/C:/Test/Foo.txt|; } +test bool uncDOSDeviceUNCSharePath() { + // the entire UNC namespace is looped back into the DOS Device UNC encoding via + // the reserved name "UNC": + loc m1 = parseWindowsPath("\\\\?\\UNC\\Server\\Share\\Test\\Foo.txt"); + loc m2 = parseWindowsPath("\\\\.\\UNC\\Server\\Share\\Test\\Foo.txt"); + + return m1 == |unc://%3F/UNC/Server/Share/Test/Foo.txt| + && m2 == |unc://%2E/UNC/Server/Share/Test/Foo.txt|; +} + test bool uncDOSDeviceVolumeGUIDReference() { loc l = parseWindowsPath("\\\\.\\Volume{b75e2c83-0000-0000-0000-602f00000000}\\Test\\Foo.txt"); From 824c19d61cd45310469e8529db79d1ca2e102590 
Mon Sep 17 00:00:00 2001 From: "Jurgen J. Vinju" Date: Wed, 15 May 2024 17:01:37 +0200 Subject: [PATCH 39/46] added debug statements to debug failing test on Windows --- src/org/rascalmpl/uri/file/UNCResolver.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/org/rascalmpl/uri/file/UNCResolver.java b/src/org/rascalmpl/uri/file/UNCResolver.java index af2e30a36fd..3cac911589c 100644 --- a/src/org/rascalmpl/uri/file/UNCResolver.java +++ b/src/org/rascalmpl/uri/file/UNCResolver.java @@ -24,7 +24,9 @@ protected String getPath(ISourceLocation uri) { if (uri.hasAuthority()) { // downstream methods will use `new File` and `new FileInputStream` // which are able to parse UNC's on Windows. - return "\\\\" + uri.getAuthority() + "\\" + uri.getPath(); + // TODO: remove debug statements + System.err.println("UNC resolver produced this path: " + "\\\\" + uri.getAuthority() + "\\" + uri.getPath()); + return "\\\\" + uri.getAuthority() + "\\" + uri.getPath().replaceAll("/", "\\"); } else { // just a normal absolute path From 10b7bc9087e7d62dabb5e9c126c9622ceb609535 Mon Sep 17 00:00:00 2001 From: "Jurgen J. Vinju" Date: Wed, 15 May 2024 17:15:04 +0200 Subject: [PATCH 40/46] sigh --- src/org/rascalmpl/uri/file/UNCResolver.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/org/rascalmpl/uri/file/UNCResolver.java b/src/org/rascalmpl/uri/file/UNCResolver.java index 3cac911589c..faf2e34d2c3 100644 --- a/src/org/rascalmpl/uri/file/UNCResolver.java +++ b/src/org/rascalmpl/uri/file/UNCResolver.java @@ -2,6 +2,7 @@ import java.io.FileNotFoundException; import java.io.IOException; +import java.util.regex.Pattern; import io.usethesource.vallang.ISourceLocation; @@ -26,7 +27,7 @@ protected String getPath(ISourceLocation uri) { // which are able to parse UNC's on Windows. 
// TODO: remove debug statements System.err.println("UNC resolver produced this path: " + "\\\\" + uri.getAuthority() + "\\" + uri.getPath()); - return "\\\\" + uri.getAuthority() + "\\" + uri.getPath().replaceAll("/", "\\"); + return "\\\\" + uri.getAuthority() + "\\" + uri.getPath().replaceAll(Pattern.quote("/"), Pattern.quote("\\")); } else { // just a normal absolute path From 62a0069cf907136197419a2f07f840e143484f7a Mon Sep 17 00:00:00 2001 From: "Jurgen J. Vinju" Date: Thu, 16 May 2024 13:06:47 +0200 Subject: [PATCH 41/46] debugging a hunch that extra slash is painful before a drive letter --- src/org/rascalmpl/library/lang/paths/Windows.rsc | 2 +- src/org/rascalmpl/uri/file/UNCResolver.java | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/org/rascalmpl/library/lang/paths/Windows.rsc b/src/org/rascalmpl/library/lang/paths/Windows.rsc index 833a2631121..9c0cb46f369 100644 --- a/src/org/rascalmpl/library/lang/paths/Windows.rsc +++ b/src/org/rascalmpl/library/lang/paths/Windows.rsc @@ -128,7 +128,7 @@ test bool uncDrivePath() test bool uncDOSDevicePathLocalFileQuestion() { loc l = parseWindowsPath("\\\\?\\c:"); - + if (IS_WINDOWS) { assert exists(l); } diff --git a/src/org/rascalmpl/uri/file/UNCResolver.java b/src/org/rascalmpl/uri/file/UNCResolver.java index faf2e34d2c3..77996158da7 100644 --- a/src/org/rascalmpl/uri/file/UNCResolver.java +++ b/src/org/rascalmpl/uri/file/UNCResolver.java @@ -27,7 +27,12 @@ protected String getPath(ISourceLocation uri) { // which are able to parse UNC's on Windows. 
// TODO: remove debug statements System.err.println("UNC resolver produced this path: " + "\\\\" + uri.getAuthority() + "\\" + uri.getPath()); - return "\\\\" + uri.getAuthority() + "\\" + uri.getPath().replaceAll(Pattern.quote("/"), Pattern.quote("\\")); + String path = uri.getPath(); + if (path.startsWith("/")) { + path = path.substring(1); + } + // path = path.replaceAll(Pattern.quote("/"), Pattern.quote("\\")); + return "\\\\" + uri.getAuthority() + "\\" + path; } else { // just a normal absolute path From 03d0cb43f0c673ff8e999e3a6645bd2555ea52d8 Mon Sep 17 00:00:00 2001 From: "Jurgen J. Vinju" Date: Thu, 16 May 2024 13:46:56 +0200 Subject: [PATCH 42/46] dot does not need escaping in authority part of unc uris --- src/org/rascalmpl/library/lang/paths/Windows.rsc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/org/rascalmpl/library/lang/paths/Windows.rsc b/src/org/rascalmpl/library/lang/paths/Windows.rsc index 9c0cb46f369..04dfad68b0b 100644 --- a/src/org/rascalmpl/library/lang/paths/Windows.rsc +++ b/src/org/rascalmpl/library/lang/paths/Windows.rsc @@ -84,7 +84,7 @@ private loc mapPathToLoc((WindowsPath) `/| + "", path); private str deviceIndicator((DOSDevice) `?`) = "%3F"; -private str deviceIndicator((DOSDevice) `.`) = "%2E"; +private str deviceIndicator((DOSDevice) `.`) = "."; @synopsis{DOS UNCPath} private loc mapPathToLoc((WindowsPath) `?`) @@ -144,7 +144,7 @@ test bool uncDOSDevicePathLocalFileDot() { assert exists(l); } - return l == |unc://%2E/C:/Test/Foo.txt|; + return l == |unc://./C:/Test/Foo.txt|; } test bool uncDOSDeviceUNCSharePath() { @@ -154,18 +154,18 @@ test bool uncDOSDeviceUNCSharePath() { loc m2 = parseWindowsPath("\\\\.\\UNC\\Server\\Share\\Test\\Foo.txt"); return m1 == |unc://%3F/UNC/Server/Share/Test/Foo.txt| - && m2 == |unc://%2E/UNC/Server/Share/Test/Foo.txt|; + && m2 == |unc://./UNC/Server/Share/Test/Foo.txt|; } test bool uncDOSDeviceVolumeGUIDReference() { loc l = 
parseWindowsPath("\\\\.\\Volume{b75e2c83-0000-0000-0000-602f00000000}\\Test\\Foo.txt"); - return l == |unc://%2E/Volume%7Bb75e2c83-0000-0000-0000-602f00000000%7D/Test/Foo.txt|; + return l == |unc://./Volume%7Bb75e2c83-0000-0000-0000-602f00000000%7D/Test/Foo.txt|; } test bool uncDOSDeviceBootPartition() { loc l = parseWindowsPath("\\\\.\\BootPartition\\"); - return l == |unc://%2E/BootPartition|; + return l == |unc://./BootPartition|; } test bool simpleDrivePathC() From d8bb19a20b782d8abd7964dbc485bcf4ef2cfb3b Mon Sep 17 00:00:00 2001 From: "Jurgen J. Vinju" Date: Thu, 16 May 2024 16:01:06 +0200 Subject: [PATCH 43/46] fixed copy/paste error found by @davylandman --- src/org/rascalmpl/library/lang/paths/Windows.rsc | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/org/rascalmpl/library/lang/paths/Windows.rsc b/src/org/rascalmpl/library/lang/paths/Windows.rsc index 04dfad68b0b..719c645f63b 100644 --- a/src/org/rascalmpl/library/lang/paths/Windows.rsc +++ b/src/org/rascalmpl/library/lang/paths/Windows.rsc @@ -139,11 +139,6 @@ test bool uncDOSDevicePathLocalFileQuestion() { test bool uncDOSDevicePathLocalFileDot() { loc l = parseWindowsPath("\\\\.\\C:\\Test\\Foo.txt"); - - if (IS_WINDOWS) { - assert exists(l); - } - return l == |unc://./C:/Test/Foo.txt|; } From 4b9c73bf35a25b23b631f532d1eda2eaf3d6c2c5 Mon Sep 17 00:00:00 2001 From: "Jurgen J. 
Vinju" Date: Wed, 22 May 2024 11:36:18 +0200 Subject: [PATCH 44/46] removed flaky test --- src/org/rascalmpl/library/lang/paths/Windows.rsc | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/org/rascalmpl/library/lang/paths/Windows.rsc b/src/org/rascalmpl/library/lang/paths/Windows.rsc index 719c645f63b..416a93f32b1 100644 --- a/src/org/rascalmpl/library/lang/paths/Windows.rsc +++ b/src/org/rascalmpl/library/lang/paths/Windows.rsc @@ -189,13 +189,3 @@ test bool uncNetworkShareOk() { return |unc://localhost/ADMIN$/System32/cmd.exe| == l; } } - -test bool uncDOSDevicePathShare() { - loc l = parseWindowsPath("\\\\?\\UNC\\localhost\\ADMIN$\\System32\\cmd.exe"); - - if (IS_WINDOWS) { - assert exists(l); - } - - return |unc://%3F/UNC/localhost/ADMIN$/System32/cmd.exe| == l; -} From e50fd37aa94e1957330314734d1d2435037695b9 Mon Sep 17 00:00:00 2001 From: "Jurgen J. Vinju" Date: Wed, 22 May 2024 15:30:48 +0200 Subject: [PATCH 45/46] fixed issue with UNC provider, where the current directory on the current drive interpretation is not supported. //?/C: becomes the root of the drive C:" --- src/org/rascalmpl/uri/file/UNCResolver.java | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/org/rascalmpl/uri/file/UNCResolver.java b/src/org/rascalmpl/uri/file/UNCResolver.java index 77996158da7..b4ea21b4c6b 100644 --- a/src/org/rascalmpl/uri/file/UNCResolver.java +++ b/src/org/rascalmpl/uri/file/UNCResolver.java @@ -23,15 +23,18 @@ protected String getPath(ISourceLocation uri) { } if (uri.hasAuthority()) { - // downstream methods will use `new File` and `new FileInputStream` - // which are able to parse UNC's on Windows. 
- // TODO: remove debug statements - System.err.println("UNC resolver produced this path: " + "\\\\" + uri.getAuthority() + "\\" + uri.getPath()); String path = uri.getPath(); + if (path.startsWith("/")) { + // that will be the backslash added before the path later path = path.substring(1); } - // path = path.replaceAll(Pattern.quote("/"), Pattern.quote("\\")); + + if (path.endsWith(":")) { + // current folder on drive not supported in UNC notation, this becomes the root of the drive + path = path + "\\"; + } + return "\\\\" + uri.getAuthority() + "\\" + path; } else { From bd8c267f7f577b4e6b080c2d3aea56c765662d03 Mon Sep 17 00:00:00 2001 From: "Jurgen J. Vinju" Date: Wed, 22 May 2024 16:59:52 +0200 Subject: [PATCH 46/46] made test for unc path with C drive more specific' --- src/org/rascalmpl/library/lang/paths/Windows.rsc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/org/rascalmpl/library/lang/paths/Windows.rsc b/src/org/rascalmpl/library/lang/paths/Windows.rsc index 416a93f32b1..37d63c00a76 100644 --- a/src/org/rascalmpl/library/lang/paths/Windows.rsc +++ b/src/org/rascalmpl/library/lang/paths/Windows.rsc @@ -127,13 +127,13 @@ test bool uncDrivePath() test bool uncDOSDevicePathLocalFileQuestion() { - loc l = parseWindowsPath("\\\\?\\c:"); + loc l = parseWindowsPath("\\\\?\\c:\\windows\\system32\\cmd.exe"); if (IS_WINDOWS) { assert exists(l); } - return l == |unc://%3F/c:|; + return l == |unc://%3F/c:/windows/system32/cmd.exe|; } test bool uncDOSDevicePathLocalFileDot() {