diff --git a/pom.xml b/pom.xml index 78894989ac9..fb774a518cc 100644 --- a/pom.xml +++ b/pom.xml @@ -219,6 +219,7 @@ **/org/rascalmpl/test/AllSuiteParallel.java + **/org/rascalmpl/test/library/LibraryLangPaths.java **/org/rascalmpl/test/value/AllTests.java **/org/rascalmpl/*Test.java diff --git a/src/org/rascalmpl/library/Location.rsc b/src/org/rascalmpl/library/Location.rsc index 1da26a7ec69..50b82dab347 100644 --- a/src/org/rascalmpl/library/Location.rsc +++ b/src/org/rascalmpl/library/Location.rsc @@ -24,6 +24,8 @@ import Set; import String; import Exception; +import lang::paths::Windows; +import lang::paths::Unix; @synopsis{Extracts a path relative to a parent location.} @description{ @@ -43,6 +45,28 @@ loc relativize(list[loc] haystack, loc needle) { } } +@synopsis{Convert Windows path syntax to a `loc` value} +@description{ +This conversion supports generic Windows path syntax, including: +* Absolute drive-specific: `C:\Program Files` +* Relative drive-specific: `C:hello.txt` +* Relative: `hello.txt` +* Directory-relative: `\hello.txt` +* UNC format: `\\system07\C$\` + +Windows paths, contrary to popular belief, support both `/` and `\` as path separators. 
+} +loc locFromWindowsPath(str path) = parseWindowsPath(path); + +@synopsis{Convert Unix path syntax to a `loc` value} +@description{ +This conversion supports generic Unix path syntax, including: +* Absolute: `/usr/local/bin` +* Relative: `hello.txt` +* Home: `~/hello.txt` +* User: `~userName/hello.txt` +} +loc locFromUnixPath(str path) = parseUnixPath(path); @synopsis{Check that two locations refer to the same file.} bool isSameFile(loc l, loc r) = l.top[fragment=""] == r.top[fragment=""]; diff --git a/src/org/rascalmpl/library/Prelude.java b/src/org/rascalmpl/library/Prelude.java index 8bb4bbb4de1..33036ee01ab 100644 --- a/src/org/rascalmpl/library/Prelude.java +++ b/src/org/rascalmpl/library/Prelude.java @@ -3615,7 +3615,7 @@ public ISet findResources(IString fileName) { public ISourceLocation relativize(ISourceLocation outside, ISourceLocation inside) { return URIUtil.relativize(outside, inside); } - + public IValue readBinaryValueFile(IValue type, ISourceLocation loc){ if(trackIO) System.err.println("readBinaryValueFile: " + loc); diff --git a/src/org/rascalmpl/library/String.rsc b/src/org/rascalmpl/library/String.rsc index d670d23668b..3e2cff8ab46 100644 --- a/src/org/rascalmpl/library/String.rsc +++ b/src/org/rascalmpl/library/String.rsc @@ -577,6 +577,7 @@ toLocation("http://grammarware.net"); toLocation("document.xml"); ``` } +@deprecated{Use ((Location::locFromWindowsPath)) for example. The current function does not handle all the different intricacies of path notation.} public loc toLocation(str s) = (/\:\/\// := s) ? 
|://| : |cwd:///|; diff --git a/src/org/rascalmpl/library/lang/paths/Unix.rsc b/src/org/rascalmpl/library/lang/paths/Unix.rsc new file mode 100644 index 00000000000..4e89ac113e6 --- /dev/null +++ b/src/org/rascalmpl/library/lang/paths/Unix.rsc @@ -0,0 +1,108 @@ +@synopsis{Defines the syntax of filesystem paths on Unix and other POSIX systems.} +@description{ +This is a syntax definition of POSIX paths and file names, including some of the conventions +with ~ for the home folder and . and .. for relative directories. + +The main function of this module, ((parseUnixPath)): +* faithfully maps any syntactically correct Unix paths to syntactically correct `loc` values. +* throws a ParseError if the path does not comply. +* ensures that if the file exists on system A, then the `loc` representation +resolves to the same file on system A via any ((Library:module:IO)) function. +* and nothing more. No normalization, no interpretation of `.` and `..`, no changing of cases. +This is left to downstream processors of `loc` values, if necessary. The current transformation +is purely syntactical, and tries to preserve the semantics of the path as much as possible. +} +@pitfalls{ +* the `~` notation is typically a feature of the shell and not of system paths. However it is so commonly +used to refer to the home directories of users that we've added an interpretation here with the `home:///` scheme. +* otherwise, the path syntax may be different from what you have to type in _bash_ or _zsh_. This is because shells +need to reserve characters, like spaces, for different purposes (commandline argument separation). The +current definition is about the path notation that shells like _zsh_ and _bash_, and other programs, have to pass into the string arguments of +OS features, after their own concatenation, splicing, variable expansion, de-escaping and unquoting routines have finished. +} +module lang::paths::Unix + +lexical UnixPath + = absolute: Slashes UnixFilePath? 
+ | relative: UnixFilePath + | home : "~" (Slashes UnixFilePath)? + | user : "~" UserName name (Slashes UnixFilePath)? + ; + +lexical UserName = ![/~]+; + +lexical PathChar = ![/]; + +lexical PathSegment + = current: "." + | parent : ".." + | name : (PathChar \ "~" PathChar*) \ ".." \ "." \ "~" + ; + +lexical Slashes = Slash+ !>> [/]; + +lexical Slash = [/]; + +lexical UnixFilePath = {PathSegment Slashes}+ segments Slashes?; + +import ParseTree; + +@synopsis{Convert a Unix path literal to a source location URI} +@description{ +1. parses the path using the grammar for ((UnixPath)) +2. takes the _literal_ name components using string interpolation `""`. This means no decoding/encoding happens at all while extracting +user names and path segment names. Also all superfluous path separators are skipped. +3. uses `loc + str` path concatenation with its builtin character encoding to construct the URI. Also +the right path separators are introduced. +} +loc parseUnixPath(str input, loc src=|unknown:///|) = mapPathToLoc(parse(#UnixPath, input, src)); + +@synopsis{Root is a special case} +private loc mapPathToLoc((UnixPath) ``) + = |file:///|; + +@synopsis{Absolute: relative to the root of the file system.} +private loc mapPathToLoc((UnixPath) ``) + = appendPath(|file:///|, path); + +@synopsis{Relative: relative to the current working directory.} +private loc mapPathToLoc((UnixPath) ``) + = appendPath(|cwd:///|, path); + +@synopsis{Home relative: relative to the current users home directory} +private loc mapPathToLoc((UnixPath) `~`) + = appendPath(|home:///|, path); + +@synopsis{Home relative: relative to the current users home directory} +private loc mapPathToLoc((UnixPath) `~`) + = |home:///|; + +@synopsis{User relative: relative to any specific user's home directory} +private loc mapPathToLoc((UnixPath) `~`) + = appendPath(|home:///..//|, path); + +@synopsis{User relative: relative to any specific user's home directory} +private loc mapPathToLoc((UnixPath) `~`) 
+ = |home:///..//|; + +private loc appendPath(loc root, UnixFilePath path) + = (root | it + "" | segment <- path.segments); + +test bool root() + = parseUnixPath("/") == |file:///|; + +test bool absolutePath() + = parseUnixPath("/usr/local/bin") + == |file:///usr/local/bin|; + +test bool relativePath() + = parseUnixPath(".bash_rc") + == |cwd:///.bash_rc|; + +test bool homePath() + = parseUnixPath("~/.bash_profile") + == |home:///.bash_profile|; + +test bool userPath() + = parseUnixPath("~root/.bash_profile") + == |home:///../root/.bash_profile|; diff --git a/src/org/rascalmpl/library/lang/paths/Windows.rsc b/src/org/rascalmpl/library/lang/paths/Windows.rsc new file mode 100644 index 00000000000..37d63c00a76 --- /dev/null +++ b/src/org/rascalmpl/library/lang/paths/Windows.rsc @@ -0,0 +1,191 @@ +@synopsis{Defines the syntax of filesystem and network drive paths on DOS and Windows Systems.} +@description{ +This syntax definition of file paths and file names in Windows formalizes open-source implementations +manually written in Java, C++ and C# code. These are parsers for Windows syntax of file and directory names, +as well as shares on local networks (UNC notation). It also derives from openly available documentation +sources on Windows and the .NET platform for confirmation and test examples. + +The main function of this module, ((parseWindowsPath)): +* faithfully maps any syntactically correct Windows paths to syntactically correct `loc` values. +* throws a ParseError if the path does not comply. Typically file names ending in spaces do not comply. +* ensures that if the file exists on system A, then the `loc` representation +resolves to the same file on system A via any ((Library:module:IO)) function. +* and nothing more. No normalization, no interpretation of `.` and `..`, no changing of cases. +This is left to downstream processors of `loc` values, if necessary. 
The current transformation +is purely syntactical, and tries to preserve the semantics of the path as much as possible. +} +@pitfalls{ +* Length limitations are not implemented by this parser. This means that overly long names will lead +to IO exceptions when they are finally used. +* The names of drives, files and devices are mapped as-is, without normalization. This means that +the resulting `loc` value may not be a _canonical_ representation of the identified resource. +Normalization of `loc` values is for a different function TBD. +} +module lang::paths::Windows + +import IO; +import util::SystemAPI; + +lexical WindowsPath + = unc : Slash Slash Slashes? PathChar* \ "." Slashes PathChar* Slashes WindowsFilePath + | uncDOSDrive : Slash Slash Slashes? DOSDevice Slashes Drive ":" OptionalWindowsFilePath + | uncDOSPath : Slash Slash Slashes? DOSDevice Slashes PathChar* Slashes WindowsFilePath + | absolute : Drive ":" Slashes WindowsFilePath + | driveRelative : Drive ":" WindowsFilePath + | directoryRelative: Slash WindowsFilePath + | relative : WindowsFilePath + ; + +lexical OptionalWindowsFilePath + = () + | Slashes WindowsFilePath + ; + +lexical DOSDevice = [.?]; + +lexical PathChar = !([\a00-\a20\< \> : \" | ? * \\ /] - [\ ]); + +lexical PathSegment + = current: "." + | parent : ".." + | name : PathChar+ \ ".." \ "." + ; + +lexical Drive = [A-Za-z]; + +lexical Slashes = Slash+ !>> [\\/]; + +lexical Slash = [\\/]; + +lexical WindowsFilePath = {PathSegment Slashes}* segments Slashes? [\ .] !<< (); // only the last segment must not end in spaces. + +import ParseTree; + +@synopsis{Convert a windows path literal to a source location URI} +@description{ +1. parses the path using the grammar for ((WindowsPath)) +2. takes the _literal_ name components using string interpolation `""`. This means no decoding/encoding happens at all while extracting +hostname, share name and path segment names. Also all superfluous path separators are skipped. +3. 
uses `loc + str` path concatenation with its builtin character encoding to construct the URI. Also +the right path separators are introduced. +} +loc parseWindowsPath(str input, loc src=|unknown:///|) = mapPathToLoc(parse(#WindowsPath, input, src)); + +@synopsis{UNC} +private loc mapPathToLoc((WindowsPath) ``) + = appendPath(|unc:///| + "", path); + +@synopsis{DOS UNC Device Drive} +private loc mapPathToLoc((WindowsPath) `:`) + = appendPath(|unc:///| + ":", path); + +@synopsis{DOS UNC Device Path} +private loc mapPathToLoc((WindowsPath) ``) + = appendPath(|unc:///| + "", path); + +private str deviceIndicator((DOSDevice) `?`) = "%3F"; +private str deviceIndicator((DOSDevice) `.`) = "."; + +@synopsis{DOS UNCPath} +private loc mapPathToLoc((WindowsPath) `?`) + = appendPath(|unc://%3F/| + "", path); + + +@synopsis{Absolute: given the drive and relative to its root.} +private loc mapPathToLoc((WindowsPath) `:`) + = appendPath(|file:///:/|, path); + +@synopsis{Drive relative: relative to the current working directory on the given drive.} +private loc mapPathToLoc((WindowsPath) `:`) + = appendPath(|file:///:.|, path); + +@synopsis{Directory relative: relative to the root of the current drive.} +private loc mapPathToLoc((WindowsPath) ``) + = appendPath(|cwdrive:///|, path); + +@synopsis{Relative to the current working directory on the current drive.} +private loc mapPathToLoc((WindowsPath) ``) + = appendPath(|cwd:///|, path); + +private loc appendPath(loc root, WindowsFilePath path) + = (root | it + "" | segment <- path.segments); + +private loc appendPath(loc root, (OptionalWindowsFilePath) ``) = root; + +private loc appendPath(loc root, (OptionalWindowsFilePath) ``) + = appendPath(root, path); + +private bool IS_WINDOWS = /win/i := getSystemProperty("os.name"); + +test bool uncSharePath() + = parseWindowsPath("\\\\Server2\\Share\\Test\\Foo.txt") + == |unc://Server2/Share/Test/Foo.txt|; + +test bool uncDrivePath() + = parseWindowsPath("\\\\system07\\C$\\") + == 
|unc://system07/C$|; + + +test bool uncDOSDevicePathLocalFileQuestion() { + loc l = parseWindowsPath("\\\\?\\c:\\windows\\system32\\cmd.exe"); + + if (IS_WINDOWS) { + assert exists(l); + } + + return l == |unc://%3F/c:/windows/system32/cmd.exe|; +} + +test bool uncDOSDevicePathLocalFileDot() { + loc l = parseWindowsPath("\\\\.\\C:\\Test\\Foo.txt"); + + return l == |unc://./C:/Test/Foo.txt|; +} + +test bool uncDOSDeviceUNCSharePath() { + // the entire UNC namespace is looped back into the DOS Device UNC encoding via + // the reserved name "UNC": + loc m1 = parseWindowsPath("\\\\?\\UNC\\Server\\Share\\Test\\Foo.txt"); + loc m2 = parseWindowsPath("\\\\.\\UNC\\Server\\Share\\Test\\Foo.txt"); + + return m1 == |unc://%3F/UNC/Server/Share/Test/Foo.txt| + && m2 == |unc://./UNC/Server/Share/Test/Foo.txt|; +} + +test bool uncDOSDeviceVolumeGUIDReference() { + loc l = parseWindowsPath("\\\\.\\Volume{b75e2c83-0000-0000-0000-602f00000000}\\Test\\Foo.txt"); + + return l == |unc://./Volume%7Bb75e2c83-0000-0000-0000-602f00000000%7D/Test/Foo.txt|; +} + +test bool uncDOSDeviceBootPartition() { + loc l = parseWindowsPath("\\\\.\\BootPartition\\"); + return l == |unc://./BootPartition|; +} + +test bool simpleDrivePathC() + = parseWindowsPath("C:\\Program Files\\Rascal") + == |file:///C:/Program%20Files/Rascal|; + +test bool mixedSlashesDrivePathC() + = parseWindowsPath("C:\\Program Files/Rascal") + == |file:///C:/Program%20Files/Rascal|; + +test bool trailingSlashesDrivePathC() + = parseWindowsPath("C:\\Program Files\\Rascal\\\\") + == |file:///C:/Program%20Files/Rascal|; + +test bool simpleDrivePathD() + = parseWindowsPath("D:\\Program Files\\Rascal") + == |file:///D:/Program%20Files/Rascal|; + +test bool uncNetworkShareOk() { + loc l = parseWindowsPath("\\\\localhost\\ADMIN$\\System32\\cmd.exe"); + + if (IS_WINDOWS) { + return exists(l); + } + else { + return |unc://localhost/ADMIN$/System32/cmd.exe| == l; + } +} diff --git 
a/src/org/rascalmpl/uri/file/CurrentWorkingDriveResolver.java b/src/org/rascalmpl/uri/file/CurrentWorkingDriveResolver.java new file mode 100644 index 00000000000..84bd45d893f --- /dev/null +++ b/src/org/rascalmpl/uri/file/CurrentWorkingDriveResolver.java @@ -0,0 +1,29 @@ +/******************************************************************************* + * Copyright (c) 2009-2024 CWI + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v10.html + * + * Contributors: + * * Jurgen J. Vinju - Jurgen.Vinju@cwi.nl - CWI + ******************************************************************************/ +package org.rascalmpl.uri.file; + +import java.nio.file.Paths; + +/** + * For reading and writing files relative to the current working drive. + * This is different from the current working directory, namely it is the + * root of the current working directory. + */ +public class CurrentWorkingDriveResolver extends AliasedFileResolver { + + public CurrentWorkingDriveResolver() { + super("cwdrive", deriveCurrentWorkingDrive(System.getProperty("user.dir"))); + } + + private static String deriveCurrentWorkingDrive(String userDir) { + return Paths.get(userDir).toAbsolutePath().getRoot().toString(); + } +} diff --git a/src/org/rascalmpl/uri/file/FileURIResolver.java b/src/org/rascalmpl/uri/file/FileURIResolver.java index bfa8c358228..9358c1804e7 100644 --- a/src/org/rascalmpl/uri/file/FileURIResolver.java +++ b/src/org/rascalmpl/uri/file/FileURIResolver.java @@ -143,6 +143,7 @@ public boolean exists(ISourceLocation uri) { * To override to build resolvers to specific locations using a prefix for example. 
*/ protected String getPath(ISourceLocation uri) { + assert !uri.hasAuthority(); return uri.getPath(); } diff --git a/src/org/rascalmpl/uri/file/UNCResolver.java b/src/org/rascalmpl/uri/file/UNCResolver.java new file mode 100644 index 00000000000..b4ea21b4c6b --- /dev/null +++ b/src/org/rascalmpl/uri/file/UNCResolver.java @@ -0,0 +1,50 @@ +package org.rascalmpl.uri.file; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.regex.Pattern; + +import io.usethesource.vallang.ISourceLocation; + +/** + * Implements the UNC-available network shares on Windows systems. + */ +public class UNCResolver extends FileURIResolver { + private boolean onWindows = System.getProperty("os.name").toLowerCase().startsWith("win"); + + public UNCResolver() throws IOException { + super(); + } + + @Override + protected String getPath(ISourceLocation uri) { + if (!onWindows) { + throw new RuntimeException(new FileNotFoundException(uri.toString() + "; UNC is only available on Windows")); + } + + if (uri.hasAuthority()) { + String path = uri.getPath(); + + if (path.startsWith("/")) { + // that will be the backslash added before the path later + path = path.substring(1); + } + + if (path.endsWith(":")) { + // current folder on drive not supported in UNC notation, this becomes the root of the drive + path = path + "\\"; + } + + return "\\\\" + uri.getAuthority() + "\\" + path; + } + else { + // just a normal absolute path + return uri.getPath(); + } + } + + @Override + public String scheme() { + return "unc"; + } +} diff --git a/src/org/rascalmpl/uri/resolvers.config b/src/org/rascalmpl/uri/resolvers.config index acfc7ce38cd..5e35fb9c326 100644 --- a/src/org/rascalmpl/uri/resolvers.config +++ b/src/org/rascalmpl/uri/resolvers.config @@ -9,6 +9,8 @@ org.rascalmpl.uri.jar.JarURIResolver org.rascalmpl.uri.zip.ZipURIResolver org.rascalmpl.uri.file.HomeURIResolver org.rascalmpl.uri.file.CWDURIResolver +org.rascalmpl.uri.file.CurrentWorkingDriveResolver 
+org.rascalmpl.uri.file.UNCResolver org.rascalmpl.uri.file.SystemPathURIResolver org.rascalmpl.uri.libraries.MemoryResolver org.rascalmpl.uri.libraries.RascalLibraryURIResolver diff --git a/test/org/rascalmpl/test/library/LibraryLangPaths.java b/test/org/rascalmpl/test/library/LibraryLangPaths.java new file mode 100644 index 00000000000..da795fc2edc --- /dev/null +++ b/test/org/rascalmpl/test/library/LibraryLangPaths.java @@ -0,0 +1,11 @@ +package org.rascalmpl.test.library; + +import org.junit.runner.RunWith; +import org.rascalmpl.test.infrastructure.RascalJUnitTestPrefix; +import org.rascalmpl.test.infrastructure.RascalJUnitTestRunner; + +@RunWith(RascalJUnitTestRunner.class) +@RascalJUnitTestPrefix("lang::paths") +public class LibraryLangPaths { + +}