-
Notifications
You must be signed in to change notification settings - Fork 77
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1942 from usethesource/loc-from-filesystem-converter
add functionality to convert windows paths (all shapes and sizes) to `loc` values
- Loading branch information
Showing
11 changed files
with
419 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
@synopsis{Defines the syntax of filesystem paths on POSIX (Unix-like) systems.} | ||
@description{ | ||
This syntax definition covers POSIX paths and file names, including some of the conventions | ||
with ~ for the home folder and . and .. for relative directories. | ||
The main function of this module, ((parseUnixPath)): | ||
* faithfully maps any syntactically correct Unix path to a syntactically correct `loc` value. | ||
* throws a ParseError if the path does not comply. | ||
* ensures that if the file exists on system A, then the `loc` representation | ||
resolves to the same file on system A via any ((Library:module:IO)) function. | ||
* and nothing more. No normalization, no interpretation of `.` and `..`, no changing of cases. | ||
This is left to downstream processors of `loc` values, if necessary. The current transformation | ||
is purely syntactical, and tries to preserve the semantics of the path as much as possible. | ||
} | ||
@pitfalls{ | ||
* the `~` notation is typically a feature of the shell and not of system paths. However it is so commonly | ||
used to refer to the home directories of users that we've added an interpretation here with the `home:///` scheme. | ||
* otherwise, the path syntax may be different from what you have to type in _bash_ or _zsh_. This is because shells | ||
need to reserve characters, like spaces, for different purposes (commandline argument separation). The | ||
current definition is about the path notation that shells like _zsh_ and _bash_, and other programs, have to pass into the string arguments of | ||
OS features, after their own concatenation, splicing, variable expansion, de-escaping and unquoting routines have finished. | ||
} | ||
module lang::paths::Unix | ||
|
||
lexical UnixPath | ||
= absolute: Slashes UnixFilePath? | ||
| relative: UnixFilePath | ||
| home : "~" (Slashes UnixFilePath)? | ||
| user : "~" UserName name (Slashes UnixFilePath)? | ||
; | ||
|
||
lexical UserName = ![/~]+; | ||
|
||
lexical PathChar = ![/]; | ||
|
||
lexical PathSegment | ||
= current: "." | ||
| parent : ".." | ||
| name : (PathChar \ "~" PathChar*) \ ".." \ "." \ "~" | ||
; | ||
|
||
lexical Slashes = Slash+ !>> [/]; | ||
|
||
lexical Slash = [/]; | ||
|
||
lexical UnixFilePath = {PathSegment Slashes}+ segments Slashes?; | ||
|
||
import ParseTree; | ||
|
||
@synopsis{Convert a Unix path literal to a source location URI} | ||
@description{ | ||
1. parses the path using the grammar for ((UnixPath)) | ||
2. takes the _literal_ name components using string interpolation `"<segment>"`. This means no decoding/encoding happens at all while extracting | ||
user names and path segment names. Also all superfluous path separators are skipped. | ||
3. uses `loc + str` path concatenation with its builtin character encoding to construct the URI. Also | ||
the right path separators are introduced. | ||
} | ||
loc parseUnixPath(str input, loc src=|unknown:///|) = mapPathToLoc(parse(#UnixPath, input, src)); | ||
|
||
@synopsis{Root is a special case} | ||
private loc mapPathToLoc((UnixPath) `<Slashes _>`) | ||
= |file:///|; | ||
|
||
@synopsis{Absolute: relative to the root of the file system.} | ||
private loc mapPathToLoc((UnixPath) `<Slashes _><UnixFilePath path>`) | ||
= appendPath(|file:///|, path); | ||
|
||
@synopsis{Relative: relative to the current working directory.} | ||
private loc mapPathToLoc((UnixPath) `<UnixFilePath path>`) | ||
= appendPath(|cwd:///|, path); | ||
|
||
@synopsis{Home relative: relative to the current users home directory} | ||
private loc mapPathToLoc((UnixPath) `~<Slash _><UnixFilePath path>`) | ||
= appendPath(|home:///|, path); | ||
|
||
@synopsis{Home: the current user's home directory itself} | ||
private loc mapPathToLoc((UnixPath) `~`) | ||
= |home:///|; | ||
|
||
@synopsis{User relative: relative to any specific user's home directory} | ||
private loc mapPathToLoc((UnixPath) `~<UserName name><Slash _><UnixFilePath path>`) | ||
= appendPath(|home:///../<name>/|, path); | ||
|
||
@synopsis{User home: the home directory of a specific user itself, addressed as a sibling of the current user's home directory} | ||
private loc mapPathToLoc((UnixPath) `~<UserName name>`) | ||
= |home:///../<name>/|; | ||
|
||
private loc appendPath(loc root, UnixFilePath path) | ||
= (root | it + "<segment>" | segment <- path.segments); | ||
|
||
test bool root() | ||
= parseUnixPath("/") == |file:///|; | ||
|
||
test bool absolutePath() | ||
= parseUnixPath("/usr/local/bin") | ||
== |file:///usr/local/bin|; | ||
|
||
test bool relativePath() | ||
= parseUnixPath(".bash_rc") | ||
== |cwd:///.bash_rc|; | ||
|
||
test bool homePath() | ||
= parseUnixPath("~/.bash_profile") | ||
== |home:///.bash_profile|; | ||
|
||
test bool userPath() | ||
= parseUnixPath("~root/.bash_profile") | ||
== |home:///../root/.bash_profile|; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,191 @@ | ||
@synopsis{Defines the syntax of filesystem and network drive paths on DOS and Windows Systems.} | ||
@description{ | ||
This syntax definition of file paths and file names in Windows formalizes open-source implementations | ||
manually written in Java, C++ and C# code. These are parsers for Windows syntax of file and directory names, | ||
as well as shares on local networks (UNC notation). It also derives from openly available documentation | ||
sources on Windows and the .NET platform for confirmation and test examples. | ||
The main function of this module, ((parseWindowsPath)): | ||
* faithfully maps any syntactically correct Windows path to a syntactically correct `loc` value. | ||
* throws a ParseError if the path does not comply. Typically file names ending in spaces do not comply. | ||
* ensures that if the file exists on system A, then the `loc` representation | ||
resolves to the same file on system A via any ((Library:module:IO)) function. | ||
* and nothing more. No normalization, no interpretation of `.` and `..`, no changing of cases. | ||
This is left to downstream processors of `loc` values, if necessary. The current transformation | ||
is purely syntactical, and tries to preserve the semantics of the path as much as possible. | ||
} | ||
@pitfalls{ | ||
* Length limitations are not implemented by this parser. This means that overly long names will lead | ||
to IO exceptions when they are finally used. | ||
* The names of drives, files and devices are mapped as-is, without normalization. This means that | ||
the resulting `loc` value may not be a _canonical_ representation of the identified resource. | ||
Normalization of `loc` values is for a different function TBD. | ||
} | ||
module lang::paths::Windows | ||
|
||
import IO; | ||
import util::SystemAPI; | ||
|
||
lexical WindowsPath | ||
= unc : Slash Slash Slashes? PathChar* \ "." Slashes PathChar* Slashes WindowsFilePath | ||
| uncDOSDrive : Slash Slash Slashes? DOSDevice Slashes Drive ":" OptionalWindowsFilePath | ||
| uncDOSPath : Slash Slash Slashes? DOSDevice Slashes PathChar* Slashes WindowsFilePath | ||
| absolute : Drive ":" Slashes WindowsFilePath | ||
| driveRelative : Drive ":" WindowsFilePath | ||
| directoryRelative: Slash WindowsFilePath | ||
| relative : WindowsFilePath | ||
; | ||
|
||
lexical OptionalWindowsFilePath | ||
= () | ||
| Slashes WindowsFilePath | ||
; | ||
|
||
lexical DOSDevice = [.?]; | ||
|
||
lexical PathChar = !([\a00-\a20\< \> : \" | ? * \\ /] - [\ ]); | ||
|
||
lexical PathSegment | ||
= current: "." | ||
| parent : ".." | ||
| name : PathChar+ \ ".." \ "." | ||
; | ||
|
||
lexical Drive = [A-Za-z]; | ||
|
||
lexical Slashes = Slash+ !>> [\\/]; | ||
|
||
lexical Slash = [\\/]; | ||
|
||
lexical WindowsFilePath = {PathSegment Slashes}* segments Slashes? [\ .] !<< (); // the path must not end in a space or a dot (only the final character is checked). | ||
|
||
import ParseTree; | ||
|
||
@synopsis{Convert a windows path literal to a source location URI} | ||
@description{ | ||
1. parses the path using the grammar for ((WindowsPath)) | ||
2. takes the _literal_ name components using string interpolation `"<segment>"`. This means no decoding/encoding happens at all while extracting | ||
hostname, share name and path segment names. Also all superfluous path separators are skipped. | ||
3. uses `loc + str` path concatenation with its builtin character encoding to construct the URI. Also | ||
the right path separators are introduced. | ||
} | ||
loc parseWindowsPath(str input, loc src=|unknown:///|) = mapPathToLoc(parse(#WindowsPath, input, src)); | ||
|
||
@synopsis{UNC} | ||
private loc mapPathToLoc((WindowsPath) `<Slash _><Slash _><Slashes? _><PathChar* hostName><Slashes _><PathChar* shareName><Slashes _><WindowsFilePath path>`) | ||
= appendPath(|unc://<hostName>/| + "<shareName>", path); | ||
|
||
@synopsis{DOS UNC Device Drive} | ||
private loc mapPathToLoc((WindowsPath) `<Slash _><Slash _><Slashes? _><DOSDevice dq><Slashes _><Drive drive>:<OptionalWindowsFilePath path>`) | ||
= appendPath(|unc://<deviceIndicator(dq)>/| + "<drive>:", path); | ||
|
||
@synopsis{DOS UNC Device Path} | ||
private loc mapPathToLoc((WindowsPath) `<Slash _><Slash _><Slashes? _><DOSDevice dq><Slashes _><PathChar* deviceName><Slashes _><WindowsFilePath path>`) | ||
= appendPath(|unc://<deviceIndicator(dq)>/| + "<deviceName>", path); | ||
|
||
private str deviceIndicator((DOSDevice) `?`) = "%3F"; | ||
private str deviceIndicator((DOSDevice) `.`) = "."; | ||
|
||
@synopsis{DOS UNCPath} | ||
private loc mapPathToLoc((WindowsPath) `<Slash _><Slash _><Slashes? _>?<Slashes _><PathChar* shareName><Slashes _><WindowsFilePath path>`) | ||
= appendPath(|unc://%3F/| + "<shareName>", path); | ||
|
||
|
||
@synopsis{Absolute: given the drive and relative to its root.} | ||
private loc mapPathToLoc((WindowsPath) `<Drive drive>:<Slashes _><WindowsFilePath path>`) | ||
= appendPath(|file:///<drive>:/|, path); | ||
|
||
@synopsis{Drive relative: relative to the current working directory on the given drive.} | ||
private loc mapPathToLoc((WindowsPath) `<Drive drive>:<WindowsFilePath path>`) | ||
= appendPath(|file:///<drive>:.|, path); | ||
|
||
@synopsis{Directory relative: relative to the root of the current drive.} | ||
private loc mapPathToLoc((WindowsPath) `<Slash _><WindowsFilePath path>`) | ||
= appendPath(|cwdrive:///|, path); | ||
|
||
@synopsis{Relative to the current working directory on the current drive.} | ||
private loc mapPathToLoc((WindowsPath) `<WindowsFilePath path>`) | ||
= appendPath(|cwd:///|, path); | ||
|
||
private loc appendPath(loc root, WindowsFilePath path) | ||
= (root | it + "<segment>" | segment <- path.segments); | ||
|
||
private loc appendPath(loc root, (OptionalWindowsFilePath) ``) = root; | ||
|
||
private loc appendPath(loc root, (OptionalWindowsFilePath) `<Slashes _><WindowsFilePath path>`) | ||
= appendPath(root, path); | ||
|
||
private bool IS_WINDOWS = /win/i := getSystemProperty("os.name"); | ||
|
||
test bool uncSharePath() | ||
= parseWindowsPath("\\\\Server2\\Share\\Test\\Foo.txt") | ||
== |unc://Server2/Share/Test/Foo.txt|; | ||
|
||
test bool uncDrivePath() | ||
= parseWindowsPath("\\\\system07\\C$\\") | ||
== |unc://system07/C$|; | ||
|
||
|
||
test bool uncDOSDevicePathLocalFileQuestion() { | ||
loc l = parseWindowsPath("\\\\?\\c:\\windows\\system32\\cmd.exe"); | ||
|
||
if (IS_WINDOWS) { | ||
assert exists(l); | ||
} | ||
|
||
return l == |unc://%3F/c:/windows/system32/cmd.exe|; | ||
} | ||
|
||
test bool uncDOSDevicePathLocalFileDot() { | ||
loc l = parseWindowsPath("\\\\.\\C:\\Test\\Foo.txt"); | ||
|
||
return l == |unc://./C:/Test/Foo.txt|; | ||
} | ||
|
||
test bool uncDOSDeviceUNCSharePath() { | ||
// the entire UNC namespace is looped back into the DOS Device UNC encoding via | ||
// the reserved name "UNC": | ||
loc m1 = parseWindowsPath("\\\\?\\UNC\\Server\\Share\\Test\\Foo.txt"); | ||
loc m2 = parseWindowsPath("\\\\.\\UNC\\Server\\Share\\Test\\Foo.txt"); | ||
|
||
return m1 == |unc://%3F/UNC/Server/Share/Test/Foo.txt| | ||
&& m2 == |unc://./UNC/Server/Share/Test/Foo.txt|; | ||
} | ||
|
||
test bool uncDOSDeviceVolumeGUIDReference() { | ||
loc l = parseWindowsPath("\\\\.\\Volume{b75e2c83-0000-0000-0000-602f00000000}\\Test\\Foo.txt"); | ||
|
||
return l == |unc://./Volume%7Bb75e2c83-0000-0000-0000-602f00000000%7D/Test/Foo.txt|; | ||
} | ||
|
||
test bool uncDOSDeviceBootPartition() { | ||
loc l = parseWindowsPath("\\\\.\\BootPartition\\"); | ||
return l == |unc://./BootPartition|; | ||
} | ||
|
||
test bool simpleDrivePathC() | ||
= parseWindowsPath("C:\\Program Files\\Rascal") | ||
== |file:///C:/Program%20Files/Rascal|; | ||
|
||
test bool mixedSlashesDrivePathC() | ||
= parseWindowsPath("C:\\Program Files/Rascal") | ||
== |file:///C:/Program%20Files/Rascal|; | ||
|
||
test bool trailingSlashesDrivePathC() | ||
= parseWindowsPath("C:\\Program Files\\Rascal\\\\") | ||
== |file:///C:/Program%20Files/Rascal|; | ||
|
||
test bool simpleDrivePathD() | ||
= parseWindowsPath("D:\\Program Files\\Rascal") | ||
== |file:///D:/Program%20Files/Rascal|; | ||
|
||
test bool uncNetworkShareOk() { | ||
loc l = parseWindowsPath("\\\\localhost\\ADMIN$\\System32\\cmd.exe"); | ||
|
||
if (IS_WINDOWS) { | ||
return exists(l); | ||
} | ||
else { | ||
return |unc://localhost/ADMIN$/System32/cmd.exe| == l; | ||
} | ||
} |
29 changes: 29 additions & 0 deletions
29
src/org/rascalmpl/uri/file/CurrentWorkingDriveResolver.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
/******************************************************************************* | ||
* Copyright (c) 2009-2024 CWI | ||
* All rights reserved. This program and the accompanying materials | ||
* are made available under the terms of the Eclipse Public License v1.0 | ||
* which accompanies this distribution, and is available at | ||
* http://www.eclipse.org/legal/epl-v10.html | ||
* | ||
* Contributors: | ||
* * Jurgen J. Vinju - [email protected] - CWI | ||
******************************************************************************/ | ||
package org.rascalmpl.uri.file; | ||
|
||
import java.nio.file.Paths; | ||
|
||
/** | ||
* Resolver for the {@code cwdrive} scheme: reading and writing files relative to the | ||
* current working drive. This is different from the current working directory, namely | ||
* it is the root of the current working directory. | ||
* <p> | ||
* The root is derived once, at construction time, from the {@code user.dir} system property. | ||
*/ | ||
public class CurrentWorkingDriveResolver extends AliasedFileResolver { | ||
|
||
/** | ||
* Passes the name {@code "cwdrive"} and the root of the directory named by the | ||
* {@code user.dir} system property to the {@link AliasedFileResolver} constructor. | ||
*/ | ||
public CurrentWorkingDriveResolver() { | ||
super("cwdrive", deriveCurrentWorkingDrive(System.getProperty("user.dir"))); | ||
} | ||
|
||
/** | ||
* Computes the root component of the given directory, as a string. | ||
* | ||
* @param userDir a directory path; it is made absolute before its root is taken | ||
* @return the string form of the root of the absolute path | ||
*/ | ||
// NOTE(review): Path.getRoot() is documented to return null for a path without a root | ||
// component; presumably that cannot happen after toAbsolutePath() - confirm. | ||
private static String deriveCurrentWorkingDrive(String userDir) { | ||
return Paths.get(userDir).toAbsolutePath().getRoot().toString(); | ||
} | ||
} | ||
Oops, something went wrong.