Skip to content

Commit

Permalink
Merge pull request #1942 from usethesource/loc-from-filesystem-converter
Browse files Browse the repository at this point in the history
add functionality to convert windows paths (all shapes and sizes) to `loc` values
  • Loading branch information
jurgenvinju authored May 22, 2024
2 parents f18ed05 + 55fc9de commit 0b0b547
Show file tree
Hide file tree
Showing 11 changed files with 419 additions and 1 deletion.
1 change: 1 addition & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@
</argLine>
<includes>
<include>**/org/rascalmpl/test/AllSuiteParallel.java</include>
<include>**/org/rascalmpl/test/library/LibraryLangPaths.java</include>
<include>**/org/rascalmpl/test/value/AllTests.java</include>
<include>**/org/rascalmpl/*Test.java</include>
</includes>
Expand Down
24 changes: 24 additions & 0 deletions src/org/rascalmpl/library/Location.rsc
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ import Set;
import String;
import Exception;

import lang::paths::Windows;
import lang::paths::Unix;

@synopsis{Extracts a path relative to a parent location.}
@description{
Expand All @@ -43,6 +45,28 @@ loc relativize(list[loc] haystack, loc needle) {
}
}

@synopsis{Convert Windows path syntax to a `loc` value}
@description{
Delegates to `parseWindowsPath` and accepts generic Windows path syntax, including:
* Absolute drive-specific: `C:\Program Files`
* Relative drive-specific: `C:hello.txt`
* Relative: `hello.txt`
* Directory-relative: `\hello.txt`
* UNC format: `\\system07\C$\`

Contrary to popular belief, Windows paths accept both `/` and `\` as path separators.
}
loc locFromWindowsPath(str path) {
  return parseWindowsPath(path);
}

@synopsis{Convert Unix path syntax to a `loc` value}
@description{
Delegates to `parseUnixPath` and accepts generic Unix path syntax, including:
* Absolute: `/usr/local/bin`
* Relative: `hello.txt`
* Home: `~/hello.txt`
* User: `~userName/hello.txt`
}
loc locFromUnixPath(str path) {
  return parseUnixPath(path);
}

@synopsis{Check that two locations refer to the same file.}
@description{
Both locations are reduced to their `top` location with an empty fragment before they are compared.
}
bool isSameFile(loc l, loc r) {
  loc left = l.top[fragment=""];
  loc right = r.top[fragment=""];
  return left == right;
}
Expand Down
2 changes: 1 addition & 1 deletion src/org/rascalmpl/library/Prelude.java
Original file line number Diff line number Diff line change
Expand Up @@ -3615,7 +3615,7 @@ public ISet findResources(IString fileName) {
/**
 * Relativizes {@code inside} against {@code outside}.
 *
 * <p>This is a thin wrapper: the computation is done entirely by
 * {@code URIUtil.relativize(outside, inside)}.
 *
 * @param outside the location passed as first argument to {@code URIUtil.relativize}
 * @param inside  the location passed as second argument to {@code URIUtil.relativize}
 * @return whatever {@code URIUtil.relativize} returns for these two locations
 */
public ISourceLocation relativize(ISourceLocation outside, ISourceLocation inside) {
    ISourceLocation relative = URIUtil.relativize(outside, inside);
    return relative;
}

public IValue readBinaryValueFile(IValue type, ISourceLocation loc){
if(trackIO) System.err.println("readBinaryValueFile: " + loc);

Expand Down
1 change: 1 addition & 0 deletions src/org/rascalmpl/library/String.rsc
Original file line number Diff line number Diff line change
Expand Up @@ -577,6 +577,7 @@ toLocation("http://grammarware.net");
toLocation("document.xml");
```
}
@deprecated{Use ((Location::locFromWindowsPath)) for example. The current function does not handle all the different intricacies of path notation.}
public loc toLocation(str s) {
  // anything of the shape `scheme://rest` is turned into a loc with that scheme
  if (/<car:.*>\:\/\/<cdr:.*>/ := s) {
    return |<car>://<cdr>|;
  }

  // everything else is placed under the current working directory
  return |cwd:///<s>|;
}


Expand Down
108 changes: 108 additions & 0 deletions src/org/rascalmpl/library/lang/paths/Unix.rsc
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
@synopsis{Defines the syntax of filesystem paths on Unix and other POSIX systems.}
@description{
This syntax definition of POSIX paths and file names, including some of the conventions
with ~ for the home folder and . and .. for relative directories.
The main function of this module, ((parseUnixPath)):
* faithfully maps any syntactically correct Unix path to a syntactically correct `loc` value.
* throws a ParseError if the path does not comply.
* ensures that if the file exists on system A, then the `loc` representation
resolves to the same file on system A via any ((Library:module:IO)) function.
* and nothing more. No normalization, no interpretation of `.` and `..`, no changing of cases.
This is left to downstream processors of `loc` values, if necessary. The current transformation
is purely syntactical, and tries to preserve the semantics of the path as much as possible.
}
@pitfalls{
* the `~` notation is typically a feature of the shell and not of system paths. However it is so commonly
used to refer to the home directories of users that we've added an interpretation here with the `home:///` scheme.
* otherwise, the path syntax may be different from what you have to type in _bash_ or _zsh_. This is because shells
need to reserve characters, like spaces, for different purposes (commandline argument separation). The
current definition is about the path notation that shells like _zsh_ and _bash_, and other programs, have to pass into the string arguments of
OS features, after their own concatenation, splicing, variable expansion, de-escaping and unquoting routines have finished.
}
module lang::paths::Unix

// A Unix path comes in four shapes:
//   absolute: starts with one or more slashes, optionally followed by a file path
//   relative: just a file path
//   home    : `~`, optionally followed by slashes and a file path
//   user    : `~` directly followed by a user name, optionally followed by slashes and a file path
lexical UnixPath
= absolute: Slashes UnixFilePath?
| relative: UnixFilePath
| home : "~" (Slashes UnixFilePath)?
| user : "~" UserName name (Slashes UnixFilePath)?
;

// One or more characters that are neither `/` nor `~`.
lexical UserName = ![/~]+;

// Any single character except the path separator `/`.
lexical PathChar = ![/];

// A segment is `.`, `..`, or a name. The `name` alternative reads as: a sequence of
// path characters whose first character is not `~`, and which as a whole is not
// `..`, `.` or `~` (those are covered by the other alternatives and by the home notation).
lexical PathSegment
= current: "."
| parent : ".."
| name : (PathChar \ "~" PathChar*) \ ".." \ "." \ "~"
;

// One or more slashes; the follow restriction makes the run of slashes maximal.
lexical Slashes = Slash+ !>> [/];

lexical Slash = [/];

// One or more segments separated by slashes, with optional trailing slashes.
lexical UnixFilePath = {PathSegment Slashes}+ segments Slashes?;

import ParseTree;

@synopsis{Convert a Unix path literal to a source location URI}
@description{
1. parses the path using the grammar for ((UnixPath))
2. takes the _literal_ name components using string interpolation `"<segment>"`. This means no decoding/encoding happens at all while extracting
user names and path segment names. Also all superfluous path separators are skipped.
3. uses `loc + str` path concatenation with its builtin character encoding to construct the URI. Also
the right path separators are introduced.
}
loc parseUnixPath(str input, loc src=|unknown:///|) {
  UnixPath tree = parse(#UnixPath, input, src);
  return mapPathToLoc(tree);
}

@synopsis{Root is a special case}
private loc mapPathToLoc((UnixPath) `<Slashes _>`)
  = |file:///|;

@synopsis{Absolute: relative to the root of the file system.}
private loc mapPathToLoc((UnixPath) `<Slashes _><UnixFilePath path>`)
  = appendPath(|file:///|, path);

@synopsis{Relative: relative to the current working directory.}
private loc mapPathToLoc((UnixPath) `<UnixFilePath path>`)
  = appendPath(|cwd:///|, path);

@synopsis{Home relative: relative to the current user's home directory}
// The grammar allows a run of slashes after `~` (`"~" (Slashes UnixFilePath)?`), so the
// separator is matched as `Slashes`, just like in the absolute case. Matching a single
// `Slash` would leave inputs such as `~//file` parseable but without a matching overload.
private loc mapPathToLoc((UnixPath) `~<Slashes _><UnixFilePath path>`)
  = appendPath(|home:///|, path);

@synopsis{Home: the current user's home directory itself}
private loc mapPathToLoc((UnixPath) `~`)
  = |home:///|;

@synopsis{User relative: relative to any specific user's home directory}
// The named user's home is addressed as a sibling of the current home: `home:///../<name>/`.
// As above, the separator after the user name is a run of slashes, not a single slash.
private loc mapPathToLoc((UnixPath) `~<UserName name><Slashes _><UnixFilePath path>`)
  = appendPath(|home:///../<name>/|, path);

@synopsis{User: the home directory of a specific user}
private loc mapPathToLoc((UnixPath) `~<UserName name>`)
  = |home:///../<name>/|;

@synopsis{Appends every segment of the file path, in order, to the given root.}
private loc appendPath(loc root, UnixFilePath path) {
  loc result = root;

  // `loc + str` adds one path segment; the separators between segments are not iterated over
  for (segment <- path.segments) {
    result = result + "<segment>";
  }

  return result;
}

// `/` maps to the root of the file scheme
test bool root() {
  loc actual = parseUnixPath("/");
  return actual == |file:///|;
}

// absolute paths end up under the file scheme
test bool absolutePath() {
  loc actual = parseUnixPath("/usr/local/bin");
  return actual == |file:///usr/local/bin|;
}

// a name starting with a dot is an ordinary relative segment
test bool relativePath() {
  loc actual = parseUnixPath(".bash_rc");
  return actual == |cwd:///.bash_rc|;
}

// `~/` maps to the home scheme
test bool homePath() {
  loc actual = parseUnixPath("~/.bash_profile");
  return actual == |home:///.bash_profile|;
}

// `~user/` is addressed relative to the current home
test bool userPath() {
  loc actual = parseUnixPath("~root/.bash_profile");
  return actual == |home:///../root/.bash_profile|;
}
191 changes: 191 additions & 0 deletions src/org/rascalmpl/library/lang/paths/Windows.rsc
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
@synopsis{Defines the syntax of filesystem and network drive paths on DOS and Windows Systems.}
@description{
This syntax definition of file paths and file names in Windows formalizes open-source implementations
manually written in Java, C++ and C# code. These are parsers for Windows syntax of file and directory names,
as well as shares on local networks (UNC notation). It also derives from openly available documentation
sources on Windows and the .NET platform for confirmation and test examples.
The main function of this module, ((parseWindowsPath)):
* faithfully maps any syntactically correct Windows path to a syntactically correct `loc` value.
* throws a ParseError if the path does not comply. Typically file names ending in spaces do not comply.
* ensures that if the file exists on system A, then the `loc` representation
resolves to the same file on system A via any ((Library:module:IO)) function.
* and nothing more. No normalization, no interpretation of `.` and `..`, no changing of cases.
This is left to downstream processors of `loc` values, if necessary. The current transformation
is purely syntactical, and tries to preserve the semantics of the path as much as possible.
}
@pitfalls{
* Length limitations are not implemented by this parser. This means that overly long names will lead
to IO exceptions when they are finally used.
* The names of drives, files and devices are mapped as-is, without normalization. This means that
the resulting `loc` value may not be a _canonical_ representation of the identified resource.
Normalization of `loc` values is for a different function TBD.
}
module lang::paths::Windows

import IO;
import util::SystemAPI;

// The seven shapes of a Windows path recognized by this grammar:
//   unc              : two (or more) slashes, a host name that is not just ".", a share name, a file path
//   uncDOSDrive      : two (or more) slashes, a DOS device marker (`.` or `?`), a drive letter with colon,
//                      and optionally a file path
//   uncDOSPath       : two (or more) slashes, a DOS device marker, a device name, a file path
//   absolute         : drive letter, colon, slashes, file path
//   driveRelative    : drive letter, colon, file path (no slash after the colon)
//   directoryRelative: a single leading slash, file path
//   relative         : just a file path
lexical WindowsPath
= unc : Slash Slash Slashes? PathChar* \ "." Slashes PathChar* Slashes WindowsFilePath
| uncDOSDrive : Slash Slash Slashes? DOSDevice Slashes Drive ":" OptionalWindowsFilePath
| uncDOSPath : Slash Slash Slashes? DOSDevice Slashes PathChar* Slashes WindowsFilePath
| absolute : Drive ":" Slashes WindowsFilePath
| driveRelative : Drive ":" WindowsFilePath
| directoryRelative: Slash WindowsFilePath
| relative : WindowsFilePath
;

// Either nothing at all, or slashes followed by a file path.
lexical OptionalWindowsFilePath
= ()
| Slashes WindowsFilePath
;

// The marker that follows the leading slashes of a DOS device path: `.` or `?`.
lexical DOSDevice = [.?];

// Any character except the listed ones: the range \a00-\a20 (with the space character
// taken out of the exclusion again, so spaces are allowed), and < > : " | ? * \ /
lexical PathChar = !([\a00-\a20\< \> : \" | ? * \\ /] - [\ ]);

// A segment is `.`, `..`, or one or more path characters that do not spell `..` or `.`.
lexical PathSegment
= current: "."
| parent : ".."
| name : PathChar+ \ ".." \ "."
;

// A single ASCII letter.
lexical Drive = [A-Za-z];

// One or more slashes of either kind; the follow restriction makes the run maximal.
lexical Slashes = Slash+ !>> [\\/];

// Both the backslash and the forward slash are accepted as separator.
lexical Slash = [\\/];

// Zero or more segments separated by slashes, with optional trailing slashes. The final
// `[\ .] !<< ()` is an empty symbol with a precede restriction: it may not directly follow
// a space or a dot.
lexical WindowsFilePath = {PathSegment Slashes}* segments Slashes? [\ .] !<< (); // only the last segment must not end in spaces.

import ParseTree;

@synopsis{Convert a Windows path literal to a source location URI}
@description{
1. parses the path using the grammar for ((WindowsPath))
2. takes the _literal_ name components using string interpolation `"<segment>"`. This means no decoding/encoding happens at all while extracting
hostname, share name and path segment names. Also all superfluous path separators are skipped.
3. uses `loc + str` path concatenation with its builtin character encoding to construct the URI. Also
the right path separators are introduced.
}
loc parseWindowsPath(str input, loc src=|unknown:///|) {
  WindowsPath tree = parse(#WindowsPath, input, src);
  return mapPathToLoc(tree);
}

@synopsis{UNC: host name becomes the authority, share name the first path segment}
private loc mapPathToLoc((WindowsPath) `<Slash _><Slash _><Slashes? _><PathChar* hostName><Slashes _><PathChar* shareName><Slashes _><WindowsFilePath path>`) {
  loc share = |unc://<hostName>/| + "<shareName>";
  return appendPath(share, path);
}

@synopsis{DOS UNC Device Drive: the drive (with its colon) becomes the first path segment}
private loc mapPathToLoc((WindowsPath) `<Slash _><Slash _><Slashes? _><DOSDevice dq><Slashes _><Drive drive>:<OptionalWindowsFilePath path>`) {
  loc driveRoot = |unc://<deviceIndicator(dq)>/| + "<drive>:";
  return appendPath(driveRoot, path);
}

@synopsis{DOS UNC Device Path: the device name becomes the first path segment}
private loc mapPathToLoc((WindowsPath) `<Slash _><Slash _><Slashes? _><DOSDevice dq><Slashes _><PathChar* deviceName><Slashes _><WindowsFilePath path>`) {
  loc device = |unc://<deviceIndicator(dq)>/| + "<deviceName>";
  return appendPath(device, path);
}

@synopsis{The authority used for a DOS device marker: `?` is written percent-encoded, `.` as-is}
private str deviceIndicator((DOSDevice) `?`) {
  return "%3F";
}

private str deviceIndicator((DOSDevice) `.`) {
  return ".";
}

@synopsis{DOS UNCPath}
// NOTE(review): this pattern looks like the `?` instance of the DOS UNC Device Path
// overload above, and it builds the same kind of loc; confirm whether both are needed.
private loc mapPathToLoc((WindowsPath) `<Slash _><Slash _><Slashes? _>?<Slashes _><PathChar* shareName><Slashes _><WindowsFilePath path>`) {
  loc share = |unc://%3F/| + "<shareName>";
  return appendPath(share, path);
}


@synopsis{Absolute: given the drive and relative to its root.}
private loc mapPathToLoc((WindowsPath) `<Drive drive>:<Slashes _><WindowsFilePath path>`) {
  return appendPath(|file:///<drive>:/|, path);
}

@synopsis{Drive relative: relative to the current working directory on the given drive.}
private loc mapPathToLoc((WindowsPath) `<Drive drive>:<WindowsFilePath path>`) {
  return appendPath(|file:///<drive>:.|, path);
}

@synopsis{Directory relative: relative to the root of the current drive.}
private loc mapPathToLoc((WindowsPath) `<Slash _><WindowsFilePath path>`) {
  return appendPath(|cwdrive:///|, path);
}

@synopsis{Relative to the current working directory on the current drive.}
private loc mapPathToLoc((WindowsPath) `<WindowsFilePath path>`) {
  return appendPath(|cwd:///|, path);
}

@synopsis{Appends every segment of the file path, in order, to the given root.}
private loc appendPath(loc root, WindowsFilePath path) {
  loc result = root;

  // `loc + str` adds one path segment; the separators between segments are not iterated over
  for (segment <- path.segments) {
    result = result + "<segment>";
  }

  return result;
}

@synopsis{An absent optional file path leaves the root untouched.}
private loc appendPath(loc root, (OptionalWindowsFilePath) ``) {
  return root;
}

@synopsis{A present optional file path is appended like any other file path.}
private loc appendPath(loc root, (OptionalWindowsFilePath) `<Slashes _><WindowsFilePath path>`) {
  return appendPath(root, path);
}

// True when the `os.name` system property contains "win" (case-insensitive); guards the
// tests that check for files that are only expected to exist on a Windows machine.
private bool IS_WINDOWS = /win/i := getSystemProperty("os.name");

test bool uncSharePath() {
  loc actual = parseWindowsPath("\\\\Server2\\Share\\Test\\Foo.txt");
  return actual == |unc://Server2/Share/Test/Foo.txt|;
}

test bool uncDrivePath() {
  loc actual = parseWindowsPath("\\\\system07\\C$\\");
  return actual == |unc://system07/C$|;
}


test bool uncDOSDevicePathLocalFileQuestion() {
  loc actual = parseWindowsPath("\\\\?\\c:\\windows\\system32\\cmd.exe");

  // on a real Windows machine the resulting loc must also resolve to an existing file
  if (IS_WINDOWS) {
    assert exists(actual);
  }

  return actual == |unc://%3F/c:/windows/system32/cmd.exe|;
}

test bool uncDOSDevicePathLocalFileDot() {
  return parseWindowsPath("\\\\.\\C:\\Test\\Foo.txt") == |unc://./C:/Test/Foo.txt|;
}

test bool uncDOSDeviceUNCSharePath() {
  // the entire UNC namespace is looped back into the DOS Device UNC encoding via
  // the reserved name "UNC":
  loc viaQuestion = parseWindowsPath("\\\\?\\UNC\\Server\\Share\\Test\\Foo.txt");
  loc viaDot = parseWindowsPath("\\\\.\\UNC\\Server\\Share\\Test\\Foo.txt");

  if (viaQuestion != |unc://%3F/UNC/Server/Share/Test/Foo.txt|) {
    return false;
  }

  return viaDot == |unc://./UNC/Server/Share/Test/Foo.txt|;
}

test bool uncDOSDeviceVolumeGUIDReference() {
  // the curly braces of the volume GUID show up percent-encoded in the loc
  return parseWindowsPath("\\\\.\\Volume{b75e2c83-0000-0000-0000-602f00000000}\\Test\\Foo.txt")
      == |unc://./Volume%7Bb75e2c83-0000-0000-0000-602f00000000%7D/Test/Foo.txt|;
}

test bool uncDOSDeviceBootPartition() {
  return parseWindowsPath("\\\\.\\BootPartition\\") == |unc://./BootPartition|;
}

test bool simpleDrivePathC() {
  loc actual = parseWindowsPath("C:\\Program Files\\Rascal");
  return actual == |file:///C:/Program%20Files/Rascal|;
}

test bool mixedSlashesDrivePathC() {
  loc actual = parseWindowsPath("C:\\Program Files/Rascal");
  return actual == |file:///C:/Program%20Files/Rascal|;
}

test bool trailingSlashesDrivePathC() {
  loc actual = parseWindowsPath("C:\\Program Files\\Rascal\\\\");
  return actual == |file:///C:/Program%20Files/Rascal|;
}

test bool simpleDrivePathD() {
  loc actual = parseWindowsPath("D:\\Program Files\\Rascal");
  return actual == |file:///D:/Program%20Files/Rascal|;
}

test bool uncNetworkShareOk() {
  loc actual = parseWindowsPath("\\\\localhost\\ADMIN$\\System32\\cmd.exe");

  // on Windows the test checks that the share resolves; elsewhere it checks the loc itself
  return IS_WINDOWS
    ? exists(actual)
    : (|unc://localhost/ADMIN$/System32/cmd.exe| == actual);
}
29 changes: 29 additions & 0 deletions src/org/rascalmpl/uri/file/CurrentWorkingDriveResolver.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/*******************************************************************************
* Copyright (c) 2009-2024 CWI
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* * Jurgen J. Vinju - [email protected] - CWI
******************************************************************************/
package org.rascalmpl.uri.file;

import java.nio.file.Paths;

/**
 * Resolver for the {@code cwdrive} scheme: reading and writing files relative to the
 * current working <em>drive</em>.
 *
 * <p>The working drive is not the working directory itself but the file system root that
 * the working directory lives under. It is derived once, at construction time, from the
 * {@code user.dir} system property and handed to {@link AliasedFileResolver} together
 * with the scheme name (presumably the superclass maps the scheme onto that root; see
 * its documentation).
 */
public class CurrentWorkingDriveResolver extends AliasedFileResolver {

    public CurrentWorkingDriveResolver() {
        super("cwdrive", rootOf(System.getProperty("user.dir")));
    }

    /**
     * Returns the textual form of the root component of the given directory, after making
     * the directory absolute.
     *
     * @param directory a directory path, here always the value of {@code user.dir}
     * @return the string form of the root of {@code directory}
     */
    private static String rootOf(String directory) {
        return Paths.get(directory)
            .toAbsolutePath()
            .getRoot()
            .toString();
    }
}
Loading

0 comments on commit 0b0b547

Please sign in to comment.