This repository has been archived by the owner on Feb 1, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #21 from snyk/fix/ignore_files_processing
fix: parsing of .ignore file by using PathMatcher and internal caches
- Loading branch information
Showing
9 changed files
with
2,013 additions
and
97 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
200 changes: 126 additions & 74 deletions
200
src/main/java/ai/deepcode/javaclient/core/DeepCodeIgnoreInfoHolderBase.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,134 +1,186 @@ | ||
package ai.deepcode.javaclient.core; | ||
|
||
import org.jetbrains.annotations.NotNull; | ||
import org.jetbrains.annotations.Nullable; | ||
|
||
import java.util.HashSet; | ||
import java.util.Map; | ||
import java.util.Set; | ||
import java.nio.file.FileSystems; | ||
import java.nio.file.Path; | ||
import java.nio.file.PathMatcher; | ||
import java.util.*; | ||
import java.util.concurrent.ConcurrentHashMap; | ||
import java.util.regex.PatternSyntaxException; | ||
|
||
public abstract class DeepCodeIgnoreInfoHolderBase { | ||
|
||
private final HashContentUtilsBase hashContentUtils; | ||
private final PlatformDependentUtilsBase pdUtils; | ||
private final DCLoggerBase dcLogger; | ||
|
||
// .ignore file to Line in .ignore file to PathMatcher | ||
private final Map<Object, Map<Integer, PathMatcher>> map_ignore2PathMatchers = new ConcurrentHashMap<>(); | ||
|
||
// .ignore file to Line in .ignore file to PathMatcher | ||
private final Map<Object, Map<Integer, PathMatcher>> map_ignore2ReIncludePathMatchers = new ConcurrentHashMap<>(); | ||
|
||
private final Map<Object, Map<String, Boolean>> project2IgnoredFilePaths = new ConcurrentHashMap<>(); | ||
|
||
protected DeepCodeIgnoreInfoHolderBase( | ||
@NotNull HashContentUtilsBase hashContentUtils) { | ||
@NotNull HashContentUtilsBase hashContentUtils, | ||
@NotNull PlatformDependentUtilsBase pdUtils, | ||
@NotNull DCLoggerBase dcLogger) { | ||
this.hashContentUtils = hashContentUtils; | ||
this.pdUtils = pdUtils; | ||
this.dcLogger = dcLogger; | ||
} | ||
|
||
public void scanAllMissedIgnoreFiles( | ||
@NotNull Collection<Object> allProjectFiles, | ||
@Nullable Object progress) { | ||
allProjectFiles.stream() | ||
.filter(this::is_ignoreFile) | ||
.filter(ignoreFile -> !map_ignore2PathMatchers.containsKey(ignoreFile)) | ||
.forEach(ignoreFile -> update_ignoreFileContent(ignoreFile, progress)); | ||
} | ||
|
||
private static final Map<Object, Set<String>> map_dcignore2Regexps = new ConcurrentHashMap<>(); | ||
private static final Map<Object, Set<String>> map_gitignore2Regexps = new ConcurrentHashMap<>(); | ||
public boolean isIgnoredFile(@NotNull Object fileToCheck) { | ||
return project2IgnoredFilePaths | ||
.computeIfAbsent(pdUtils.getProject(fileToCheck), prj -> new ConcurrentHashMap<>()) | ||
.computeIfAbsent( | ||
pdUtils.getFilePath(fileToCheck), | ||
filePath -> | ||
map_ignore2PathMatchers.keySet().stream() | ||
.filter(ignoreFile -> inScope(filePath, ignoreFile)) | ||
.anyMatch(ignoreFile -> isIgnoredFile(filePath, ignoreFile)) | ||
); | ||
} | ||
|
||
public boolean isDcIgnoredFile(@NotNull Object file) { | ||
return map_dcignore2Regexps.entrySet().stream() | ||
.filter(e -> inScope(e.getKey(), file)) | ||
.flatMap(e -> e.getValue().stream()) | ||
.anyMatch(getFilePath(file)::matches); | ||
private boolean isIgnoredFile(@NotNull String filePath, @NotNull Object ignoreFile) { | ||
final Path path = pathOf(filePath); | ||
return map_ignore2PathMatchers.get(ignoreFile).entrySet().stream() | ||
.anyMatch(line2matcher -> { | ||
final int lineIndex = line2matcher.getKey(); | ||
final PathMatcher pathMatcher = line2matcher.getValue(); | ||
return pathMatcher.matches(path) && | ||
// An optional prefix "!" which negates the pattern; | ||
// any matching file excluded by a _previous_ pattern will become included again. | ||
map_ignore2ReIncludePathMatchers.get(ignoreFile).entrySet().stream() | ||
.filter(e -> e.getKey() > lineIndex) | ||
.noneMatch(e -> e.getValue().matches(path)); | ||
}); | ||
} | ||
|
||
public boolean isGitIgnoredFile(@NotNull Object file) { | ||
return map_gitignore2Regexps.entrySet().stream() | ||
.filter(e -> inScope(e.getKey(), file)) | ||
.flatMap(e -> e.getValue().stream()) | ||
.anyMatch(getFilePath(file)::matches); | ||
private void removeIgnoredFilePaths(@NotNull Object ignoreFile) { | ||
final Object project = pdUtils.getProject(ignoreFile); | ||
project2IgnoredFilePaths | ||
.getOrDefault(project, Collections.emptyMap()) | ||
.keySet() | ||
.removeIf(filePath -> inScope(filePath, ignoreFile)); | ||
} | ||
|
||
protected abstract String getFilePath(@NotNull Object file); | ||
/** copy of {@link Path#of(java.lang.String, java.lang.String...)} due to java 8 compatibility */ | ||
private static Path pathOf(String first, String... more){ | ||
return FileSystems.getDefault().getPath(first, more); | ||
} | ||
|
||
private boolean inScope(@NotNull Object ignoreFile, @NotNull Object fileToCheck) { | ||
return getFilePath(fileToCheck).startsWith(getDirPath(ignoreFile)); | ||
private boolean inScope(@NotNull String filePathToCheck, @NotNull Object ignoreFile) { | ||
return filePathToCheck.startsWith(pdUtils.getDirPath(ignoreFile)); | ||
}; | ||
|
||
public boolean is_ignoreFile(@NotNull Object file) { | ||
return is_dcignoreFile(file) || is_gitignoreFile(file); | ||
} | ||
|
||
protected abstract String getFileName(@NotNull Object file); | ||
|
||
public boolean is_dcignoreFile(@NotNull Object file) { | ||
return getFileName(file).equals(".dcignore"); | ||
return pdUtils.getFileName(file).equals(".dcignore"); | ||
} | ||
|
||
public boolean is_gitignoreFile(@NotNull Object file) { | ||
return getFileName(file).equals(".gitignore"); | ||
return pdUtils.getFileName(file).equals(".gitignore"); | ||
} | ||
|
||
public void remove_dcignoreFileContent(@NotNull Object file) { | ||
map_dcignore2Regexps.remove(file); | ||
} | ||
|
||
public void remove_gitignoreFileContent(@NotNull Object file) { | ||
map_gitignore2Regexps.remove(file); | ||
public void remove_ignoreFileContent(@NotNull Object ignoreFile) { | ||
removeIgnoredFilePaths(ignoreFile); | ||
map_ignore2PathMatchers.remove(ignoreFile); | ||
map_ignore2ReIncludePathMatchers.remove(ignoreFile); | ||
} | ||
|
||
public void removeProject(@NotNull Object project) { | ||
map_dcignore2Regexps.forEach((file, _set) -> { | ||
if (getProjectOfFile(file).equals(project)) map_dcignore2Regexps.remove(file); | ||
map_ignore2PathMatchers.keySet().forEach(file -> { | ||
if (pdUtils.getProject(file).equals(project)) remove_ignoreFileContent(file); | ||
}); | ||
map_gitignore2Regexps.forEach((file, _set) -> { | ||
if (getProjectOfFile(file).equals(project)) map_gitignore2Regexps.remove(file); | ||
map_ignore2ReIncludePathMatchers.keySet().forEach(file -> { | ||
if (pdUtils.getProject(file).equals(project)) remove_ignoreFileContent(file); | ||
}); | ||
project2IgnoredFilePaths.remove(project); | ||
} | ||
|
||
protected abstract Object getProjectOfFile(@NotNull Object file); | ||
|
||
public void update_dcignoreFileContent(@NotNull Object file) { | ||
map_dcignore2Regexps.put(file, parse_ignoreFile2Regexps(file)); | ||
} | ||
|
||
public void update_gitignoreFileContent(@NotNull Object file) { | ||
map_gitignore2Regexps.put(file, parse_ignoreFile2Regexps(file)); | ||
public void update_ignoreFileContent(@NotNull Object ignoreFile, @Nullable Object progress) { | ||
dcLogger.logInfo("Scanning .ignore file: " + pdUtils.getFilePath(ignoreFile)); | ||
parse_ignoreFile2Globs(ignoreFile, progress); | ||
dcLogger.logInfo("Scan FINISHED for .ignore file: " + pdUtils.getFilePath(ignoreFile)); | ||
} | ||
|
||
protected abstract String getDirPath(@NotNull Object file); | ||
|
||
private Set<String> parse_ignoreFile2Regexps(@NotNull Object file) { | ||
Set<String> result = new HashSet<>(); | ||
String basePath = getDirPath(file); | ||
String lineSeparator = "[\n\r]"; | ||
final String fileText = hashContentUtils.doGetFileContent(file); | ||
for (String line : fileText.split(lineSeparator)) { | ||
private void parse_ignoreFile2Globs(@NotNull Object ignoreFile, @Nullable Object progress) { | ||
pdUtils.progressSetText(progress, "parsing file: " + pdUtils.getFilePath(ignoreFile)); | ||
Map<Integer, PathMatcher> ignoreMatchers = new HashMap<>(); | ||
Map<Integer, PathMatcher> reIncludedMatchers = new HashMap<>(); | ||
String basePath = pdUtils.getDirPath(ignoreFile); | ||
String lineSeparator = "\r\n|[\r\n]"; | ||
final String fileText = hashContentUtils.doGetFileContent(ignoreFile); | ||
final String[] lines = fileText.split(lineSeparator); | ||
for (int lineIndex = 0; lineIndex < lines.length; lineIndex++) { | ||
String line = lines[lineIndex]; | ||
|
||
// https://git-scm.com/docs/gitignore#_pattern_format | ||
// todo: `!` negation not implemented yet | ||
line = line.trim(); | ||
if (line.isEmpty() || line.startsWith("#")) continue; | ||
|
||
// An optional prefix "!" which negates the pattern; | ||
// any matching file excluded by a previous pattern will become included again. | ||
// todo??? It is not possible to re-include a file if a parent directory of that file is excluded. | ||
boolean isReIncludePattern = line.startsWith("!"); | ||
if (isReIncludePattern) line = line.substring(1); | ||
|
||
String prefix = basePath; | ||
// If there is a separator at the beginning or middle (or both) of the pattern, then the | ||
// pattern is relative to the directory level of the particular .gitignore file itself. | ||
// Otherwise the pattern may also match at any level below the .gitignore level. | ||
int indexBegMidSepar = line.substring(0, line.length() - 1).indexOf('/'); | ||
if (indexBegMidSepar != 0) prefix += "/"; | ||
if (indexBegMidSepar == -1) { | ||
prefix += ".*"; | ||
} else if (line.endsWith("/*") || line.endsWith("/**")) { | ||
int indexLastSepar = line.lastIndexOf('/'); | ||
if (indexBegMidSepar == indexLastSepar) prefix += ".*"; | ||
prefix += "**/"; | ||
} else if (indexBegMidSepar > 0) { | ||
if (line.endsWith("/*") || line.endsWith("/**")) { | ||
int indexLastSepar = line.lastIndexOf('/'); | ||
if (indexBegMidSepar == indexLastSepar) prefix += "**/"; | ||
} else { | ||
prefix += "/"; | ||
} | ||
} | ||
|
||
// If there is a separator at the end of the pattern then the pattern will only match | ||
// directories, otherwise the pattern can match both files and directories. | ||
String postfix = | ||
(line.endsWith("/")) | ||
? ".+" // should be dir | ||
: "(/.+)?"; // could be dir or file | ||
|
||
String body = | ||
line.replace(".", "\\.") | ||
// An asterisk "*" matches anything except a slash. | ||
.replace("*", "[^/]*") | ||
// The character "?" matches any one character except "/". | ||
.replace("?", "[^/]?") | ||
// A slash followed by two consecutive asterisks then a slash matches zero or more | ||
// directories. For example, "a/**/b" matches "a/b", "a/x/b", "a/x/y/b" and so on. | ||
// A trailing "/**" matches everything inside. For example, "abc/**" matches all | ||
// files inside directory "abc", relative to the location of the .gitignore file, | ||
// with infinite depth. | ||
.replace("[^/]*[^/]*", ".*"); | ||
|
||
result.add(prefix + body + postfix); | ||
(line.endsWith("/")) | ||
? "?**" // should be dir | ||
: "{/?**,}"; // could be dir or file | ||
|
||
// glob sanity check for validity | ||
try { | ||
PathMatcher globToMatch = FileSystems.getDefault() | ||
.getPathMatcher("glob:" + prefix + line + postfix); | ||
|
||
if (isReIncludePattern) { | ||
reIncludedMatchers.put(lineIndex, globToMatch); | ||
} else { | ||
ignoreMatchers.put(lineIndex, globToMatch); | ||
} | ||
} catch (PatternSyntaxException e) { | ||
dcLogger.logWarn("Incorrect Glob syntax in .ignore file: " + e.getMessage()); | ||
} | ||
pdUtils.progressSetFraction(progress, (double) lineIndex/lines.length); | ||
pdUtils.progressCheckCanceled(progress); | ||
} | ||
return result; | ||
map_ignore2ReIncludePathMatchers.put(ignoreFile, reIncludedMatchers); | ||
map_ignore2PathMatchers.put(ignoreFile, ignoreMatchers); | ||
} | ||
} |
Oops, something went wrong.