Skip to content
This repository has been archived by the owner on Feb 1, 2023. It is now read-only.

Commit

Permalink
Merge pull request #21 from snyk/fix/ignore_files_processing
Browse files Browse the repository at this point in the history
fix: parsing of .ignore file by using PathMatcher and internal caches
  • Loading branch information
ArtsiomCh authored Apr 14, 2021
2 parents 4b0a661 + 6ecb023 commit 0b30177
Show file tree
Hide file tree
Showing 9 changed files with 2,013 additions and 97 deletions.
7 changes: 6 additions & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ plugins {

group = "io.snyk.code.sdk"
archivesBaseName = "snyk-code-client"
version = "2.1.8"
version = "2.1.9"

repositories {
mavenCentral()
Expand Down Expand Up @@ -38,6 +38,11 @@ compileIntegTestJava {
targetCompatibility = 11
}

compileTestJava {
sourceCompatibility = 11
targetCompatibility = 11
}

dependencies {
implementation "com.squareup.retrofit2:retrofit:2.7.1"
implementation "com.squareup.retrofit2:converter-gson:2.7.1"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,9 @@ public Set<Object> getAllCachedProject() {

public void removeFilesFromCache(@NotNull Collection<Object> files) {
try {
dcLogger.logInfo("Request to remove from cache " + files.size() + " files: " + files);
final List<String> first50FilesName =
files.stream().limit(50).map(pdUtils::getFileName).collect(Collectors.toList());
dcLogger.logInfo("Request to remove from cache " + files.size() + " files: " + first50FilesName);
// todo: do we really need mutex here?
MUTEX.lock();
dcLogger.logInfo("MUTEX LOCK");
Expand Down Expand Up @@ -189,8 +191,10 @@ public void updateCachedResultsForFiles(
dcLogger.logWarn("updateCachedResultsForFiles requested for empty list of files");
return;
}
final List<String> first50FilesName =
allProjectFiles.stream().limit(50).map(pdUtils::getFileName).collect(Collectors.toList());
dcLogger.logInfo(
"Update requested for " + allProjectFiles.size() + " files: " + allProjectFiles.toString());
"Update requested for " + allProjectFiles.size() + " files: " + first50FilesName);
if (!deepCodeParams.consentGiven(project)) {
dcLogger.logWarn("Consent check fail! Project: " + pdUtils.getProjectName(project));
return;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,134 +1,186 @@
package ai.deepcode.javaclient.core;

import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;

import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.nio.file.FileSystems;
import java.nio.file.Path;
import java.nio.file.PathMatcher;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.PatternSyntaxException;

public abstract class DeepCodeIgnoreInfoHolderBase {

private final HashContentUtilsBase hashContentUtils;
private final PlatformDependentUtilsBase pdUtils;
private final DCLoggerBase dcLogger;

// .ignore file -> (index of a line in that .ignore file -> PathMatcher built from the ignore pattern on that line)
private final Map<Object, Map<Integer, PathMatcher>> map_ignore2PathMatchers = new ConcurrentHashMap<>();

// .ignore file -> (index of a line in that .ignore file -> PathMatcher built from the
// re-include ("!"-prefixed) pattern on that line)
private final Map<Object, Map<Integer, PathMatcher>> map_ignore2ReIncludePathMatchers = new ConcurrentHashMap<>();

// project -> (file path -> cached verdict of the "is this path ignored?" check)
private final Map<Object, Map<String, Boolean>> project2IgnoredFilePaths = new ConcurrentHashMap<>();

protected DeepCodeIgnoreInfoHolderBase(
@NotNull HashContentUtilsBase hashContentUtils) {
@NotNull HashContentUtilsBase hashContentUtils,
@NotNull PlatformDependentUtilsBase pdUtils,
@NotNull DCLoggerBase dcLogger) {
this.hashContentUtils = hashContentUtils;
this.pdUtils = pdUtils;
this.dcLogger = dcLogger;
}

/**
 * Parses every .ignore file among the given files that has not been parsed yet.
 *
 * @param allProjectFiles files to look through; only .dcignore/.gitignore files are considered
 * @param progress opaque progress handle, handed on to the parser unchanged
 */
public void scanAllMissedIgnoreFiles(
    @NotNull Collection<Object> allProjectFiles,
    @Nullable Object progress) {
  for (Object candidate : allProjectFiles) {
    if (!is_ignoreFile(candidate)) {
      continue;
    }
    // already parsed .ignore files are left untouched
    if (map_ignore2PathMatchers.containsKey(candidate)) {
      continue;
    }
    update_ignoreFileContent(candidate, progress);
  }
}

private static final Map<Object, Set<String>> map_dcignore2Regexps = new ConcurrentHashMap<>();
private static final Map<Object, Set<String>> map_gitignore2Regexps = new ConcurrentHashMap<>();
/**
 * Tells whether the given file is excluded by any known .ignore file that has it in scope.
 *
 * <p>The verdict is computed once per file path and then served from the per-project cache.
 *
 * @param fileToCheck file whose ignore status is requested
 * @return {@code true} if at least one in-scope .ignore file ignores the file
 */
public boolean isIgnoredFile(@NotNull Object fileToCheck) {
  final Map<String, Boolean> cachedVerdicts =
      project2IgnoredFilePaths.computeIfAbsent(
          pdUtils.getProject(fileToCheck), prj -> new ConcurrentHashMap<>());
  final String pathToCheck = pdUtils.getFilePath(fileToCheck);
  return cachedVerdicts.computeIfAbsent(
      pathToCheck,
      filePath -> {
        for (Object ignoreFile : map_ignore2PathMatchers.keySet()) {
          if (inScope(filePath, ignoreFile) && isIgnoredFile(filePath, ignoreFile)) {
            return true;
          }
        }
        return false;
      });
}

public boolean isDcIgnoredFile(@NotNull Object file) {
return map_dcignore2Regexps.entrySet().stream()
.filter(e -> inScope(e.getKey(), file))
.flatMap(e -> e.getValue().stream())
.anyMatch(getFilePath(file)::matches);
/**
 * Checks a single file path against the patterns of one .ignore file.
 *
 * <p>A path counts as ignored when some ignore pattern matches it and no re-include ("!")
 * pattern located on a later line of the same .ignore file matches it as well.
 *
 * @param filePath path of the file to check
 * @param ignoreFile .ignore file whose parsed patterns are consulted
 * @return {@code true} if the path is ignored by that .ignore file
 */
private boolean isIgnoredFile(@NotNull String filePath, @NotNull Object ignoreFile) {
  final Path candidate = pathOf(filePath);
  for (Map.Entry<Integer, PathMatcher> ignoreRule :
      map_ignore2PathMatchers.get(ignoreFile).entrySet()) {
    final int ignoreLine = ignoreRule.getKey();
    if (!ignoreRule.getValue().matches(candidate)) {
      continue;
    }
    // An optional prefix "!" negates the pattern: a file excluded by a _previous_ pattern
    // becomes included again, so only re-include rules on later lines can cancel this one.
    boolean reIncludedLater = false;
    for (Map.Entry<Integer, PathMatcher> reIncludeRule :
        map_ignore2ReIncludePathMatchers.get(ignoreFile).entrySet()) {
      if (reIncludeRule.getKey() > ignoreLine && reIncludeRule.getValue().matches(candidate)) {
        reIncludedLater = true;
        break;
      }
    }
    if (!reIncludedLater) {
      return true;
    }
  }
  return false;
}

public boolean isGitIgnoredFile(@NotNull Object file) {
return map_gitignore2Regexps.entrySet().stream()
.filter(e -> inScope(e.getKey(), file))
.flatMap(e -> e.getValue().stream())
.anyMatch(getFilePath(file)::matches);
/**
 * Drops cached ignore verdicts for every file path that lies in scope of the given .ignore file.
 *
 * @param ignoreFile .ignore file whose scope determines which cache entries are evicted
 */
private void removeIgnoredFilePaths(@NotNull Object ignoreFile) {
  final Map<String, Boolean> cachedVerdicts =
      project2IgnoredFilePaths.get(pdUtils.getProject(ignoreFile));
  if (cachedVerdicts == null) {
    // nothing cached for this project yet
    return;
  }
  cachedVerdicts.keySet().removeIf(cachedPath -> inScope(cachedPath, ignoreFile));
}

protected abstract String getFilePath(@NotNull Object file);
/**
 * Converts a path string (or a sequence of strings joined into one path) into a {@link Path}
 * of the default file system.
 *
 * <p>Copy of {@link Path#of(java.lang.String, java.lang.String...)}, kept for Java 8 compatibility.
 *
 * @param first the path string or its initial part
 * @param more additional strings to be joined to form the path
 * @return the resulting {@link Path}
 */
private static Path pathOf(String first, String... more){
return FileSystems.getDefault().getPath(first, more);
}

private boolean inScope(@NotNull Object ignoreFile, @NotNull Object fileToCheck) {
return getFilePath(fileToCheck).startsWith(getDirPath(ignoreFile));
/**
 * Tells whether a file path lies inside the directory that holds the given .ignore file.
 *
 * <p>A plain string-prefix test is not sufficient: directory {@code /a/b} must not claim
 * {@code /a/bc/file}. The prefix therefore has to end exactly on a path-separator boundary
 * (or cover the whole path).
 *
 * @param filePathToCheck path of the file to test
 * @param ignoreFile .ignore file whose directory defines the scope
 * @return {@code true} if the path is the directory itself or located below it
 */
private boolean inScope(@NotNull String filePathToCheck, @NotNull Object ignoreFile) {
  final String dirPath = pdUtils.getDirPath(ignoreFile);
  if (!filePathToCheck.startsWith(dirPath)) {
    return false;
  }
  // Empty directory path, exact match, or a directory path that already ends with a separator:
  // the prefix match alone is conclusive.
  if (dirPath.isEmpty()
      || filePathToCheck.length() == dirPath.length()
      || dirPath.endsWith("/")
      || dirPath.endsWith("\\")) {
    return true;
  }
  // Otherwise the character right after the prefix must be a separator.
  final char next = filePathToCheck.charAt(dirPath.length());
  return next == '/' || next == '\\';
}

/**
 * Tells whether the given file is one of the supported .ignore files.
 *
 * @param file file to inspect
 * @return {@code true} for a .dcignore or a .gitignore file
 */
public boolean is_ignoreFile(@NotNull Object file) {
  if (is_dcignoreFile(file)) {
    return true;
  }
  return is_gitignoreFile(file);
}

protected abstract String getFileName(@NotNull Object file);

public boolean is_dcignoreFile(@NotNull Object file) {
return getFileName(file).equals(".dcignore");
return pdUtils.getFileName(file).equals(".dcignore");
}

public boolean is_gitignoreFile(@NotNull Object file) {
return getFileName(file).equals(".gitignore");
return pdUtils.getFileName(file).equals(".gitignore");
}

public void remove_dcignoreFileContent(@NotNull Object file) {
map_dcignore2Regexps.remove(file);
}

public void remove_gitignoreFileContent(@NotNull Object file) {
map_gitignore2Regexps.remove(file);
/**
 * Forgets everything derived from the given .ignore file: the cached ignore verdicts of the
 * file paths in its scope and both of its parsed pattern maps (ignore and re-include).
 *
 * @param ignoreFile .ignore file whose parsed content should be discarded
 */
public void remove_ignoreFileContent(@NotNull Object ignoreFile) {
// evict cached verdicts for paths in scope of this .ignore file
removeIgnoredFilePaths(ignoreFile);
map_ignore2PathMatchers.remove(ignoreFile);
map_ignore2ReIncludePathMatchers.remove(ignoreFile);
}

public void removeProject(@NotNull Object project) {
map_dcignore2Regexps.forEach((file, _set) -> {
if (getProjectOfFile(file).equals(project)) map_dcignore2Regexps.remove(file);
map_ignore2PathMatchers.keySet().forEach(file -> {
if (pdUtils.getProject(file).equals(project)) remove_ignoreFileContent(file);
});
map_gitignore2Regexps.forEach((file, _set) -> {
if (getProjectOfFile(file).equals(project)) map_gitignore2Regexps.remove(file);
map_ignore2ReIncludePathMatchers.keySet().forEach(file -> {
if (pdUtils.getProject(file).equals(project)) remove_ignoreFileContent(file);
});
project2IgnoredFilePaths.remove(project);
}

protected abstract Object getProjectOfFile(@NotNull Object file);

public void update_dcignoreFileContent(@NotNull Object file) {
map_dcignore2Regexps.put(file, parse_ignoreFile2Regexps(file));
}

public void update_gitignoreFileContent(@NotNull Object file) {
map_gitignore2Regexps.put(file, parse_ignoreFile2Regexps(file));
/**
 * Parses the given .ignore file via {@code parse_ignoreFile2Globs}, logging the start and the
 * end of the scan.
 *
 * @param ignoreFile .ignore file to parse
 * @param progress opaque progress handle, passed on to the parser unchanged; may be {@code null}
 */
public void update_ignoreFileContent(@NotNull Object ignoreFile, @Nullable Object progress) {
dcLogger.logInfo("Scanning .ignore file: " + pdUtils.getFilePath(ignoreFile));
parse_ignoreFile2Globs(ignoreFile, progress);
dcLogger.logInfo("Scan FINISHED for .ignore file: " + pdUtils.getFilePath(ignoreFile));
}

protected abstract String getDirPath(@NotNull Object file);

private Set<String> parse_ignoreFile2Regexps(@NotNull Object file) {
Set<String> result = new HashSet<>();
String basePath = getDirPath(file);
String lineSeparator = "[\n\r]";
final String fileText = hashContentUtils.doGetFileContent(file);
for (String line : fileText.split(lineSeparator)) {
private void parse_ignoreFile2Globs(@NotNull Object ignoreFile, @Nullable Object progress) {
pdUtils.progressSetText(progress, "parsing file: " + pdUtils.getFilePath(ignoreFile));
Map<Integer, PathMatcher> ignoreMatchers = new HashMap<>();
Map<Integer, PathMatcher> reIncludedMatchers = new HashMap<>();
String basePath = pdUtils.getDirPath(ignoreFile);
String lineSeparator = "\r\n|[\r\n]";
final String fileText = hashContentUtils.doGetFileContent(ignoreFile);
final String[] lines = fileText.split(lineSeparator);
for (int lineIndex = 0; lineIndex < lines.length; lineIndex++) {
String line = lines[lineIndex];

// https://git-scm.com/docs/gitignore#_pattern_format
// todo: `!` negation not implemented yet
line = line.trim();
if (line.isEmpty() || line.startsWith("#")) continue;

// An optional prefix "!" which negates the pattern;
// any matching file excluded by a previous pattern will become included again.
// todo??? It is not possible to re-include a file if a parent directory of that file is excluded.
boolean isReIncludePattern = line.startsWith("!");
if (isReIncludePattern) line = line.substring(1);

String prefix = basePath;
// If there is a separator at the beginning or middle (or both) of the pattern, then the
// pattern is relative to the directory level of the particular .gitignore file itself.
// Otherwise the pattern may also match at any level below the .gitignore level.
int indexBegMidSepar = line.substring(0, line.length() - 1).indexOf('/');
if (indexBegMidSepar != 0) prefix += "/";
if (indexBegMidSepar == -1) {
prefix += ".*";
} else if (line.endsWith("/*") || line.endsWith("/**")) {
int indexLastSepar = line.lastIndexOf('/');
if (indexBegMidSepar == indexLastSepar) prefix += ".*";
prefix += "**/";
} else if (indexBegMidSepar > 0) {
if (line.endsWith("/*") || line.endsWith("/**")) {
int indexLastSepar = line.lastIndexOf('/');
if (indexBegMidSepar == indexLastSepar) prefix += "**/";
} else {
prefix += "/";
}
}

// If there is a separator at the end of the pattern then the pattern will only match
// directories, otherwise the pattern can match both files and directories.
String postfix =
(line.endsWith("/"))
? ".+" // should be dir
: "(/.+)?"; // could be dir or file

String body =
line.replace(".", "\\.")
// An asterisk "*" matches anything except a slash.
.replace("*", "[^/]*")
// The character "?" matches any one character except "/".
.replace("?", "[^/]?")
// A slash followed by two consecutive asterisks then a slash matches zero or more
// directories. For example, "a/**/b" matches "a/b", "a/x/b", "a/x/y/b" and so on.
// A trailing "/**" matches everything inside. For example, "abc/**" matches all
// files inside directory "abc", relative to the location of the .gitignore file,
// with infinite depth.
.replace("[^/]*[^/]*", ".*");

result.add(prefix + body + postfix);
(line.endsWith("/"))
? "?**" // should be dir
: "{/?**,}"; // could be dir or file

// glob sanity check for validity
try {
PathMatcher globToMatch = FileSystems.getDefault()
.getPathMatcher("glob:" + prefix + line + postfix);

if (isReIncludePattern) {
reIncludedMatchers.put(lineIndex, globToMatch);
} else {
ignoreMatchers.put(lineIndex, globToMatch);
}
} catch (PatternSyntaxException e) {
dcLogger.logWarn("Incorrect Glob syntax in .ignore file: " + e.getMessage());
}
pdUtils.progressSetFraction(progress, (double) lineIndex/lines.length);
pdUtils.progressCheckCanceled(progress);
}
return result;
map_ignore2ReIncludePathMatchers.put(ignoreFile, reIncludedMatchers);
map_ignore2PathMatchers.put(ignoreFile, ignoreMatchers);
}
}
Loading

0 comments on commit 0b30177

Please sign in to comment.