From c4de1e7c731c8d1b12213f93addb18fe699e5977 Mon Sep 17 00:00:00 2001 From: Jasper Reef <51498987+Blugatroff@users.noreply.github.com> Date: Wed, 30 Oct 2024 20:31:24 +0100 Subject: [PATCH] Don't use matcher from a .gitignore against includePatterns (#1297) * Don't use matcher from a .gitignore against includePatterns When attempting to determine whether a .gitignore should be respected, we previously tried to see, whether any of the patterns in the .gitignore match against the includePatterns. As @fsoikin noticed [here](https://github.com/purescript/spago/pull/1296/files/b84981dd086219bc1157a440667aca92ea23efb4#r1815996597) this does not really make any sense in case the includePatterns really are actual patterns and not just paths. Therefore, instead of matching the .gitignore patterns against the includePatterns directly, we now only match them against the base of each includePattern. The base of a pattern is always a path. However, some includePatterns may not have a base, for example when searching for `**/foo/bar`. What should one do here? I think disrespecting .gitignores is not really feasible in such a case, because how can we tell if something that the .gitignore excludes could match the baseless pattern `**/foo/bar` without actually walking the excluded directories? So for example, if there was a file: `./abc/foo` and a .gitignore with the pattern `abc` and we glob for `**/foo`, then we do not find `./abc/foo`, because the .gitignore was respected. I think, this is the behaviour we would want anyway, we only ignore a .gitignore, when it is obvious that the glob target is necessarily within an ignored directory. Like globbing for `.spago/p/arrays-7.3.0/src/**/*.purs` when `.spago` is gitignored. This was already the behaviour previously. In the above example, we would have previously matched `abc` against `**/foo`, which would be false and therefore no conflict would have been dectected, so the .gitignore would also have been respected. This PR only makes this behaviour explicit and removes the, incidentally working, match of a pattern against another pattern. The new test also passes without this change, but i think it's a nice kind of documentation. I wonder, whether the globbing code should even attempt to disrespect .gitignore files, doesn't the calling code always know, whether it should limit its search to the non-ignored files? For example, when globbing for the source files of a package in .spago, we know beforehand, that a nonGitignoringGlob (otherwise known as `glob` :^D) would suffice. * style: rearrange let-bound vals, use guard, don't `or` twice, use plural --- src/Spago/Glob.purs | 34 ++++++++++++++++++++++------------ test/Spago/Glob.purs | 5 +++++ 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/src/Spago/Glob.purs b/src/Spago/Glob.purs index 29838fcea..6cfaa9735 100644 --- a/src/Spago/Glob.purs +++ b/src/Spago/Glob.purs @@ -12,7 +12,7 @@ import Control.Monad.Maybe.Trans (runMaybeT) import Control.Monad.Trans.Class (lift) import Data.Array as Array import Data.Filterable (filter) -import Data.Foldable (any, traverse_) +import Data.Foldable (all, any, traverse_) import Data.String as String import Data.String as String.CodePoint import Effect.Aff as Aff @@ -101,6 +101,15 @@ fsWalk cwd ignorePatterns includePatterns = Aff.makeAff \cb -> do canceled <- Ref.new false let + -- The base of every includePattern + -- The base of a pattern is its longest non-glob prefix. + -- For example: foo/bar/*/*.purs => foo/bar + -- **/spago.yaml => "" + includePatternBases :: Array String + includePatternBases = map (_.base <<< scanPattern) includePatterns + + allIncludePatternsHaveBase = all (not <<< String.null) includePatternBases + -- Update the ignoreMatcherRef with the patterns from a .gitignore file updateIgnoreMatcherWithGitignore :: Entry -> Effect Unit updateIgnoreMatcherWithGitignore entry = do @@ -119,9 +128,17 @@ fsWalk cwd ignorePatterns includePatterns = Aff.makeAff \cb -> do -- ex. if `includePatterns` is [".spago/p/aff-1.0.0/**/*.purs"], -- and `gitignored` is ["node_modules", ".spago"], -- then add "node_modules" to `ignoreMatcher` but not ".spago" - wouldConflictWithSearch matcher = any matcher includePatterns - - newMatchers = or $ filter (not <<< wouldConflictWithSearch) gitignored + wouldConflictWithSearch matcher = any matcher includePatternBases + + newMatchers :: Array (String -> Boolean) + newMatchers | allIncludePatternsHaveBase = filter (not <<< wouldConflictWithSearch) gitignored + newMatchers = do + -- Some of the include patterns don't have a base, + -- e.g. there is an include pattern like "*/foo/bar" or "**/.spago". + -- In this case, do not attempt to determine whether the gitignore + -- file would exclude some of the target paths. Instead always respect + -- the .gitignore. + gitignored -- Another possible approach could be to keep a growing array of patterns and -- regenerate the matcher on every gitignore. We have tried that (see #1234), @@ -133,17 +150,10 @@ fsWalk cwd ignorePatterns includePatterns = Aff.makeAff \cb -> do -- new matchers together, then the whole thing with the previous matcher. -- This is still prone to stack issues, but we now have a tree so it should -- not be as dramatic. - addMatcher currentMatcher = or [ currentMatcher, newMatchers ] + addMatcher currentMatcher = or $ Array.cons currentMatcher newMatchers Ref.modify_ addMatcher ignoreMatcherRef - -- The base of every includePattern - -- The base of a pattern is its longest non-glob prefix. - -- For example: foo/bar/*/*.purs => foo/bar - -- **/spago.yaml => "" - includePatternBases :: Array String - includePatternBases = map (_.base <<< scanPattern) includePatterns - matchesAnyPatternBase :: String -> Boolean matchesAnyPatternBase relDirPath = any matchesPatternBase includePatternBases where diff --git a/test/Spago/Glob.purs b/test/Spago/Glob.purs index 45129b970..6d76b0d8d 100644 --- a/test/Spago/Glob.purs +++ b/test/Spago/Glob.purs @@ -103,3 +103,8 @@ spec = Spec.around globTmpDir do FS.writeTextFile (Path.concat [ p, "fruits", "right", ".gitignore" ]) hugeGitignore a <- Glob.gitignoringGlob p [ "fruits/**/apple" ] Array.sort a `Assert.shouldEqual` [ "fruits/left/apple", "fruits/right/apple" ] + + Spec.it "does respect .gitignore even though it might conflict with a search path without base" $ \p -> do + FS.writeTextFile (Path.concat [ p, ".gitignore" ]) "fruits" + a <- Glob.gitignoringGlob p [ "**/apple" ] + Array.sort a `Assert.shouldEqual` []