From 3b2899830fb7264d2dedc662866b4074156b01d4 Mon Sep 17 00:00:00 2001 From: Fabrizio Ferrai Date: Sun, 29 Sep 2024 03:23:43 +0400 Subject: [PATCH] Prevent double cloning of git repos when fetching them in parallel (#1291) --- src/Spago/Command/Fetch.purs | 18 +++++++- .../expected-stderr/lockfile-up-to-date.txt | 17 ++++++++ test/Spago/Build/Monorepo.purs | 41 ++++++++++++------- 3 files changed, 60 insertions(+), 16 deletions(-) create mode 100644 test-fixtures/monorepo/1208-no-double-cloning/expected-stderr/lockfile-up-to-date.txt diff --git a/src/Spago/Command/Fetch.purs b/src/Spago/Command/Fetch.purs index 29c2f3bb0..704f41adf 100644 --- a/src/Spago/Command/Fetch.purs +++ b/src/Spago/Command/Fetch.purs @@ -27,12 +27,14 @@ import Data.Either as Either import Data.FunctorWithIndex (mapWithIndex) import Data.HTTP.Method as Method import Data.Int as Int +import Data.List as List import Data.Map as Map import Data.Newtype (wrap) import Data.Set as Set import Data.String (joinWith) -import Data.Traversable (sequence) +import Data.Traversable (sequence, traverse_) import Effect.Aff as Aff +import Effect.Aff.AVar as AVar import Effect.Ref as Ref import Node.Buffer as Buffer import Node.Encoding as Encoding @@ -204,11 +206,23 @@ run { packages: packagesRequestedToInstall, ensureRanges, isTest, isRepl } = do fetchPackagesToLocalCache :: ∀ a. Map PackageName Package -> Spago (FetchEnv a) Unit fetchPackagesToLocalCache packages = do { offline } <- ask + -- Before starting to fetch packages we build a Map of AVars to act as locks for each git location. + -- This is so we don't have two threads trying to clone the same repo at the same time. + gitLocks <- liftAff $ map (Map.fromFoldable <<< List.catMaybes) $ for (Map.values packages) case _ of + GitPackage gitPackage -> (Just <<< Tuple gitPackage.git) <$> AVar.new unit + _ -> pure Nothing parallelise $ packages # Map.toUnfoldable <#> \(Tuple name package) -> do let localPackageLocation = Config.getPackageLocation name package -- first of all, we check if we have the package in the local cache. If so, we don't even do the work unlessM (FS.exists localPackageLocation) case package of - GitPackage gitPackage -> getGitPackageInLocalCache name gitPackage + GitPackage gitPackage -> do + -- for git repos it's a little more involved since cloning them takes a while and we risk race conditions + -- and possibly cloning the same repo multiple times - so we use a lock on the git url to prevent that + let lock = Map.lookup gitPackage.git gitLocks + -- Take the lock, do the git thing, release the lock + liftAff $ AVar.take `traverse_` lock + getGitPackageInLocalCache name gitPackage + liftAff $ AVar.put unit `traverse_` lock RegistryVersion v -> do -- if the version comes from the registry then we have a longer list of things to do let versionString = Registry.Version.print v diff --git a/test-fixtures/monorepo/1208-no-double-cloning/expected-stderr/lockfile-up-to-date.txt b/test-fixtures/monorepo/1208-no-double-cloning/expected-stderr/lockfile-up-to-date.txt new file mode 100644 index 000000000..8a7c9f69c --- /dev/null +++ b/test-fixtures/monorepo/1208-no-double-cloning/expected-stderr/lockfile-up-to-date.txt @@ -0,0 +1,17 @@ +Reading Spago workspace configuration... + +✓ Selecting package to build: consumer + +Downloading dependencies... +Cloning +Building... +purs compile... +purs compile... +purs compile... +purs compile... +purs compile... + Src Lib All +Warnings 0 0 0 +Errors 0 0 0 + +✓ Build succeeded. diff --git a/test/Spago/Build/Monorepo.purs b/test/Spago/Build/Monorepo.purs index d7115c949..01828c011 100644 --- a/test/Spago/Build/Monorepo.purs +++ b/test/Spago/Build/Monorepo.purs @@ -5,6 +5,8 @@ import Test.Prelude import Data.Array as Array import Data.String (Pattern(..)) import Data.String as String +import Data.String.Regex as Regex +import Data.String.Regex.Flags as Regex.Flags import Effect.Aff (bracket) import Node.Path as Path import Node.Process as Process @@ -254,19 +256,20 @@ spec = Spec.describe "monorepo" do Spec.it "#1208: clones a monorepo only once, even if multiple packages from it are needed" \{ spago, fixture, testCwd } -> do -- A local file system Git repo to use as a remote for Spago to clone from - let createLibraryRepo = do - let libRepo = Path.concat [ Paths.paths.temp, "spago-1208" ] - whenM (FS.exists libRepo) $ rmRf libRepo - FS.copyTree { src: fixture "monorepo/1208-no-double-cloning/library", dst: libRepo } - git_ libRepo [ "init" ] - git_ libRepo [ "add", "." ] - git_ libRepo [ "config", "--global", "core.longpaths", "true" ] - git_ libRepo [ "config", "user.name", "test-user" ] - git_ libRepo [ "config", "user.email", "test-user@aol.com" ] - git_ libRepo [ "commit", "-m", "Initial commit" ] - git_ libRepo [ "tag", "v1" ] - git_ libRepo [ "tag", "v2" ] - pure libRepo + let + createLibraryRepo = do + let libRepo = Path.concat [ Paths.paths.temp, "spago-1208" ] + whenM (FS.exists libRepo) $ rmRf libRepo + FS.copyTree { src: fixture "monorepo/1208-no-double-cloning/library", dst: libRepo } + git_ libRepo [ "init" ] + git_ libRepo [ "add", "." ] + git_ libRepo [ "config", "--global", "core.longpaths", "true" ] + git_ libRepo [ "config", "user.name", "test-user" ] + git_ libRepo [ "config", "user.email", "test-user@aol.com" ] + git_ libRepo [ "commit", "-m", "Initial commit" ] + git_ libRepo [ "tag", "v1" ] + git_ libRepo [ "tag", "v2" ] + pure libRepo bracket createLibraryRepo rmRf \libRepo -> do let @@ -302,7 +305,11 @@ spec = Spec.describe "monorepo" do { stdoutFile: Nothing , stderrFile: Just $ fixture expectedFixture , result - , sanitize: String.trim >>> String.replaceAll (String.Pattern libRepo) (String.Replacement "") + , sanitize: + String.replaceAll (String.Pattern "\r\n") (String.Replacement "\n") + >>> String.replaceAll (String.Pattern libRepo) (String.Replacement "") + >>> Regex.replace (unsafeFromRight $ Regex.regex "^purs compile: .*$" (Regex.Flags.global <> Regex.Flags.multiline)) "purs compile..." + >>> String.trim } -- First run `spago install` to make sure global cache is populated, @@ -348,6 +355,12 @@ spec = Spec.describe "monorepo" do shouldBeSuccessErr' "monorepo/1208-no-double-cloning/expected-stderr/four-deps.txt" assertRefCheckedOut "lib4" "v4" + -- Lockfile test: when it's up to date but the cache is not populated (i.e. a fresh clone) + -- then there are no double clones. This is a regression test for #1206 + spago [ "build" ] >>= shouldBeSuccess + rmRf ".spago" + spago [ "build" ] >>= shouldBeSuccessErr' "monorepo/1208-no-double-cloning/expected-stderr/lockfile-up-to-date.txt" + where git_ cwd = void <<< git cwd