-
Notifications
You must be signed in to change notification settings - Fork 64
/
Copy pathgithub-backup.hs
472 lines (411 loc) · 14.6 KB
/
github-backup.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
{- github-backup
-
- Copyright 2012 Joey Hess <[email protected]>
-
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
module Main where
import qualified Data.Map as M
import qualified Data.Set as S
import Data.Either
import System.Environment
import Control.Exception (bracket, try, SomeException)
import Text.Show.Pretty
import Control.Monad.State.Strict
import qualified Github.Data.Readable as Github
import qualified Github.Repos as Github
import qualified Github.Repos.Forks as Github
import qualified Github.PullRequests as Github
import qualified Github.Repos.Watching as Github
import qualified Github.Issues as Github
import qualified Github.Issues.Comments
import qualified Github.Issues.Milestones
import Common
import Utility.State
import qualified Git
import qualified Git.Construct
import qualified Git.Config
import qualified Git.Types
import qualified Git.Command
import qualified Git.Ref
import qualified Git.Branch
-- A github user and repo.
data GithubUserRepo = GithubUserRepo String String
deriving (Eq, Show, Read, Ord)
toGithubUserRepo :: Github.Repo -> GithubUserRepo
toGithubUserRepo r = GithubUserRepo
(Github.githubUserLogin $ Github.repoOwner r)
(Github.repoName r)
repoUrl :: GithubUserRepo -> String
repoUrl (GithubUserRepo user remote) =
"git://github.com/" ++ user ++ "/" ++ remote ++ ".git"
repoWikiUrl :: GithubUserRepo -> String
repoWikiUrl (GithubUserRepo user remote) =
"git://github.com/" ++ user ++ "/" ++ remote ++ ".wiki.git"
-- A name for a github api call.
type ApiName = String
-- A request to make of github. It may have an extra parameter.
data Request = RequestSimple ApiName GithubUserRepo
| RequestNum ApiName GithubUserRepo Int
deriving (Eq, Show, Read, Ord)
requestRepo :: Request -> GithubUserRepo
requestRepo (RequestSimple _ repo) = repo
requestRepo (RequestNum _ repo _) = repo
requestName :: Request -> String
requestName (RequestSimple name _) = name
requestName (RequestNum name _ _) = name
data BackupState = BackupState
{ failedRequests :: S.Set Request
, retriedRequests :: S.Set Request
, gitRepo :: Git.Repo
}
{- Our monad. -}
newtype Backup a = Backup { runBackup :: StateT BackupState IO a }
deriving (
Monad,
MonadState BackupState,
MonadIO,
Functor,
Applicative
)
inRepo :: (Git.Repo -> IO a) -> Backup a
inRepo a = liftIO . a =<< getState gitRepo
failedRequest :: Request -> Github.Error-> Backup ()
failedRequest req e = unless (ignorable e) $ do
set <- getState failedRequests
changeState $ \s -> s { failedRequests = S.insert req set }
where
ignorable (Github.JsonError m) =
"disabled for this repo" `isInfixOf` m
ignorable _ = False
runRequest :: Request -> Backup ()
runRequest req = do
-- avoid re-running requests that were already retried
retried <- getState retriedRequests
unless (S.member req retried) $
(lookupApi req) req
type Storer = Request -> Backup ()
data ApiListItem = ApiListItem ApiName Storer Bool
apiList :: [ApiListItem]
apiList =
[ ApiListItem "userrepo" userrepoStore True
, ApiListItem "watchers" watchersStore True
, ApiListItem "pullrequests" pullrequestsStore True
, ApiListItem "pullrequest" pullrequestStore False
, ApiListItem "milestones" milestonesStore True
, ApiListItem "issues" issuesStore True
, ApiListItem "issuecomments" issuecommentsStore False
-- comes last because it recurses on to the forks
, ApiListItem "forks" forksStore True
]
{- Map of Github api calls we can make to store their data. -}
api :: M.Map ApiName Storer
api = M.fromList $ map (\(ApiListItem n s _) -> (n, s)) apiList
{- List of toplevel api calls that are followed to get all data. -}
toplevelApi :: [ApiName]
toplevelApi = map (\(ApiListItem n _ _) -> n) $
filter (\(ApiListItem _ _ toplevel) -> toplevel) apiList
lookupApi :: Request -> Storer
lookupApi req = fromMaybe bad $ M.lookup name api
where
name = requestName req
bad = error $ "internal error: bad api call: " ++ name
userrepoStore :: Storer
userrepoStore = simpleHelper Github.userRepo $ \req r -> do
when (Github.repoHasWiki r == Just True) $
updateWiki $ toGithubUserRepo r
store "repo" req r
watchersStore :: Storer
watchersStore = simpleHelper Github.watchersFor $ storeSorted "watchers"
pullrequestsStore :: Storer
pullrequestsStore = simpleHelper Github.pullRequestsFor $
forValues $ \req r -> do
let repo = requestRepo req
let n = Github.pullRequestNumber r
runRequest $ RequestNum "pullrequest" repo n
pullrequestStore :: Storer
pullrequestStore = numHelper Github.pullRequest $ \n ->
store ("pullrequest" </> show n)
milestonesStore :: Storer
milestonesStore = simpleHelper Github.Issues.Milestones.milestones $
forValues $ \req m -> do
let n = Github.milestoneNumber m
store ("milestone" </> show n) req m
issuesStore :: Storer
issuesStore = withHelper Github.issuesForRepo [] $ forValues $ \req i -> do
let repo = requestRepo req
let n = Github.issueNumber i
store ("issue" </> show n) req i
runRequest (RequestNum "issuecomments" repo n)
issuecommentsStore :: Storer
issuecommentsStore = numHelper Github.Issues.Comments.comments $ \n ->
forValues $ \req c -> do
let i = Github.issueCommentId c
store ("issue" </> show n ++ "_comment" </> show i) req c
forksStore :: Storer
forksStore = simpleHelper Github.forksFor $ \req fs -> do
storeSorted "forks" req fs
mapM_ (traverse . toGithubUserRepo) fs
where
traverse fork = whenM (addFork fork) $
gatherMetaData fork
forValues :: (Request -> v -> Backup ()) -> Request -> [v] -> Backup ()
forValues handle req vs = forM_ vs (handle req)
type ApiCall v = String -> String -> IO (Either Github.Error v)
type ApiWith v b = String -> String -> b -> IO (Either Github.Error v)
type ApiNum v = ApiWith v Int
type Handler v = Request -> v -> Backup ()
type Helper = Request -> Backup ()
simpleHelper :: ApiCall v -> Handler v -> Helper
simpleHelper call handle req@(RequestSimple _ (GithubUserRepo user repo)) =
either (failedRequest req) (handle req) =<< liftIO (call user repo)
simpleHelper _ _ r = badRequest r
withHelper :: ApiWith v b -> b -> Handler v -> Helper
withHelper call b handle req@(RequestSimple _ (GithubUserRepo user repo)) =
either (failedRequest req) (handle req) =<< liftIO (call user repo b)
withHelper _ _ _ r = badRequest r
numHelper :: ApiNum v -> (Int -> Handler v) -> Helper
numHelper call handle req@(RequestNum _ (GithubUserRepo user repo) num) =
either (failedRequest req) (handle num req) =<< liftIO (call user repo num)
numHelper _ _ r = badRequest r
badRequest :: Request -> a
badRequest r = error $ "internal error: bad request type " ++ show r
store :: Show a => FilePath -> Request -> a -> Backup ()
store filebase req val = do
file <- location (requestRepo req) <$> workDir
liftIO $ do
createDirectoryIfMissing True (parentDir file)
writeFile file (ppShow val)
where
location (GithubUserRepo user repo) workdir =
workdir </> user ++ "_" ++ repo </> filebase
workDir :: Backup FilePath
workDir = (</>)
<$> (Git.gitDir <$> getState gitRepo)
<*> pure "github-backup.tmp"
storeSorted :: Ord a => Show a => FilePath -> Request -> [a] -> Backup ()
storeSorted file req val = store file req (sort val)
gitHubRepos :: Backup [Git.Repo]
gitHubRepos = fst . unzip . gitHubPairs <$> getState gitRepo
gitHubRemotes :: Backup [GithubUserRepo]
gitHubRemotes = snd . unzip . gitHubPairs <$> getState gitRepo
gitHubPairs :: Git.Repo -> [(Git.Repo, GithubUserRepo)]
gitHubPairs = filter (not . wiki ) . mapMaybe check . Git.Types.remotes
where
check [email protected] { Git.Types.location = Git.Types.Url u } =
headMaybe $ mapMaybe (checkurl r $ show u) gitHubUrlPrefixes
check _ = Nothing
checkurl r u prefix
| prefix `isPrefixOf` u && length bits == 2 =
Just (r,
GithubUserRepo (bits !! 0)
(dropdotgit $ bits !! 1))
| otherwise = Nothing
where
rest = drop (length prefix) u
bits = split "/" rest
dropdotgit s
| ".git" `isSuffixOf` s = take (length s - length ".git") s
| otherwise = s
wiki (_, GithubUserRepo _ u) = ".wiki" `isSuffixOf` u
{- All known prefixes for urls to github repos. -}
gitHubUrlPrefixes :: [String]
gitHubUrlPrefixes =
[ "[email protected]:"
, "git://github.com/"
, "https://github.com/"
, "http://github.com/"
, "ssh://[email protected]/~/"
]
onGithubBranch :: Git.Repo -> IO () -> IO ()
onGithubBranch r a = bracket prep cleanup (const a)
where
prep = do
oldbranch <- Git.Branch.current r
when (oldbranch == Just branchref) $
error $ "it's not currently safe to run github-backup while the " ++
branchname ++ " branch is checked out!"
exists <- Git.Ref.matching branchref r
if null exists
then checkout [Param "--orphan", Param branchname]
else checkout [Param branchname]
return oldbranch
cleanup Nothing = return ()
cleanup (Just oldbranch)
| name == branchname = return ()
| otherwise = checkout [Param "--force", Param name]
where
name = show $ Git.Ref.base oldbranch
checkout params = Git.Command.run "checkout" (Param "-q" : params) r
branchname = "github"
branchref = Git.Ref $ "refs/heads/" ++ branchname
{- Commits all files in the workDir into git, and deletes it. -}
commitWorkDir :: Backup ()
commitWorkDir = do
dir <- workDir
r <- getState gitRepo
let git_false_worktree ps = boolSystem "git" $
[ Param ("--work-tree=" ++ dir)
, Param ("--git-dir=" ++ Git.gitDir r)
] ++ ps
liftIO $ whenM (doesDirectoryExist dir) $ onGithubBranch r $ do
_ <- git_false_worktree [ Param "add", Param "." ]
_ <- git_false_worktree [ Param "commit",
Param "-a", Param "-m", Param "github-backup"]
removeDirectoryRecursive dir
updateWiki :: GithubUserRepo -> Backup ()
updateWiki fork = do
remotes <- Git.remotes <$> getState gitRepo
if any (\r -> Git.remoteName r == Just remote) remotes
then do
_ <- fetchwiki
return ()
else do
-- github often does not really have a wiki,
-- don't bloat config if there is none
unlessM (addRemote remote $ repoWikiUrl fork) $
removeRemote remote
return ()
where
fetchwiki = inRepo $ Git.Command.runBool "fetch" [Param remote]
remote = remoteFor fork
remoteFor (GithubUserRepo user repo) =
"github_" ++ user ++ "_" ++ repo ++ ".wiki"
addFork :: GithubUserRepo -> Backup Bool
addFork fork = do
remotes <- gitHubRemotes
if fork `elem` remotes
then return False
else do
liftIO $ putStrLn $ "New fork: " ++ repoUrl fork
_ <- addRemote (remoteFor fork) (repoUrl fork)
return True
where
remoteFor (GithubUserRepo user repo) =
"github_" ++ user ++ "_" ++ repo
{- Adds a remote, also fetching from it. -}
addRemote :: String -> String -> Backup Bool
addRemote remotename remoteurl =
inRepo $ Git.Command.runBool "remote"
[ Param "add"
, Param "-f"
, Param remotename
, Param remoteurl
]
removeRemote :: String -> Backup ()
removeRemote remotename = do
_ <- inRepo $ Git.Command.runBool "remote"
[ Param "rm"
, Param remotename
]
return ()
{- Fetches from the github remote. Done by github-backup, just because
- it would be weird for a backup to not fetch all available data.
- Even though its real focus is on metadata not stored in git. -}
fetchRepo :: Git.Repo -> Backup Bool
fetchRepo repo = inRepo $
Git.Command.runBool "fetch"
[Param $ fromJust $ Git.Types.remoteName repo]
{- Gathers metadata for the repo. Retuns a list of files written
- and a list that may contain requests that need to be retried later. -}
gatherMetaData :: GithubUserRepo -> Backup ()
gatherMetaData repo = do
liftIO $ putStrLn $ "Gathering metadata for " ++ repoUrl repo ++ " ..."
mapM_ call toplevelApi
where
call name = runRequest $ RequestSimple name repo
storeRetry :: [Request] -> Git.Repo -> IO ()
storeRetry [] r = do
_ <- try $ removeFile (retryFile r) :: IO (Either SomeException ())
return ()
storeRetry retryrequests r = writeFile (retryFile r) (show retryrequests)
loadRetry :: Git.Repo -> IO [Request]
loadRetry r = maybe [] (fromMaybe [] . readish)
<$> catchMaybeIO (readFileStrict (retryFile r))
retryFile :: Git.Repo -> FilePath
retryFile r = Git.gitDir r </> "github-backup.todo"
retry :: Backup (S.Set Request)
retry = do
todo <- inRepo loadRetry
unless (null todo) $ do
liftIO $ putStrLn $
"Retrying " ++ show (length todo) ++
" requests that failed last time..."
mapM_ runRequest todo
retriedfailed <- getState failedRequests
changeState $ \s -> s
{ failedRequests = S.empty
, retriedRequests = S.fromList todo
}
return retriedfailed
summarizeRequests :: [Request] -> [String]
summarizeRequests = go M.empty
where
go m [] = map format $ sort $ map swap $ M.toList m
go m (r:rs) = go (M.insertWith (+) (requestName r) (1 :: Integer) m) rs
format (num, name) = show num ++ "\t" ++ name
swap (a, b) = (b, a)
{- Save all backup data. Files that were written to the workDir are committed.
- Requests that failed are saved for next time. Requests that were retried
- this time and failed are ordered last, to ensure that we don't get stuck
- retrying the same requests and not making progress when run again.
-}
save :: S.Set Request -> Backup ()
save retriedfailed = do
commitWorkDir
failed <- getState failedRequests
let toretry = S.toList failed ++ S.toList retriedfailed
inRepo $ storeRetry toretry
unless (null toretry) $
error $ unlines $
["Backup may be incomplete; " ++
show (length toretry) ++ " requests failed:"
] ++ map (" " ++) (summarizeRequests toretry) ++
[ "Run again later."
]
newState :: Git.Repo -> BackupState
newState = BackupState S.empty S.empty
backupRepo :: Git.Repo -> IO ()
backupRepo repo = evalStateT (runBackup go) . newState =<< Git.Config.read repo
where
go = do
retriedfailed <- retry
remotes <- gitHubPairs <$> getState gitRepo
when (null remotes) $
error "no github remotes found"
forM_ remotes $ \(r, remote) -> do
_ <- fetchRepo r
gatherMetaData remote
save retriedfailed
backupName :: String -> IO ()
backupName name = do
userrepos <- either (const []) id <$>
Github.userRepos name Github.All
orgrepos <- either (const []) id <$>
Github.organizationRepos name
let repos = userrepos ++ orgrepos
when (null repos) $
error $ "No GitHub repositories found for " ++ name
status <- forM repos $ \repo -> do
let dir = Github.repoName repo
unlessM (doesDirectoryExist dir) $ do
putStrLn $ "New repository: " ++ dir
ok <- boolSystem "git"
[ Param "clone"
, Param (Github.repoGitUrl repo)
, Param dir
]
unless ok $ error "clone failed"
try (backupRepo =<< Git.Construct.fromPath dir)
:: IO (Either SomeException ())
unless (null $ lefts status) $
error "Failed to successfully back everything up. Run again later."
usage :: String
usage = "usage: github-backup [username|organization]"
main :: IO ()
main = getArgs >>= go
where
go [] = backupRepo =<< Git.Construct.fromCwd
go (name:[]) = backupName name
go _= error usage