This repository has been archived by the owner on Apr 11, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 5
/
git-split-tools.sh
executable file
·552 lines (451 loc) · 15.2 KB
/
git-split-tools.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
#!/bin/bash
#
# git-split-tools.sh -- by Dario Berzano & Alina Grigoras
#
# A set of utilities that help to permanently remove files from the
# history of a Git repository.
#
# a print function with colors, on stderr
function prc() (
declare -A color
color=(
[red]="\033[31m"
[yellow]="\033[33m"
[green]="\033[32m"
[blue]="\033[34m"
[magenta]="\033[35m"
[hi]="\033[1m"
)
selcol=${color[$1]:=${color[hi]}}
nocol="\033[m"
echo -e "${selcol}$2${nocol}" >&2
)
# a print function
function pr() (
prc hi "$1"
)
# break if something is wrong
function fatal() {
"$@"
local rv=$?
if [[ $rv != 0 ]] ; then
prc red "this should not happen, aborting:"
prc red " $* --> returned $rv"
exit 10
fi
}
# update remote branches
function updbr() (
fatal cd "$GitRootSplit"
prc yellow 'updating list of remote branches'
fatal git remote update --prune
)
# list remote branches
function lsbr() (
# the git plumbing interface (to be used in scripts):
# https://www.kernel.org/pub/software/scm/git/docs/git.html#_low_level_commands_plumbing
fatal cd "$GitRootSplit"
remote="$1"
prc yellow "listing all available remote branches from remote \"$remote\""
# produce lines to be either piped in shell, or eval'd
# the %(var) is correctly escaped
# this one produces one line per *remote* branch.
git for-each-ref --shell \
--format 'echo %(refname)' \
"refs/remotes/${remote}/" | \
while read Line ; do
if [[ $(eval "$Line") =~ /([^/]*)$ ]] ; then
pr ${BASH_REMATCH[1]}
else
prc red "should not happen, aborting: $Line"
exit 10
fi
done
)
# cleans all: reverts all to a pristine state (just cloned)
function cleanall() (
fatal cd "$GitRootSplit"
prc yellow "cleaning all"
# move to detached
fatal git clean -f -d
fatal git reset --hard remotes/origin/HEAD
fatal git checkout $(git rev-parse HEAD)
# iterates over all refs and deletes them all
# note: we will restore them with "remote update"
while read Cmd ; do
eval "$Cmd"
done < <( git for-each-ref --shell --format 'fatal git update-ref -d %(refname)' )
# move to master
fatal git remote update --prune
fatal git checkout master
fatal git clean -f -d
fatal git reset --hard remotes/origin/HEAD
fatal git pull
prc green "repository restored to a pristine and updated state: now it looks like a fresh clone :-)"
prc blue "tip: use \"gc\" to get rid of dangling commits"
)
# slim a repository
function slimrepo() (
fatal cd "$GitRootSplit"
# ask for confirmation
right_answer='yes, I intend to proceed!'
prc red "you are about to do something potentially catastrophic:"
prc red " - all backups Git has made (refs/original) will be deleted"
prc red " - all remote refs will be deleted (though locally only)"
prc red "you must confirm this operation by typing: \"${right_answer}\""
read -p ':> ' given_answer
fatal [ "$given_answer" == "$right_answer" ]
prc magenta 'removing backups and all remotes'
fatal git for-each-ref --format="%(refname)" refs/original/ refs/remotes | xargs -n 1 git update-ref -d
prc magenta 'expiring all dangling refs'
fatal git reflog expire --expire=now --all
prc magenta 'garbage collecting (might take a while)'
fatal git gc --prune=now
)
# list all files ever written in all remote branches, also the ones not
# currently present in the working directory, also the ones that have
# been deleted
function lsallfiles() (
# list what changed in revision <rev> (wrt/the previous)
# git diff-tree --no-commit-id --name-only -r <rev>
# if run on every commit, it will produce eventually the full list
# of files ever written! note that this is much faster than using
# git ls-files
# list all commits for a branch (no need to check it out)
# git rev-list <branch>
# list all commits in all remote branches
# git rev-list --remotes
regexp="$1"
invert_regexp="$2"
only_root_dir="$3"
ofile="$4"
istmpfile="$5"
prc yellow 'listing all files ever written to Git history in all branches'
if [[ $regexp != '' ]] ; then
prc magenta "showing only entries matching extended regexp: $regexp"
[[ ${invert_regexp} == 1 ]] && prc magenta 'inverting regexp match'
[[ ${only_root_dir} == 1 ]] && prc magenta 'printing only list of dirs under root'
fi
[[ $ofile != '' && $istmpfile != 1 ]] && prc magenta "writing results on stdout and on file: $ofile"
fatal cd "$GitRootSplit"
[[ $invert_regexp == 1 ]] && invert_regexp='-v'
git rev-list --remotes | while read commit ; do
git diff-tree --no-commit-id --name-only -r $commit | \
if [[ "$regexp" != '' ]] ; then
grep $invert_regexp -E "$regexp"
else
cat
fi | \
if [[ $only_root_dir == 1 ]] ; then
grep -oE '^([^/]*)/'
else
cat
fi
done | sort -u | \
if [[ $ofile != '' ]] ; then
tee "$ofile"
else
cat
fi
)
# rewrite history by removing files forever
function rewritehist() (
prc yellow 'rewriting Git history by removing files forever'
fatal cd "$GitRootSplit"
ifile="$1"
if [[ ! -s $ifile ]] ; then
prc red 'input list is empty, aborting'
return 1
fi
prc magenta "removing the following files (args passed as-is to 'git rm'):"
ifile_tmp=$(mktemp /tmp/ali-split-list-XXXXX)
fatal cp "$ifile" "$ifile_tmp"
while read line ; do
pr "$line"
done < <(fatal cat "$ifile_tmp")
remote="$2"
# creates one local branch per remote branch: remote branches are
# taken from the specified remote.
# note that it does not checkout the branches (i.e. it does not
# change the current working directory.
# in order for it to work, we are moving to a "detached head" state
# and it is better to call this command after "cleanall".
prc magenta "checking out all branches from remote \"${remote}\""
fatal git checkout "refs/remotes/${remote}/HEAD" # detached head
while read RefBranch ; do
RefBranch=$(eval "$RefBranch")
if [[ $RefBranch =~ /([^/]*)$ ]] ; then
ShortBranch=${BASH_REMATCH[1]}
else
prc red "malformed refname: $RefBranch - this should not happen, aborting!"
exit 10
fi
[[ $ShortBranch == 'HEAD' ]] && continue
prc yellow "branch: $RefBranch -> $ShortBranch"
fatal git branch --force --track "$ShortBranch" "$RefBranch"
done < <( git for-each-ref --shell --format 'echo %(refname)' "refs/remotes/${remote}" )
# have a look at http://git-scm.com/docs/git-filter-branch
# --index-filter: applies the command to every commit
# --tag-name-filter cat: applies a "dummy" filter to tags: this is
# needed because we want to keep the same tag names on one side,
# but we want them to point to the *refactored* commits as well:
# if we do not provide any --tag-name-filter, tags will be left
# there, pointing to commits that do not exist anymore
# the final --all is the option passed to 'git rev-list' to retrieve
# the list of all commits to mangle. in our case, if local==remote,
# we might as well pass --remotes
# the complicated index-filter string is derived from here:
# http://stackoverflow.com/questions/11393817/bash-read-lines-in-file-into-an-array
# note: empty commits are removed by --prune-empty, but empty merge
# commits will not!
# check the affected branches with:
# git rev-list $( git for-each-ref --format '%(refname)' refs/heads )
# run while keeping your fingers crossed
fatal git filter-branch \
--force \
--index-filter '( echo ; IFS=$'\''\n\r'\'' GLOBIGNORE="*" ary=($(cat '${ifile_tmp}')) ; git rm -r -f --cached --ignore-unmatch "${ary[@]}" )' \
--prune-empty \
--tag-name-filter cat -- $( git for-each-ref --format '%(refname)' refs/heads )
rm -f ${ifile_tmp}
)
# delete all remote references (branches and tags)
# this command is obviously very dangerous and has an interactive
# confirmation prompt
function delremoterefs() (
fatal cd "$GitRootSplit"
remote="$1"
# ask for confirmation
right_answer='yes, I intend to proceed!'
prc red "you are about to do something potentially catastrophic:"
prc red " - delete all remote branches from the remote named \"${remote}\""
prc red " - delete all tags from the remote named \"${remote}\""
prc red "you must confirm this operation by typing: \"${right_answer}\""
read -p ':> ' given_answer
fatal [ "$given_answer" == "$right_answer" ]
# where does HEAD point to?
ref_head=$( git ls-remote "${remote}" HEAD | awk '{ print $1 }' )
if [[ ! $ref_head =~ ^([a-fA-F0-9]+)$ ]] ; then
prc red "cannot get HEAD from \"${remote}\": this should not happen, aborting!"
exit 10
fi
prc magenta "deleting remote tags and branches from \"${remote}\""
while read RemoteRef ; do
if [[ $RemoteRef =~ ^([a-fA-F0-9]*).+(refs/[^/]+/.+)$ ]] ; then
ref_hash="${BASH_REMATCH[1]}"
ref_name="${BASH_REMATCH[2]}"
# skip annotated tags: they are automatically deleted
l=$(( ${#ref_name} - 3 ))
[[ ${ref_name:$l} == '^{}' ]] && continue
# skip current HEAD
if [[ $ref_hash == $ref_head ]] ; then
prc magenta "not deleting from \"${remote}\" reference \"${ref_name}\": it is the current HEAD"
continue
fi
prc blue "deleting from \"${remote}\" reference \"${ref_name}\"..."
fatal git push "${remote}" :"${ref_name}"
else
prc red "malformed remote refname: $ref_name - this should not happen, aborting!"
exit 10
fi
done < <( git ls-remote --heads --tags "${remote}" )
)
# force push all branches and tags
function forcepushall() (
fatal cd "$GitRootSplit"
remote="$1"
# ask for confirmation
right_answer='yes, I intend to proceed!'
prc red "you are about to do something potentially catastrophic:"
prc red " - force pushing all local branches to the remote \"${remote}\""
prc red " - force pushing all tags to the remote \"${remote}\""
prc red "you must confirm this operation by typing: \"${right_answer}\""
read -p ':> ' given_answer
fatal [ "$given_answer" == "$right_answer" ]
while read local_ref ; do
if [[ $local_ref =~ ^([a-fA-F0-9]*).+(refs/[^/]+/(.+))$ ]] ; then
ref_hash="${BASH_REMATCH[1]}"
ref_name="${BASH_REMATCH[2]}"
ref_short="${BASH_REMATCH[3]}"
prc blue "force pushing branch \"${ref_short}\" (${ref_name}) and its tags to remote \"${remote}\"..."
fatal git push -f --follow-tags "${remote}" "${ref_name}:${ref_short}"
else
prc red "malformed local refname: $ref_name - this should not happen, aborting!"
exit 10
fi
done < <( git show-ref --heads )
)
# list all committers and authors along with their emails and a count
function listauth() (
fatal cd "$GitRootSplit"
ofile="$1"
prc magenta "writing list to ${ofile}"
# %an --> GIT_AUTHOR_NAME
# %ae --> GIT_AUTHOR_EMAIL
# %cn --> GIT_COMMITTER_NAME
# %ce --> GIT_COMMITTER_EMAIL
while read commit ; do
git log -1 --no-walk --format="tformat:%cn;%ce%n%an;%ae" $commit
done < <( git rev-list --all ) | sort | uniq -c | sed -e 's/^\s*\([0-9]\+\) /\1;/' | tee -a "${ofile}"
prc magenta "list written to ${ofile}"
)
# rewrite authors according to a mapfile
function rewriteauth() (
fatal cd "$GitRootSplit"
infile="$1"
verbose="$2"
# maps email to author and email
export mapfile="$infile"
export verbose
function _git_auth_map() {
chcomm=' '
chauth=' '
# author
raw=$( grep "^${GIT_AUTHOR_EMAIL};" "$mapfile" 2> /dev/null | head -n1 | cut -d\; -f2,3 )
name=${raw%;*}
email=${raw##*;}
if [[ $raw != '' && $name != '' && $email != '' ]] ; then
chauth='*'
#echo -e "\n\nauthor is changing! $GIT_AUTHOR_NAME <$GIT_AUTHOR_EMAIL> --> $name <$email>"
export GIT_AUTHOR_NAME=$name
export GIT_AUTHOR_EMAIL=$email
fi
# committer
if [[ $GIT_AUTHOR_EMAIL != $GIT_COMMITTER_EMAIL ]] ; then
raw=$( grep "^${GIT_COMMITTER_EMAIL};" "$mapfile" 2> /dev/null | head -n1 | cut -d\; -f2,3 )
fi
name=${raw%;*}
email=${raw##*;}
if [[ $raw != '' && $name != '' && $email != '' ]] ; then
chcomm='*'
#echo -e "committer is changing! $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> --> $name <$email>\n\n"
export GIT_COMMITTER_NAME=$name
export GIT_COMMITTER_EMAIL=$email
fi
# messages
if [[ $verbose == 1 ]] ; then
echo
echo "author${chauth}: $GIT_AUTHOR_NAME <$GIT_AUTHOR_EMAIL> / committer${chcomm}: $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL>"
fi
}
export -f _git_auth_map
# want to test on a bunch of commits?
# --all --> 59820a28155e835bb38f0823ab966c33074fb29a..HEAD (from 598... to HEAD)
fatal git filter-branch \
--force \
--env-filter '_git_auth_map' \
--tag-name-filter cat -- --all
unset _git_auth_map mapfile verbose
)
# nice time formatting
function nicetime() (
t=$1
hr=$(( t / 3600 ))
t=$(( t % 3600 ))
mn=$(( t / 60 ))
t=$(( t % 60 ))
sc=$t
echo "${hr}h ${mn}m ${sc}s"
)
# the main function
function main() (
while [[ $# -gt 0 ]] ; do
case "$1" in
--source)
GitRootSplit="$2"
shift
;;
--regexp)
RegExp="$2"
shift
;;
--file)
File="$2"
shift
;;
--remote)
Remote="$2"
shift
;;
--invert-match)
RegExpInvert=1
;;
--only-root-dir)
OnlyRootDir=1
;;
--verbose)
Verbose=1
;;
lsbr)
do_lsbr=1
;;
updbr)
do_updbr=1
;;
cleanall)
do_cleanall=1
;;
rewritehist)
do_rewritehist=1
;;
rewriteauth)
do_rewriteauth=1
;;
lsallfiles)
do_lsallfiles=1
;;
delremoterefs)
do_delremoterefs=1
;;
listauth)
do_listauth=1
;;
forcepushall)
do_forcepushall=1
;;
slimrepo)
do_slimrepo=1
;;
*)
prc red "not understood: $1"
return 1
;;
esac
shift
done
GitRootSplit=$( cd "$GitRootSplit" ; pwd )
if [[ ! -d "${GitRootSplit}/.git" && ! -f "${GitRootSplit}/HEAD" ]] ; then
prc red 'set the $GitRootSplit var to the original Git source dir'
return 1
fi
if [[ ${File} == '' ]] ; then
File=$( mktemp /tmp/ali-split-list-XXXXX )
TempFile=1
elif [[ ${File:0:1} != '/' ]] ; then
File="${PWD}/${File}"
fi
if [[ ${Remote} == '' ]] ; then
prc yellow "no remote set: defaulting to \"origin\""
Remote='origin'
fi
export GitRootSplit
prc yellow "working on Git source on: $GitRootSplit"
# process actions in right order, and time them
ts_start=$( date --utc +%s )
[[ $do_cleanall == 1 ]] && cleanall
[[ $do_updbr == 1 ]] && updbr
[[ $do_lsbr == 1 ]] && lsbr "$Remote"
[[ $do_listauth == 1 ]] && listauth "$File"
[[ $do_lsallfiles == 1 ]] && lsallfiles "$RegExp" "$RegExpInvert" "$OnlyRootDir" "$File" "$TempFile"
[[ $do_rewritehist == 1 ]] && rewritehist "$File" "$Remote"
[[ $do_rewriteauth == 1 ]] && rewriteauth "$File" "$Verbose"
[[ $do_slimrepo == 1 ]] && slimrepo
[[ $do_delremoterefs == 1 ]] && delremoterefs "$Remote"
[[ $do_forcepushall == 1 ]] && forcepushall "$Remote"
ts_end=$( date --utc +%s )
ts_delta=$(( ts_end - ts_start ))
[[ ${TempFile} == 1 ]] && rm -f "${TempFile}"
prc magenta "time taken by all operations: $( nicetime $ts_delta )"
)
# entry point
main "$@"
exit $?