This repository has been archived by the owner on Sep 6, 2022. It is now read-only.
forked from abusesa/github-backup
-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathgithub-backup.py
executable file
·105 lines (82 loc) · 2.72 KB
/
github-backup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#!/usr/bin/env python3
import os
import re
import sys
import json
import errno
import argparse
import subprocess
import urllib.parse
import requests
def get_json(url, token, timeout=60):
    """Yield the decoded JSON body of each page of a paginated API resource.

    The first request goes to *url*; every following request goes to the
    URL advertised as ``rel="next"`` in the previous response's ``Link``
    header, until a response carries no such link.

    Args:
        url: URL of the first page.
        token: Access token, sent as ``Authorization: token <token>``.
        timeout: Seconds to wait for the server before giving up on a
            request. Passed straight through to ``requests.get``.

    Yields:
        The result of ``response.json()`` for each page, in order.

    Raises:
        requests.HTTPError: if any page answers with an error status.
        requests.Timeout: if the server does not answer within *timeout*.
    """
    # The header does not change between pages, so build it once.
    headers = {"Authorization": "token {0}".format(token)}

    while True:
        # Without an explicit timeout, requests waits indefinitely and a
        # single stalled connection would hang the whole run.
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
        yield response.json()

        # requests exposes the parsed Link header as ``response.links``;
        # a missing "next" entry marks the last page.
        if "next" not in response.links:
            break
        url = response.links["next"]["url"]
def check_name(name):
    """Return *name* unchanged if it is safe to use as one path component.

    A valid name is non-empty, consists only of word characters, ``-`` and
    ``.``, and is not one of the special directory entries ``.`` or ``..``.

    Args:
        name: The candidate name (a string).

    Returns:
        The same *name* that was passed in.

    Raises:
        RuntimeError: if *name* is not a valid name.
    """
    # "." and ".." contain only allowed characters but would make
    # os.path.join() point at the current or the parent directory.
    is_dot_entry = name in (".", "..")

    # fullmatch() is used instead of match() with "$" because "$" also
    # matches just before a trailing newline; "+" rejects the empty string.
    if is_dot_entry or not re.fullmatch(r"[-\.\w]+", name):
        raise RuntimeError("invalid name '{0}'".format(name))
    return name
def mkdir(path):
    """Create directory *path*, including any missing parent directories.

    The directory is requested with mode ``0o770``.

    Args:
        path: The directory to create.

    Returns:
        True if this call created the directory, False if the path was
        already present.

    Raises:
        OSError: for every failure other than "already exists".
    """
    try:
        os.makedirs(path, 0o770)
    except OSError as error:
        # An existing path is the one expected failure; report it through
        # the return value and let everything else propagate.
        if error.errno == errno.EEXIST:
            return False
        raise
    return True
def mirror(repo_name, repo_url, to_path, username, token):
    """Mirror the branches and tags of one repository into a bare repo.

    The bare repository lives at ``to_path/repo_name``. It is created with
    ``git init`` when necessary and then updated with ``git fetch``. The
    credentials are passed inside the fetch URL only.

    Args:
        repo_name: Directory name of the local bare repository.
        repo_url: Clone URL of the remote repository.
        to_path: Directory that holds the local bare repositories.
        username: Login placed in the user part of the fetch URL.
        token: Access token placed in the password part of the fetch URL.

    Returns:
        True if both git commands exited with status 0, otherwise False.
    """
    parsed = urllib.parse.urlparse(repo_url)

    # Percent-encode the credentials so that characters such as "@", ":"
    # or "/" cannot corrupt the authority part of the URL.
    userinfo = "{0}:{1}".format(
        urllib.parse.quote(username, safe=""),
        urllib.parse.quote(token, safe=""),
    )
    modified = list(parsed)
    modified[1] = "{0}@{1}".format(userinfo, parsed.netloc)
    authenticated_url = urllib.parse.urlunparse(modified)

    repo_path = os.path.join(to_path, repo_name)
    mkdir(repo_path)

    # git-init manual:
    # "Running git init in an existing repository is safe."
    init_status = subprocess.call(
        ["git", "init", "--bare", "--quiet"], cwd=repo_path
    )
    if init_status != 0:
        # Do not fetch without a repository at repo_path: git could then
        # resolve a different, enclosing repository, and the forced,
        # pruning fetch below would overwrite that repository's branches.
        print(
            "git init failed in {0} (exit status {1})".format(
                repo_path, init_status
            ),
            file=sys.stderr,
        )
        return False

    # https://github.com/blog/1270-easier-builds-and-deployments-using-git-over-https-and-oauth:
    # "To avoid writing tokens to disk, don't clone."
    # NOTE(review): the URL, token included, is still a command-line
    # argument of the git process; never echo it in messages.
    fetch_status = subprocess.call(
        [
            "git",
            "fetch",
            "--force",
            "--prune",
            "--tags",
            authenticated_url,
            "refs/heads/*:refs/heads/*",
        ],
        cwd=repo_path,
    )
    if fetch_status != 0:
        # Report the local path only, never the authenticated URL.
        print(
            "git fetch failed for {0} (exit status {1})".format(
                repo_path, fetch_status
            ),
            file=sys.stderr,
        )
        return False
    return True
def main():
    """Back up the repositories that the configured token can list.

    Reads the JSON configuration file named on the command line. It must
    contain ``token`` and ``directory`` and may contain ``owners``. Every
    repository returned by the ``/user/repos`` endpoint is mirrored into
    ``<directory>/<owner>/<name>``. When ``owners`` is present and
    non-empty, repositories of other owners are skipped.
    """
    arg_parser = argparse.ArgumentParser(
        description="Backup GitHub repositories"
    )
    arg_parser.add_argument(
        "config", metavar="CONFIG", help="a configuration file"
    )
    options = arg_parser.parse_args()

    with open(options.config, "rb") as config_file:
        settings = json.load(config_file)

    wanted_owners = settings.get("owners")
    token = settings["token"]
    backup_root = os.path.expanduser(settings["directory"])

    created = mkdir(backup_root)
    if created:
        print("Created directory {0}".format(backup_root), file=sys.stderr)

    # The account behind the token supplies the user name for git.
    account = next(get_json("https://api.github.com/user", token))

    for repos in get_json("https://api.github.com/user/repos", token):
        for repo in repos:
            # Both names become path components, so validate them first.
            repo_name = check_name(repo["name"])
            owner_login = check_name(repo["owner"]["login"])
            clone_url = repo["clone_url"]

            # No owner filter configured, or this owner is on the list.
            if not wanted_owners or owner_login in wanted_owners:
                owner_dir = os.path.join(backup_root, owner_login)
                mkdir(owner_dir)
                mirror(
                    repo_name, clone_url, owner_dir, account["login"], token
                )
# Run the backup only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()