-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils.py
72 lines (67 loc) · 1.67 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
from copy import deepcopy
from itertools import islice
import json
import os
import uuid
# Template relationship payload shared by every variant defined below.
# NOTE(review): the "<...>" identifier values look like placeholders that
# callers are expected to overwrite -- confirm against usage elsewhere.
BASE = {
    "Source": {
        "Identifier": {"ID": "<ZenodoDOI>", "IDScheme": "doi"},
        "Type": {"Name": "unknown"},
    },
    "RelationshipType": {
        "Name": "IsRelatedTo",
        "SubType": "IsIdenticalTo",
        "SubTypeSchema": "DataCite",
    },
    "Target": {
        "Identifier": {"ID": "<ID>", "IDScheme": "<Scheme>"},
        "Type": {"Name": "unknown"},
    },
    "LinkPublicationDate": "2018-01-01",
    "LinkProvider": [{"Name": "Zenodo"}],
}

# Every variant is built from its own deep copy of BASE, so mutating one
# template (including its nested dicts) never leaks into the others.
# Overriding an existing key keeps the key order of BASE.

# "HasVersion" flavour of the template.
VERSION = dict(
    deepcopy(BASE),
    RelationshipType={
        "Name": "IsRelatedTo",
        "SubType": "HasVersion",
        "SubTypeSchema": "DataCite",
    },
)

# "IsIdenticalTo" flavour; same relationship values as BASE, spelled out
# explicitly.
IDENTITY = dict(
    deepcopy(BASE),
    RelationshipType={
        "Name": "IsRelatedTo",
        "SubType": "IsIdenticalTo",
        "SubTypeSchema": "DataCite",
    },
)

# "References" flavour; carries only a relationship name, no sub-type.
CITES = dict(deepcopy(BASE), RelationshipType={"Name": "References"})
def chunks(l, n):
    """Yield successive n-sized chunks from l.

    Inputs that support ``len()`` are sliced, so every chunk has whatever
    type slicing ``l`` produces (list -> list, str -> str, ...). Inputs
    without a length, such as generators, are consumed lazily and every
    chunk is yielded as a list. The final chunk may hold fewer than n items.

    :param l: a sliceable sized container, or any iterable without ``len()``.
    :param n: maximum number of items per chunk; must be at least 1.
    :raises ValueError: if n is smaller than 1. Because this is a generator
        the error surfaces when iteration starts, not at call time.
    """
    if n < 1:
        # A non-positive size would either fail inside range() or silently
        # produce nothing; reject both cases the same way.
        raise ValueError('chunk size must be at least 1, got {0!r}'.format(n))

    try:
        total = len(l)
    except TypeError:
        total = None

    if total is None:
        # No len(): pull up to n items at a time until the source runs dry.
        iterator = iter(l)
        while True:
            chunk = list(islice(iterator, n))
            if not chunk:
                return
            yield chunk
    else:
        for start in range(0, total, n):
            yield l[start:start + n]
def write_payloads(prefix, iterable, size=None, chunk_size=100):
    """Write ``iterable`` as numbered JSON files inside ``prefix``.

    The items are split with ``chunks(iterable, chunk_size)`` and each chunk
    is dumped as an indented JSON array to ``<prefix>/<index>.json``. The
    index starts at 0 and is zero-padded.

    :param prefix: output directory, e.g. ``'events/zenodo/version'``. It is
        created when missing; an empty prefix writes to the current
        directory.
    :param iterable: items to write. It is handed to ``chunks()`` as-is and
        every item must be JSON serializable.
    :param size: number of items, used only to choose the zero-padding
        width. Ignored whenever ``len(iterable)`` is available.
    :param chunk_size: maximum number of items per file.
    """
    try:
        size = len(iterable)
    except TypeError:
        # No len() (e.g. a generator): keep the caller-supplied size hint.
        pass

    if size:
        # Enough digits for the number of full chunks.
        width = str(len(str(size // chunk_size)))
    else:
        # Size unknown or zero: 5 digits will fit <10 million objects
        # (in chunks of 100).
        width = '5'
    filename_fmt = '{0:0' + width + 'd}.json'

    # Make sure the target directory exists before the first open().
    if prefix and not os.path.isdir(prefix):
        os.makedirs(prefix)

    for idx, chunk in enumerate(chunks(iterable, chunk_size)):
        path = os.path.join(prefix, filename_fmt.format(idx))
        with open(path, 'w') as fp:
            json.dump(list(chunk), fp, indent=2)