-
Notifications
You must be signed in to change notification settings - Fork 4
/
GSheetRead.py
100 lines (84 loc) · 4.72 KB
/
GSheetRead.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
from ENCODETools import GetENCODE
from ENCODETools import LoginGSheet
from ENCODETools import FindGSpreadSheet
from ENCODETools import FindGWorkSheet
from ENCODETools import FindGSheetCells
from ENCODETools import WriteJSON
from identity import keys
if __name__ == "__main__":
    '''
    Read a Google spreadsheet of ENCODE objects and save them to a JSON file.

    For each worksheet named in typelist, the rows are converted to dicts
    keyed by the worksheet's (uncompressed) header names, each value is
    coerced to the type declared in the matching ENCODE schema, and the
    resulting object list is written to json_file.
    '''
    json_file = 'import.json'
    # SECURITY NOTE(review): plaintext credentials are hard-coded in source;
    # move them into identity.py / environment variables before sharing.
    email = "[email protected]" # Stanford accounts won't work!
    password = "abc123"
    spreadname = 'Import Test Sheet'
    typelist = ['biosample', 'construct', 'fly-donor', 'replicate', 'experiment', 'library', 'document']
    # start a spreadsheet login
    sheetclient = LoginGSheet(email, password)
    # get the spreadsheet id
    [spreadid, spreadsheet] = FindGSpreadSheet(sheetclient, spreadname)
    # check for data of each potential object type
    object_list = []
    for workname in typelist:
        print(workname)
        # find the worksheet
        [workid, worksheet] = FindGWorkSheet(sheetclient, spreadid, workname)
        if workid:
            # get rows
            rows = sheetclient.GetListFeed(spreadid, workid).entry
            # get list of compressed header names
            headers = rows[0].custom.keys()
            # get cells from sheet
            cells = FindGSheetCells(sheetclient, spreadid, workid)
            # get schema for current object type
            object_schema = GetENCODE(('/profiles/' + workname + '.json'), keys)
            # replace each compressed header name with the full name taken
            # from the first row of cells (same column order as the feed)
            for colindex, header in enumerate(headers):
                headers[colindex] = cells[colindex].content.text
            # for each row, load in each key/value pair that has a value
            # assigned, converting the value to the schema-defined type
            for row in rows:
                new_object = {u'@type': [workname, u'item']}
                for header in headers:
                    if header not in object_schema[u'properties']:
                        continue
                    # hoist the per-header schema lookup out of the checks below
                    prop = object_schema[u'properties'][header]
                    # the list feed compresses header names: underscores
                    # stripped, lowercased
                    value = row.custom[header.replace('_', '').lower()].text
                    if value is None:
                        continue
                    # Google autoformats dates as MM/DD/YYYY and the API does
                    # not allow disabling it, so rewrite them as YYYY-MM-DD.
                    if prop.get(u'format') == 'date':
                        date = value.split('/')
                        value = date[2] + '-' + date[0].zfill(2) + '-' + date[1].zfill(2)
                    if prop[u'type'] == 'string':
                        new_object.update({header: unicode(value)})
                    elif prop[u'type'] == 'integer':
                        new_object.update({header: int(value)})
                    elif prop[u'type'] == 'float':
                        new_object.update({header: float(value)})
                    elif prop[u'type'] == 'array':
                        value = value.split(', ')
                        if prop[u'items'][u'type'] == 'string':
                            new_object.update({header: value})
                        elif prop[u'items'][u'type'] == 'object':
                            # each element is "prop: value"; fold the pairs
                            # into a single sub-object
                            sub_object = dict()
                            for prop_value_pair in value:
                                pair = prop_value_pair.split(': ')
                                sub_object[pair[0]] = pair[1]
                            new_object.update({header: [sub_object]})
                object_list.append(new_object)
    # write objects to file
    WriteJSON(object_list, json_file)
    print('Done.')