-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathjoin.py
67 lines (60 loc) · 1.73 KB
/
join.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import sys
import hashlib
import csv
# join.py will perform a join of csv files based on the first column which is used as a key
# (CSV file can be thought of as a database table.)
# this is very useful when you have data in different CSV files that you'd like to merge or join together
# into one CSV file based on some joining key.
#
# An example is you have a database of registration data with names, birthdates, addresses, etc.
# You also have a database of timing data with names, times, etc.
# You need to have a table with names, birthdates, addresses, AND times.
# You simply set the first column as the joining key, in this case name OR any key that can be used to join and run join.py
#
# Form example,
# %cat 1.csv
# 9,1
# 9,2
# 9,3
# 9,4
# 9,5
# 9,6
# 9,7
# 9,8
# 9,9
#
# %cat registrations.csv
# bill,birthdate,address
#
# %cat timings.csv
# bill,time
#
# %cat 1.csv registrations.csv timings.csv |join.py
#
# Will result in the joined table
# 18,9,1,9,2,9,3,9,4,9,5,9,6,9,7,9,8,9,9
# 5,bill,birthdate,address,bill,time
#
# Also we prepend the number of columns in the row since you will probably want to sort by that next to
# group like records.
#
def main():
writer = csv.writer(sys.stdout,lineterminator='\n')
savekey = None
csvReader = csv.reader(sys.stdin, delimiter=',', quotechar='"')
dict={}
for row in csvReader:
if row[0] in dict:
dict[row[0]].append(row)
else:
dict[row[0]] = [row]
for key in dict:
mergelist=[]
totallen=0
for listitem in dict[key]:
mergelist.extend(listitem)
totallen = totallen + len(listitem)
mergelist.insert(0,totallen)
writer.writerow(mergelist)
if __name__ == '__main__':
main()