-
Notifications
You must be signed in to change notification settings - Fork 3
/
csv2csv.py
51 lines (41 loc) · 5.25 KB
/
csv2csv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#!/usr/bin/python
# -*- coding:utf-8 -*-
# Fallback user row. avguser[1:] is rendered once and used as the initial value
# of every slot in `userdata`, so a user id that has no row in newusers.csv
# keeps these values. Element 0 sits in the id-column position and is dropped.
# NOTE(review): presumably the column-wise averages of newusers.csv -- confirm.
avguser = [25562.4003905848, 0.496289443930517, 0, 0.044750716094483, 0.0760305533646062, 0.122628585661505, 0.078562829507244, 0.0908298393457595, 0.0990701149902445, 0.106500892523558, 0.0865125160861804, 0.0855162107185852, 0.079995018473162, 0.0798082112167379, 0.0301797500934036, 0.012993482502387, 0.00475320685790195, 0.00168126530781685, 0.000166050894599195, 2.07563618248993e-05, 0, 0, 0.269811902559359, 0.279925994449584, 0.0839962997224792, 0.00791448247507452, 0.0540034947065474, 0.104943981909754, 0.00700997019220886, 0.0290471785383904, 0.00450200431699044, 0.00483091787439614, 0.0676739644362216, 0.00789392537773666, 0.039654640764724, 0.0387912426765341, 0.0213793812313701, 0.0585054990235379, 0.243478260869565, 0.344146366533046, 0.0158084078528112, 0.00283687943262411, 0.313845205057046, 0.242368177613321, 0.0213177099393566, 0.342460684551341, 0.0508377017165176, 0.0329735841299209, 0.310042142049543, 0.122088601089526, 0.0859492239695755, 0.139068763490595, 0.218994757940179, 0.193175043683832, 0.0877582485353068, 0.0524000411141947, 0.0298900195292425, 0.0173913043478261, 0.00700997019220886, 0.0117586596772536, 0.00277520814061055, 0.0110391612704286, 0.00117175454825779, 0.00478980367972042, 0.000616712920135677, 0.00115119745091993, 0.00129509713228492, 0.000678384212149245, 0.0109980470757529, 0.119745091993011, 0.0761640456367561, 0.114975845410628, 0.172515160859287, 0.170582793709528, 0.0942337341967314, 0.0700585877274129, 0.0442799876657416, 0.0347826086956522, 0.017124062082434, 0.0304245040600267, 0.0078322540857231, 0.0157878507554733, 0.00197348134443417, 0.00758556891766883, 0.00102785486689279, 0.00195292424709631, 0.00189125295508274, 0.00108952615890636, 0.015972864631514, 0.491135703566656, 0.546244222427793, 0.512844491725768, 0.373091252955083, 0.345854301572618, 0.393336149655669, 0.338453304553397, 0.291617430362833, 0.47831737074725, 0.550110292938637, 0.58636432521328, 0.536658982423682, 0.469626570048309, 
0.534464368383184, 0.396645618254702, 0.368254796998664, 0.538262877993627, 0.429791653818481, 0.423440365916333]
# Fallback (artist, user) row. avgword[2:] is rendered once and used as the
# initial value of every slot in `worddata`, so a pair that has no row in
# newwords.csv keeps these values. Elements 0 and 1 sit in the two key-column
# positions and are dropped.
# NOTE(review): presumably the column-wise averages of newwords.csv -- confirm.
avgword = [22.4717711600071, 24637.1264824473, 0.000202872334130734, 0.00489429506090396, 0.00168214977050067, 0.193388052510122, 0.168333319244977, 0.106313556098427, 0.00201181731346312, 0.523173937667475, 0.716764862511729, 0.000380385626495127, 0.00595092180116821, 0.000600163988470089, 0.096601043101918, 0.0129753763704449, 0.0363310538372457, 0.130396192762529, 34.7083009918459, 0.0349164584166808, 0.0187725008490618, 0.0515167651251174, 0.0933043676722936, 0.0220272236749879, 0.0396736829776393, 0.000639605217959256, 0.0297552622332295, 0.0209320325819038, 0.00555446636648828, 0.0654646619584374, 0.0898132729224605, 0.0137901526368622, 0.048486951518646, 0.145609861782486, 0.0661989438476249, 0.145808538084549, 0.0024742622905266, 0.101577501406698, 0.164029331884427, 0.153920930507773, 0.0363881096983869, 0.0921040396953534, 0.0368147187724036, 0.0182852771036537, 0.0352929186053027, 0.146285361209502, 0.00213762796528488, 0.0469770116229619, 0.0665068921158236, 0.106888361045131, 0.103025333682725, 0.120184951944616, 0.0958402718489277, 0.00791314127819944, 0.14099627222086, 0.206685315572159, 0.0714638658523129, 0.040884666806738, 0.0437485217144267, 0.0662690922662844, 0.003534660415038, 0.0782326438491644, 0.0135635854831379, 0.0573201422802906, 0.0230761390880825, 0.0866095806459793, 0.0829473561115948, 0.144208417511264, 0.151875301138621, 0.00154851789611188, 0.191638278628245, 0.0887989112518068, 0.0149624698632621, 0.0401349101021969, 0.0734506288729499, 0.10660862345421, 0.0160492038695778, 0.134681870821041, 0.0297398370065663, 0.0402135659108497, 0.00375347272644512, 0.088739740154352, 0.00292871862960291, 0.0381738108722665, 0.0228743032576158, 0.0219346723150089, 0.0313213190203423, 0.0571790397339329, 0.0497380182291643, 0.0100919570095142, 0.0324546768992819, 0.00852611193895695, 0.0810499974268865, 0.0676387458997592, 0.0044772365257148, 0.0190033882923929, 0.099197849049883, 0.0568313562053214, 0.0354041170277513, 0.0920364155839765, 
0.0288601402127466]
# Table sizes. A user id indexes `userdata` directly; an (artist, user) pair
# indexes `worddata` at artist + _ARTIST_SLOTS * user. This layout assumes
# artist ids lie in range(_ARTIST_SLOTS) and user ids in range(_USER_SLOTS).
_USER_SLOTS = 50928
_ARTIST_SLOTS = 50

# Every slot starts as the fallback row (key columns dropped) rendered as one
# comma-joined string; rows read from the CSV files below overwrite their slot.
userdata = [",".join([str(each) for each in avguser[1:]])] * _USER_SLOTS
worddata = [",".join([str(each) for each in avgword[2:]])] * (_ARTIST_SLOTS * _USER_SLOTS)

# newusers.csv: one header line, then rows of <user id>,<feature>,...
# The `with` block guarantees the handle is closed even if a row is malformed.
with open("newusers.csv", "r") as userfile:
    userfile.readline()  # skip the header
    for row in userfile:
        # Strip only the line terminator: slicing off the last character
        # would eat real data when the final line has no trailing newline.
        fields = row.rstrip("\r\n").split(",")
        if fields == [""]:
            continue  # tolerate blank lines, e.g. one at the end of the file
        userdata[int(fields[0])] = ",".join(fields[1:])

# newwords.csv: one header line, then rows of <artist id>,<user id>,<feature>,...
with open("newwords.csv", "r") as wordfile:
    wordfile.readline()  # skip the header
    for row in wordfile:
        fields = row.rstrip("\r\n").split(",")
        if fields == [""]:
            continue
        slot = int(fields[0]) + int(fields[1]) * _ARTIST_SLOTS
        worddata[slot] = ",".join(fields[2:])
def train2csv(name, users=None, words=None):
    """Write an enlarged copy of a training CSV to ``name + ".enlarged.csv"``.

    The first line of *name* is treated as a header and skipped. Every other
    non-blank line must hold exactly five comma-separated fields, in the
    order artist, track, user, rating, time. For each row one output line is
    written: user, artist, track, the user's feature string and the
    (artist, user) feature string, joined by single spaces. The rating and
    time fields are not copied to the output.

    Args:
        name: Path of the input CSV file.
        users: Optional sequence of feature strings indexed by user id.
            Defaults to the module-level ``userdata`` table.
        words: Optional sequence of feature strings indexed by
            ``artist + 50 * user``. Defaults to the module-level
            ``worddata`` table.

    Raises:
        ValueError: If a row does not have exactly five fields, or an id
            used for lookup is not an integer.
        IndexError: If an id falls outside the lookup tables.
    """
    if users is None:
        users = userdata
    if words is None:
        words = worddata

    # Both handles are closed on exit, including when a malformed row raises.
    with open(name, "r") as src, open(name + ".enlarged.csv", "w") as dst:
        src.readline()  # skip the header
        for line in src:  # stream the file instead of loading it whole
            row = line.rstrip("\r\n")
            if not row:
                continue  # tolerate blank lines, e.g. one at the end
            artist, track, user, _rating, _time = row.split(",")
            user_id = int(user)
            dst.write(" ".join([
                user,
                artist,
                track,
                users[user_id],
                words[int(artist) + 50 * user_id],
            ]) + "\n")
def test2csv(name):
f = open(name,"r")
g = open(name+".enlarged.csv","w")
f.readline()
l = f.readlines()
for line in l:
(artist,track,user,time) = line[:-1].split(',')
g.write(' '.join([user,artist,track,userdata[int(user)],worddata[int(artist)+50*int(user)]])+'\n')
g.close()
def main():
    """Enlarge train.csv, then test.csv, both read from the working directory."""
    for convert, path in ((train2csv, "train.csv"), (test2csv, "test.csv")):
        convert(path)


# Run the conversion only when executed as a script, not on import.
if __name__ == "__main__":
    main()