-
Notifications
You must be signed in to change notification settings - Fork 0
/
Processing.py
51 lines (42 loc) · 1.26 KB
/
Processing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
'''
Created on May 22, 2016
@author: miroslvgoncarenko
'''
import pandas
import numpy as np
import itertools
import operator
def most_common(L):
    """Return the item that occurs most often in ``L``.

    When several items share the highest count, the one whose first
    occurrence comes earliest in ``L`` wins.

    Items are grouped by sorting, so they must be mutually orderable but
    do not need to be hashable.

    Args:
        L: Iterable of items. It is materialised once, so generators and
            other one-shot iterators are accepted.

    Returns:
        The most frequent item (earliest-seen on ties).

    Raises:
        ValueError: If ``L`` contains no items.
    """
    items = list(L)
    if not items:
        raise ValueError("most_common() arg is an empty sequence")

    # Pair each item with its position and sort, so equal items become
    # adjacent and can be grouped in a single pass.
    indexed = sorted((item, position) for position, item in enumerate(items))
    groups = itertools.groupby(indexed, key=operator.itemgetter(0))

    def _quality(group):
        """Rank a group by (occurrence count, earliness of first occurrence)."""
        _, occurrences = group
        positions = [position for _, position in occurrences]
        # Negate the first position so that an earlier item ranks higher.
        return len(positions), -min(positions)

    # Pick the highest-count / earliest item.
    return max(groups, key=_quality)[0]
# Load the passenger table from 'titanic.csv', indexed by the 'PassengerId' column.
data = pandas.read_csv('titanic.csv', index_col='PassengerId')

# Count of rows whose 'Pclass' equals 1 (computed here but never printed).
a = np.count_nonzero(data['Pclass'] == 1)

# Positions of the rows whose 'Sex' is 'female'.  The mask is converted to a
# plain ndarray first so np.nonzero does not depend on pandas' dispatch.
n_female = np.nonzero(np.asarray(data['Sex'] == 'female'))

# Series.as_matrix() was removed in pandas 1.0; np.asarray yields the same
# object array of names on every pandas version.
names = np.asarray(data['Name'], dtype=object)

# Fancy indexing returns a copy, so rewriting tt below leaves `data` untouched.
tt = names[n_female]

# Reduce every selected name to a single token.
# NOTE(review): presumably names look like "Surname, Title. Given (Maiden)";
# confirm against the data file.
for i, full_name in enumerate(tt):
    if 'Mrs.' in full_name and '(' in full_name:
        # Take the first word inside the parentheses.
        first_name = full_name.split('(')[1].split(' ')[0]
    else:
        # Take the second word after the ", " separator.
        first_name = full_name.split(', ')[1].split(' ')[1]
    # A one-word parenthesised name keeps its closing bracket; drop it.
    tt[i] = first_name.replace(')', '')
    print(i)
    print(tt[i])

# Report the most frequent extracted token.
print(most_common(tt))