-
Notifications
You must be signed in to change notification settings - Fork 2
/
feature_reduction.py
71 lines (49 loc) · 1.75 KB
/
feature_reduction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import numpy as np
import pandas as pd
import sys
from os import listdir
from os.path import isfile, join
from sklearn.feature_extraction.text import CountVectorizer
import libpmf.libpmf as libpmf
def nFeatures(K, n_columns=None):
    """Convert a percentage ``K`` into a number of latent features.

    ``K`` is treated as a percentage: it is divided by 100 and multiplied by
    the number of columns, the product is truncated to an integer, and 1 is
    added (so a non-negative ``K`` never maps to zero features).

    Args:
        K: Percentage of the columns to keep.
        n_columns: Column count the percentage applies to. When omitted, the
            second dimension of the module-level ``data`` matrix is used.

    Returns:
        ``int(K / 100 * n_columns) + 1``.
    """
    if n_columns is None:
        n_columns = data.shape[1]
    # Divide by a float literal so an integer K is not floor-divided to 0
    # under Python 2.
    return int(K / 100.0 * n_columns) + 1
# Command-line interface: <TYPE> <START> <STEP>.
# Fail with a usage message instead of a bare IndexError when an argument
# is missing.
if len(sys.argv) < 4:
    sys.exit(
        "usage: {0} TYPE START STEP   (TYPE: qW | qC | uW | uC | uT)".format(sys.argv[0])
    )
TYPE = sys.argv[1]  # selects which table/column gets vectorized
START = float(sys.argv[2])  # initial K handed to hill_climbing
STEP = float(sys.argv[3])  # initial step handed to hill_climbing
lb = 0.1  # value substituted into libpmf's "-l" option inside run()
# Column names for the two tab-separated input tables, which carry no
# header row of their own.
_QUESTION_COLUMNS = [
    "question_id", "tag", "word_id", "char_id", "upvotes", "answers", "top_answers",
]
_USER_COLUMNS = ["user_id", "expert_tags", "word_id", "char_id"]

question_info = pd.read_csv(
    "data/question_info.txt", sep="\t", header=None, names=_QUESTION_COLUMNS
)
user_info = pd.read_csv(
    "data/user_info.txt", sep="\t", header=None, names=_USER_COLUMNS
)
# Tokens are obtained by splitting each field on '/'.
# The callable has to be passed as ``tokenizer=``: CountVectorizer's first
# parameter is ``input``, so a positional callable never acts as the
# tokenizer and the default token pattern (which discards one-character
# tokens) would be applied instead.
vctorizer = CountVectorizer(tokenizer=lambda s: s.split('/'))

# TYPE -> (table, column) that is turned into the count matrix ``data``.
_SOURCES = {
    "qW": (question_info, 'word_id'),
    "qC": (question_info, 'char_id'),
    "uW": (user_info, 'word_id'),
    "uC": (user_info, 'char_id'),
    "uT": (user_info, 'expert_tags'),
}
if TYPE not in _SOURCES:
    # Stop here rather than with a NameError on ``data`` much later.
    raise ValueError(
        "unknown TYPE {0!r}; expected one of: {1}".format(
            TYPE, ", ".join(sorted(_SOURCES))
        )
    )
_frame, _column = _SOURCES[TYPE]
data = vctorizer.fit_transform(_frame[_column])
def run(K):
    """Factorize ``data`` with libpmf and return the reconstruction error.

    The rank passed to libpmf (``-k``) is ``nFeatures(K)``; the ``-l`` option
    takes the module-level ``lb``.

    Args:
        K: Percentage forwarded to ``nFeatures`` to choose the rank.

    Returns:
        Mean of the squared element-wise difference between the dense form
        of ``data`` and the product of the trained factors ``W`` and ``H``
        transposed.
    """
    # The parenthesized single-argument form prints the same text on
    # Python 2 and Python 3; the bare print statement is Python-2-only.
    print("TESTING: {0}".format(K))
    options = '-k {0} -l {1} -t 10 -T 10 -N 1'.format(nFeatures(K), lb)
    model = libpmf.train(data, options)
    # toarray() gives a plain ndarray, avoiding the np.matrix type that
    # todense() returns.
    dense = data.toarray()
    reconstruction = np.dot(model['W'], model['H'].transpose())
    return np.square(dense - reconstruction).mean()
def hill_climbing(k, step, prev_error=np.inf, iter=0, MAX_ITER=1000, evaluate=None):
    """Walk ``k`` along one dimension looking for a lower error.

    Each iteration evaluates the error at the current ``k`` and compares it
    with the error of the previous iteration:

    * lower   -> keep going in the same direction with the same step;
    * higher  -> reverse direction and shrink the step by a factor of 10;
    * equal (or not comparable, e.g. NaN) -> stop and return the current ``k``.

    The search also stops once ``MAX_ITER`` evaluations have been made.

    Implemented as a loop rather than recursion so that the iteration cap is
    honoured on every step and large caps cannot exhaust the interpreter's
    recursion limit.

    Args:
        k: Starting position.
        step: Initial step added to ``k`` after each evaluation.
        prev_error: Error to compare the first evaluation against.
        iter: Number of iterations already consumed. (The name shadows the
            builtin; it is kept for backward compatibility with keyword
            callers.)
        MAX_ITER: Maximum number of iterations.
        evaluate: Callable mapping ``k`` to an error value. Defaults to the
            module-level ``run``.

    Returns:
        The value of ``k`` at which the search stopped.
    """
    if evaluate is None:
        evaluate = run

    while iter < MAX_ITER:
        error = evaluate(k)
        if error < prev_error:
            pass  # still improving: keep direction and step size
        elif error > prev_error:
            # Overshot: turn around and take finer steps.
            step = float(-step) / 10
        else:
            # Equal errors, or a NaN that compares false both ways: no
            # measurable progress, so stop instead of falling through.
            return k
        k += step
        prev_error = error
        iter += 1
    return k
# Run the search from the command-line START/STEP values, then report the
# feature count that the resulting percentage maps to.
k = hill_climbing(START, STEP)
# The parenthesized single-argument form prints the same text on Python 2
# and Python 3; the bare print statement is Python-2-only.
print("BEST K : {0}".format(nFeatures(k)))