-
Notifications
You must be signed in to change notification settings - Fork 18
/
Copy pathp23.py
140 lines (116 loc) · 3.59 KB
/
p23.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
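# Back testing: fit a linear SVM on historical key stats, then list the
# forward-sample tickers the model predicts will outperform.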
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm, preprocessing
import pandas as pd
from matplotlib import style
import statistics
style.use("ggplot")
# Column names used to select the model's input features from the CSV data
# frames. Several names are truncated or end in a space; presumably they match
# the CSV headers exactly, so they must not be normalised -- TODO confirm.
FEATURES = [
    "DE Ratio",
    "Trailing P/E",
    "Price/Sales",
    "Price/Book",
    "Profit Margin",
    "Operating Margin",
    "Return on Assets",
    "Return on Equity",
    "Revenue Per Share",
    "Market Cap",
    "Enterprise Value",
    "Forward P/E",
    "PEG Ratio",
    "Enterprise Value/Revenue",
    "Enterprise Value/EBITDA",
    "Revenue",
    "Gross Profit",
    "EBITDA",
    "Net Income Avl to Common ",
    "Diluted EPS",
    "Earnings Growth",
    "Revenue Growth",
    "Total Cash",
    "Total Cash Per Share",
    "Total Debt",
    "Current Ratio",
    "Book Value Per Share",
    "Cash Flow",
    "Beta",
    "Held by Insiders",
    "Held by Institutions",
    "Shares Short (as of",
    "Short Ratio",
    "Short % of Float",
    "Shares Short (prior ",
]
def Build_Data_Set(csv_path="key_stats_acc_perf_WITH_NA.csv"):
    """Load the back-test CSV and return shuffled, scaled training data.

    The rows are shuffled, missing values are replaced with 0, and the
    ``FEATURES`` columns are standardised with ``preprocessing.scale``.

    Args:
        csv_path: CSV file to load; its first column is used as the row
            index. Defaults to the file that still contains N/A entries
            ("key_stats_acc_perf_NO_NA.csv" is the alternative the original
            script mentioned).

    Returns:
        A tuple ``(X, y, Z)``:
        X -- scaled array of the ``FEATURES`` columns, one row per sample.
        y -- list of "Status" values with "underperform" mapped to 0 and
             "outperform" mapped to 1 (any other value is left as-is).
        Z -- array of the "stock_p_change" and "sp500_p_change" columns,
             row-aligned with X.
    """
    # DataFrame.from_csv was removed in pandas 1.0; read_csv with index_col=0
    # reads the same layout (first column as index).
    data_df = pd.read_csv(csv_path, index_col=0)

    # Shuffle rows by position so duplicate index labels cannot break it.
    data_df = data_df.iloc[np.random.permutation(len(data_df))]

    # read_csv already parses "NaN"/"N/A" as float NaN, so the string
    # replacements alone would leave NaN in the features; fillna(0) applies
    # the intended "missing -> 0" rule in both cases.
    data_df = data_df.replace("NaN", 0).replace("N/A", 0).fillna(0)

    X = preprocessing.scale(data_df[FEATURES].values)
    y = (
        data_df["Status"]
        .replace("underperform", 0)
        .replace("outperform", 1)
        .values.tolist()
    )
    Z = np.array(data_df[["stock_p_change", "sp500_p_change"]])
    return X, y, Z
def Analysis():
    """Train a linear SVM, back-test it, and print forward-sample picks.

    Steps:
      1. Build the training set with ``Build_Data_Set`` and print its size.
      2. Fit ``svm.SVC(kernel="linear", C=1.0)`` on all but the last
         ``test_size`` rows.
      3. Tally accuracy and hypothetical returns on the held-out rows (the
         printed summary of these tallies is currently disabled).
      4. Load "forward_sample_WITH_NA.csv", predict every row, and print each
         ticker predicted as class 1, then the count and the full list.

    Returns:
        None. Results are written to stdout.
    """
    test_size = 1
    invest_amount = 10000  # dollars

    X, y, Z = Build_Data_Set()
    print(len(X))

    clf = svm.SVC(kernel="linear", C=1.0)
    clf.fit(X[:-test_size], y[:-test_size])  # train data

    # Back test on the held-out tail. predict() needs a 2-D array, so the
    # whole slice is predicted in one call rather than one 1-D row at a time.
    correct_count = 0
    total_invests = 0
    if_market = 0
    if_strat = 0
    holdout_predictions = clf.predict(X[-test_size:])
    for predicted, actual, changes in zip(
        holdout_predictions, y[-test_size:], Z[-test_size:]
    ):
        if predicted == actual:
            correct_count += 1
        if predicted == 1:
            # changes[0] is the stock % change, changes[1] the S&P 500 % change.
            if_strat += invest_amount + invest_amount * (changes[0] / 100.0)
            if_market += invest_amount + invest_amount * (changes[1] / 100.0)
            total_invests += 1

    # Summary report, currently disabled:
    # print("correct_count=%s"%float(correct_count))
    # print("test_size=%s"%float(test_size))
    # print('_'*120)
    # on OS X with 64-bit python 2.7.6 had to add float(), otherwise result was zero:
    # print("Accuracy: %s%%" % ((float(correct_count) / float(test_size)) * 100.00))
    # print("Total Trades: %s" % total_invests)
    # print("Ending with Strategy: %s" % if_strat)
    # print("Ending with Market: %s" % if_market)
    # compared = ((if_strat - if_market) / if_market) * 100.0
    # do_nothing = total_invests * invest_amount
    # avg_market = ((if_market - do_nothing) / do_nothing) * 100.0
    # avg_strat = ((if_strat - do_nothing) / do_nothing) * 100.0
    # print('*'*120)
    # print("Compared to market, we earn: %s%% more" % str(compared))
    # print('*'*120)
    # print("Average investment return: %s%%" % str(avg_strat))
    # print("Average market return: %s%%" % str(avg_market))
    # print('_'*120)

    # Forward sample. DataFrame.from_csv was removed in pandas 1.0; read_csv
    # with index_col=0 reads the same layout. fillna(0) covers the "NaN"/"N/A"
    # entries that read_csv has already turned into float NaN.
    # ("forward_sample_NO_NA.csv" is the alternative input file.)
    forward_df = pd.read_csv("forward_sample_WITH_NA.csv", index_col=0)
    forward_df = forward_df.replace("NaN", 0).replace("N/A", 0).fillna(0)

    # NOTE(review): the forward sample is scaled with its own mean/std rather
    # than the training set's -- confirm this is intended.
    forward_X = preprocessing.scale(forward_df[FEATURES].values)
    tickers = forward_df["Ticker"].values.tolist()

    invest_list = []
    for ticker, predicted in zip(tickers, clf.predict(forward_X)):
        if predicted == 1:
            print(ticker)
            invest_list.append(ticker)

    print(len(invest_list))
    print(invest_list)
# Script entry point: this call is at module level, so the analysis runs
# whenever the file is executed or imported.
Analysis()