-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.go
198 lines (173 loc) · 5.15 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
package main
import (
"encoding/csv"
"fmt"
"log"
"model_traning/labelencoder"
"os"
"strconv"
dataframe "github.com/rocketlaunchr/dataframe-go"
base "github.com/sjwhitworth/golearn/base"
"github.com/sjwhitworth/golearn/ensemble"
)
// loadCSV reads the entire CSV file at filePath and returns its records, one
// []string per record, with the first record (the header row, if the file has
// one) included.
//
// On failure the returned records are nil and the error is wrapped with the
// step that failed; parse errors additionally carry filePath. The underlying
// error remains reachable through errors.Is and errors.As.
func loadCSV(filePath string) ([][]string, error) {
	file, err := os.Open(filePath)
	if err != nil {
		// The error from os.Open already names the path, so only the step is added.
		return nil, fmt.Errorf("opening CSV: %w", err)
	}
	defer file.Close()

	records, err := csv.NewReader(file).ReadAll()
	if err != nil {
		// csv errors report line/column only; add the path so the caller
		// can tell which file was malformed.
		return nil, fmt.Errorf("parsing CSV %q: %w", filePath, err)
	}
	return records, nil
}
// main trains a random-forest model on data_for_last_mile.csv and prints the
// predictions for the held-out split.
//
// Pipeline:
//  1. Load the CSV; the first record is treated as the header row.
//  2. Label-encode every column that contains at least one cell which does
//     not parse as a number; other columns keep their raw string cells.
//  3. Build one dataframe series per column and convert the dataframe to
//     golearn instances, using column index 7 as the class attribute.
//  4. Shuffle, split into train/test sets, fit the forest and predict.
//
// Any failure is fatal: the error is logged and the process exits non-zero.
func main() {
	const (
		dataPath    = "data_for_last_mile.csv"
		classColumn = 7 // index of the class attribute handed to golearn
	)

	data, err := loadCSV(dataPath)
	if err != nil {
		log.Fatalf("loading training data: %v", err)
	}
	// data[0] and, further down, the first data row are indexed
	// unconditionally, so an empty or header-only file must be rejected here.
	if len(data) < 2 {
		log.Fatalf("%s: need a header row and at least one data row, got %d record(s)", dataPath, len(data))
	}

	// Split the header row from the data rows.
	headers := data[0]
	data = data[1:]
	if classColumn >= len(headers) {
		log.Fatalf("%s: class column index %d out of range for %d column(s)", dataPath, classColumn, len(headers))
	}

	// One encoder slot per column; the slot stays nil for numeric columns.
	encoders := make([]*labelencoder.LabelEncoder, len(headers))

	// encodedData is column-major: encodedData[col][row].
	encodedData := make([][]interface{}, 0, len(headers))
	for col := range headers {
		colValues := make([]interface{}, 0, len(data))
		isString := false
		for _, row := range data {
			// A single non-numeric cell marks the whole column as categorical.
			// ParseFloat alone is enough: every base-10 int64 string is also
			// a valid float string.
			if _, parseErr := strconv.ParseFloat(row[col], 64); parseErr != nil {
				isString = true
			}
			colValues = append(colValues, row[col])
		}
		if !isString {
			// Numeric column: cells are kept as their original strings.
			encodedData = append(encodedData, colValues)
			continue
		}

		fmt.Println(headers[col])
		values := InterfaceToString(colValues)
		encoders[col] = labelencoder.NewLabelEncoder()
		encoders[col].Fit(values)
		encodedCol := encoders[col].Encode(values)

		// Box the encoded labels so they fit the [][]interface{} layout.
		colData := make([]interface{}, len(encodedCol))
		for i := range encodedCol {
			colData[i] = encodedCol[i]
		}
		encodedData = append(encodedData, colData)
	}

	// Row-major view of the same data: transposedData[row][col].
	transposedData := transpose(encodedData)

	// Pick each series type from the dynamic type of the first row's value.
	series := make([]dataframe.Series, len(headers))
	for i, header := range headers {
		switch transposedData[0][i].(type) {
		case string, float64:
			// NOTE(review): numeric columns reach this point as raw strings
			// and are appended as strings below; presumably the Float64
			// series parses them — confirm against dataframe-go.
			series[i] = dataframe.NewSeriesFloat64(header, nil)
		case int:
			series[i] = dataframe.NewSeriesInt64(header, nil)
		default:
			panic(fmt.Sprintf("Unsupported data type for column %s", header))
		}
	}
	fmt.Println(series)

	// Add data to the series, one row at a time.
	for _, row := range transposedData {
		for i, value := range row {
			series[i].Append(value)
		}
	}

	df := dataframe.NewDataFrame(series...)
	fmt.Println(df)

	mlData := base.ConvertDataFrameToInstances(df, classColumn)
	shuffleData := base.Shuffle(mlData)
	train, test := base.InstancesTrainTestSplit(shuffleData, 0.2)

	// NOTE(review): the second argument (100) looks like the per-tree feature
	// count; verify it does not exceed the number of non-class columns in the
	// CSV, otherwise Fit may reject the data.
	rf := ensemble.NewRandomForest(100, 100)
	err = rf.Fit(train)
	if err != nil {
		log.Fatal(err)
	}

	// Predict on the testing set
	predictions, err := rf.Predict(test)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(train, test)
	fmt.Println(predictions)

	// Persisting the model and the per-column encoders is currently disabled:
	// rf.Save("random_forest_model")
	// for col, encoder := range encoders {
	// 	if encoder != nil {
	// 		err := encoder.Save(fmt.Sprintf("encoder_%s.model", headers[col]))
	// 		if err != nil {
	// 			fmt.Printf("Error saving encoder for column %s: %v\n", headers[col], err)
	// 		} else {
	// 			fmt.Printf("Label encoder for column %s saved successfully\n", headers[col])
	// 		}
	// 	}
	// }
}
// InterfaceToString converts every element of val to its default fmt
// representation (the %v verb) and returns the results in the same order.
// The returned slice is always non-nil and has the same length as val.
func InterfaceToString(val []interface{}) []string {
	out := make([]string, 0, len(val))
	for idx := range val {
		// Sprint with a single operand formats it exactly as %v would.
		out = append(out, fmt.Sprint(val[idx]))
	}
	return out
}
// transpose returns a new matrix in which element [i][j] of the input is
// stored at [j][i].
//
// The output width is taken from the first input row: later rows that are
// longer have their extra elements ignored, and a later row that is shorter
// causes an index-out-of-range panic. An empty input yields nil.
func transpose(matrix [][]interface{}) [][]interface{} {
	height := len(matrix)
	if height == 0 {
		return nil
	}
	width := len(matrix[0])

	// Allocate every output row first, then fill by walking the input row by row.
	out := make([][]interface{}, width)
	for c := range out {
		out[c] = make([]interface{}, height)
	}
	for r, row := range matrix {
		for c := 0; c < width; c++ {
			out[c][r] = row[c]
		}
	}
	return out
}
// subsetMatrix builds a new matrix that keeps, for every row of superset,
// only the columns listed in columnsToSelect, in the order given there.
// A column index may appear more than once. An index outside a row's bounds
// causes an index-out-of-range panic. The result and each of its rows are
// always non-nil.
func subsetMatrix(superset [][]interface{}, columnsToSelect []int) [][]interface{} {
	result := make([][]interface{}, 0, len(superset))
	for _, row := range superset {
		picked := make([]interface{}, 0, len(columnsToSelect))
		for _, c := range columnsToSelect {
			picked = append(picked, row[c])
		}
		result = append(result, picked)
	}
	return result
}