forked from ryanbressler/CloudForest
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfeatureinterfaces.go
78 lines (71 loc) · 2.43 KB
/
featureinterfaces.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
package CloudForest
import ()
// Package-level thresholds, kept as untyped constants.
// NOTE(review): the code that consumes these is outside this file. Judging by
// the names only, the first three look like category-count cut-offs used when
// choosing a split-search strategy, and minImp looks like a minimum impurity
// value — confirm against the callers before relying on that reading.
const (
	maxExhaustiveCats      = 5
	maxNonRandomExahustive = 10
	maxNonBigCats          = 30

	minImp = 1e-12
)
//Feature contains all methods needed for a predictor feature.
type Feature interface {
NCats() (n int)
Length() (l int)
GetStr(i int) (value string)
IsMissing(i int) bool
GoesLeft(i int, splitter *Splitter) bool
PutMissing(i int)
SplitImpurity(l []int, r []int, counter *[]int) (impurityDecrease float64)
Impurity(cases *[]int, counter *[]int) (impurity float64)
FindPredicted(cases []int) (pred string)
BestSplit(target Target,
cases *[]int,
parentImp float64,
leafSize int,
allocs *BestSplitAllocs) (codedSplit interface{}, impurityDecrease float64)
DecodeSplit(codedSplit interface{}) (s *Splitter)
ShuffledCopy() (fake Feature)
Copy() (copy Feature)
Shuffle()
ImputeMissing()
GetName() string
}
// NumFeature extends Feature with the methods needed to implement different
// types of regression. Regression targets usually embed it to gain access to
// the underlying data.
type NumFeature interface {
	Feature

	// Element access by index i, as float64.
	Get(i int) float64
	Put(i int, v float64)

	// Methods evaluated over a set of cases.
	Mean(cases *[]int) float64
	Predicted(cases *[]int) float64
	Error(cases *[]int, predicted float64) (e float64)

	// NOTE(review): what Norm computes is not visible in this file; confirm
	// against the implementations.
	Norm(i int, v float64) float64
}
// CatFeature extends Feature with the methods needed to implement different
// types of classification. Classification targets usually embed it to gain
// access to the underlying data.
type CatFeature interface {
	Feature

	// Conversion between a category's string value and its int form.
	CatToNum(value string) (numericv int)
	NumToCat(i int) (value string)

	// Element access by index i, as an int or as a string.
	Geti(i int) int
	Puti(i, v int)
	PutStr(i int, v string)

	// Methods evaluated over a set of cases. GiniWithoutAlocate also takes a
	// caller-supplied counts slice.
	Mode(cases *[]int) string
	Modei(cases *[]int) int
	Gini(cases *[]int) float64
	GiniWithoutAlocate(cases, counts *[]int) (e float64)
}
// Target abstracts the methods a feature needs in order to be predictable,
// as either a categorical or a numerical feature, in a random forest.
type Target interface {
	NCats() (n int)
	FindPredicted(cases []int) (pred string)
	Impurity(cases, counter *[]int) (impurity float64)
	SplitImpurity(l, r []int, counter *[]int) (impurityDecrease float64)
}
// BoostingTarget is a Target with an additional Boost method. Boost is called
// after each tree is grown, with the partition generated by that tree; it
// returns the weight the tree should be given and boosts the target for the
// next tree.
type BoostingTarget interface {
	Target

	Boost(partition *[][]int) (weight float64)
}