forked from malaschitz/randomForest
-
Notifications
You must be signed in to change notification settings - Fork 0
/
isolationForest.go
107 lines (100 loc) · 2.49 KB
/
isolationForest.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
package randomforest
import (
"math"
"math/rand"
)
//Forest je base class for whole forest with database, properties of Forest and trees.
type IsolationForest struct {
X [][]float64
Features int // number of attributes
NTrees int // number of trees
NSize int // len of data
Sample int //sample size
Results [][2]int //results - sum of depths and counts for every data
}
// Train run training process. Parameter is number of calculated trees.
func (forest *IsolationForest) Train(trees int) {
forest.defaults()
forest.NTrees = trees
forest.Results = make([][2]int, len(forest.X))
for i := 0; i < len(forest.X); i++ {
forest.Results[i] = [2]int{0, 0}
}
forest.buildNewTrees(0, trees)
}
func (forest *IsolationForest) buildNewTrees(firstIndex int, trees int) {
// constrain parallelism, use buffered channel as counting semaphore
s := make(chan bool, NumWorkers)
for i := 0; i < trees; i++ {
s <- true
go func(j int) {
defer func() { <-s }()
forest.newTree(j)
}(firstIndex + i)
}
// wait for all trees to finish
for i := 0; i < NumWorkers; i++ {
s <- true
}
}
func (forest *IsolationForest) defaults() {
forest.NSize = len(forest.X)
forest.Features = len(forest.X[0])
forest.Sample = 256
if forest.Sample > forest.NSize {
forest.Sample = forest.NSize
}
}
// Calculate a new tree in forest.
func (forest *IsolationForest) newTree(index int) {
//random samples
samples := map[int]bool{}
for {
samples[rand.Intn(forest.NSize)] = true
if len(samples) == forest.Sample {
break
}
}
forest.branch(samples, 0)
}
func (forest *IsolationForest) branch(samples map[int]bool, depth int) {
if len(samples) > 1 {
for i := 0; i < forest.Features; i++ {
feature := rand.Intn(forest.Features)
min := math.MaxFloat64
max := -math.MaxFloat64
for k := range samples {
if forest.X[k][feature] < min {
min = forest.X[k][feature]
}
if forest.X[k][feature] > max {
max = forest.X[k][feature]
}
}
if min != max { //found a new split
splitValue := rand.Float64()*(max-min) + min
sml0 := map[int]bool{}
sml1 := map[int]bool{}
for k := range samples {
if forest.X[k][feature] < splitValue {
sml0[k] = true
} else {
sml1[k] = true
}
}
forest.branch(sml0, depth+1)
forest.branch(sml1, depth+1)
return
}
}
}
//end of branching
mux.Lock()
for k := range samples {
s := forest.Results[k]
s[0] = s[0] + 1
s[1] = s[1] + depth
forest.Results[k] = s
}
mux.Unlock()
}