ak0da/main.go

91 lines
1.7 KiB
Go

package main
import (
"fmt"
"log"
"os"
"github.com/sjwhitworth/golearn/base"
"github.com/sjwhitworth/golearn/trees"
)
// fname is the CSV file that loadData hands to prepData and then parses.
var fname = "data.csv"

// outliers is how many predictions, located right after the first
// randomDataSize entries, main prints as outlier scores.
var outliers = 15

// randomDataSize is how many leading predictions main treats as scores for
// normal data when computing the average and minimum.
var randomDataSize = 10001
// main loads the dataset, fits an isolation forest to it, and reports the
// resulting anomaly scores on stderr: the average and minimum score over the
// first randomDataSize predictions (treated as normal data), followed by the
// individual scores of the next `outliers` predictions.
//
// The program exits via log.Fatal if the data cannot be loaded or if the
// forest returns fewer predictions than randomDataSize + outliers.
func main() {
	data, err := loadData()
	if err != nil {
		log.Fatalln(err)
	}

	// NOTE(review): these three numbers mirror the arguments train passes to
	// the forest constructor; they are duplicated here and must be kept in
	// sync by hand.
	log.Printf(
		"Train Isolation Forest model (nTrees: %d, maxDepth: %d, subSpace: %d) on the data\n",
		1000, 1000, 7500,
	)
	predictions := train(data)

	// The loops below index predictions using package-level sizes rather than
	// the size of the loaded data, so verify the slice is long enough up
	// front instead of failing later with an index-out-of-range panic.
	expected := randomDataSize + outliers
	if len(predictions) < expected {
		log.Fatalf(
			"expected at least %d predictions (%d normal + %d outliers), got %d",
			expected, randomDataSize, outliers, len(predictions),
		)
	}

	log.Println("Calculate avg/min scores")
	avg := 0.0
	// Starting the minimum at 1.0 assumes no score exceeds 1.0 — TODO confirm
	// against the forest's score range.
	minScore := 1.0
	for i := 0; i < randomDataSize; i++ {
		score := predictions[i]
		avg += score
		if score < minScore {
			minScore = score
		}
	}
	fmt.Fprintf(os.Stderr, "\tAverage anomaly score for normal data: %f\n",
		avg/float64(randomDataSize),
	)
	fmt.Fprintf(os.Stderr, "\tMinimum anomaly score for normal data: %f\n",
		minScore,
	)

	// These values should be much higher compared to the scores for normal
	// data.
	fmt.Fprintln(os.Stderr, "\tAnomaly scores for outliers are:")
	for i := randomDataSize; i < expected; i++ {
		fmt.Fprintf(os.Stderr, "\t%v\n", predictions[i])
	}
	log.Println("we're done")
}
// train fits an isolation forest to data and returns the forest's
// predictions for that same data.
func train(data *base.DenseInstances) []float64 {
	// Forest settings, passed to trees.NewIsolationForest in this order. The
	// log line in main reports the same three numbers under these names.
	const (
		nTrees   = 1000
		maxDepth = 1000
		subSpace = 7500
	)

	forest := trees.NewIsolationForest(nTrees, maxDepth, subSpace)

	// Fit first, then score the very data the forest was fitted on.
	forest.Fit(data)
	scores := forest.Predict(data)
	return scores
}
// loadData prepares the dataset file named by fname and parses it into
// golearn instances.
//
// On failure it returns a nil dataset and an error that wraps the underlying
// cause and names the step (preparing or parsing) that failed, so the single
// log line printed by the caller is enough to locate the problem.
func loadData() (*base.DenseInstances, error) {
	// prepData is defined elsewhere in this file; presumably it generates and
	// saves random data along with known outlier values — verify against its
	// definition.
	if err := prepData(fname); err != nil {
		return nil, fmt.Errorf("preparing data file %q: %w", fname, err)
	}

	// NOTE(review): the second argument is expected to be golearn's
	// hasHeaders flag, i.e. false means the CSV has no header row — confirm.
	data, err := base.ParseCSVToInstances(fname, false)
	if err != nil {
		return nil, fmt.Errorf("parsing data file %q: %w", fname, err)
	}
	return data, nil
}