package main import ( "fmt" "log" "os" "github.com/sjwhitworth/golearn/base" "github.com/sjwhitworth/golearn/trees" ) var ( fname = "data.csv" outliers = 15 randomDataSize = 10001 ) func main() { data, err := loadData() if err != nil { log.Fatalln(err) } log.Printf( "Train Isolation Forest model (nTrees: %d, maxDepth: %d, subSpace: %d) on the data\n", 1000, 1000, 7500, ) predictions := train(data) log.Println("Calculate avg/min scores") avg := 0.0 minScore := 1.0 for i := 0; i < randomDataSize; i++ { tmp := predictions[i] avg += tmp if tmp < minScore { minScore = tmp } } fmt.Fprintf(os.Stderr, "\tAverage anomaly score for normal data: %f\n", avg/float64(randomDataSize), ) fmt.Fprintf(os.Stderr, "\tMinimum anomaly score for normal data: %f\n", minScore, ) // these values should be much higher as comapred to the scores for normal // data. fmt.Fprintln(os.Stderr, "\tAnomaly scores for outliers are:") for i := randomDataSize; i < (randomDataSize + outliers); i++ { fmt.Fprint(os.Stderr, "\t") fmt.Fprintln(os.Stderr, predictions[i]) } log.Println("we're done") } func train(data *base.DenseInstances) []float64 { // get a new Isolation Forest with 700 trees, max depth 700 and each tree // using 7500 datapoints. forest := trees.NewIsolationForest(1000, 1000, 7500) // fit the isolation forest to the data. forest.Fit(data) // return predictions. return forest.Predict(data) } func loadData() (*base.DenseInstances, error) { // generate and save random data, along with known outlier values. err := prepData(fname) if err != nil { return nil, err } data, err := base.ParseCSVToInstances(fname, false) if err != nil { return nil, err } return data, nil }