go: break out data handling funcs into their own file
This commit is contained in:
parent
938adb802d
commit
2b0ee8c693
103
data.go
Normal file
103
data.go
Normal file
@ -0,0 +1,103 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/csv"
|
||||
"log"
|
||||
"os"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"golang.org/x/exp/rand"
|
||||
|
||||
"gonum.org/v1/gonum/stat/distuv"
|
||||
)
|
||||
|
||||
// prepData generates and saves random data (along with some known outliers) to
|
||||
// a file in CSV format.
|
||||
func prepData(path string) error {
|
||||
log.Println("generating data")
|
||||
|
||||
data := genData(true, randomDataSize, -1.0, 1.0)
|
||||
|
||||
log.Println("generating data - done")
|
||||
log.Printf("saving data to file at '%s'\n", path)
|
||||
|
||||
f, err := os.Create(path)
|
||||
if err != nil {
|
||||
log.Printf("could not save data to file at '%s'\n", path)
|
||||
return err
|
||||
}
|
||||
|
||||
defer f.Close()
|
||||
|
||||
w := csv.NewWriter(f)
|
||||
|
||||
defer w.Flush()
|
||||
|
||||
log.Println("writing data")
|
||||
|
||||
err = w.WriteAll(data)
|
||||
if err != nil {
|
||||
log.Println("error writing data")
|
||||
return err
|
||||
}
|
||||
|
||||
log.Println("writing data - done")
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// genData generates new random data with either normal or uniform
|
||||
// distribution. if normal is set, normal distribution is set with sigma and mu
|
||||
// values corresponding to the standard normal distribution and min/max values
|
||||
// are ignored.
|
||||
func genData(normal bool, size int, min, max float64) [][]string {
|
||||
col1 := make([]float64, size)
|
||||
col2 := make([]float64, size)
|
||||
|
||||
switch {
|
||||
case !normal:
|
||||
uniform := &distuv.Uniform{
|
||||
Min: min,
|
||||
Max: max,
|
||||
Src: rand.NewSource(uint64(
|
||||
time.Now().UnixNano(),
|
||||
)),
|
||||
}
|
||||
|
||||
for i := 0; i < size; i++ {
|
||||
col1[i] = uniform.Rand()
|
||||
col2[i] = uniform.Rand()
|
||||
}
|
||||
|
||||
case normal:
|
||||
stdnorm := &distuv.Normal{
|
||||
Sigma: 1,
|
||||
Mu: 0,
|
||||
Src: rand.NewSource(uint64(
|
||||
time.Now().UnixNano(),
|
||||
)),
|
||||
}
|
||||
|
||||
for i := 0; i < size; i++ {
|
||||
col1[i] = stdnorm.Rand()
|
||||
col2[i] = stdnorm.Rand()
|
||||
}
|
||||
}
|
||||
|
||||
for i := 0; i < outliers; i++ {
|
||||
col1 = append(col1, float64(rand.Int63()))
|
||||
col2 = append(col2, float64(rand.Int63()))
|
||||
}
|
||||
|
||||
data := make([][]string, 0, size+outliers)
|
||||
|
||||
for i := 0; i < size+outliers; i++ {
|
||||
r1 := strconv.FormatFloat(col1[i], 'f', -1, 64)
|
||||
r2 := strconv.FormatFloat(col2[i], 'f', -1, 64)
|
||||
|
||||
data = append(data, []string{r1, r2})
|
||||
}
|
||||
|
||||
return data
|
||||
}
|
95
main.go
95
main.go
@ -1,17 +1,12 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/csv"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/sjwhitworth/golearn/base"
|
||||
"github.com/sjwhitworth/golearn/trees"
|
||||
"golang.org/x/exp/rand"
|
||||
"gonum.org/v1/gonum/stat/distuv"
|
||||
)
|
||||
|
||||
var (
|
||||
@ -93,93 +88,3 @@ func loadData() (*base.DenseInstances, error) {
|
||||
|
||||
return data, nil
|
||||
}
|
||||
|
||||
// prepData generates and saves random data (along with some known outliers) to
|
||||
// a file in CSV format.
|
||||
func prepData(path string) error {
|
||||
log.Println("generating data")
|
||||
|
||||
data := genData(true, randomDataSize, -1.0, 1.0)
|
||||
|
||||
log.Println("generating data - done")
|
||||
log.Printf("saving data to file at '%s'\n", path)
|
||||
|
||||
f, err := os.Create(path)
|
||||
if err != nil {
|
||||
log.Printf("could not save data to file at '%s'\n", path)
|
||||
return err
|
||||
}
|
||||
|
||||
defer f.Close()
|
||||
|
||||
w := csv.NewWriter(f)
|
||||
|
||||
defer w.Flush()
|
||||
|
||||
log.Println("writing data")
|
||||
|
||||
err = w.WriteAll(data)
|
||||
if err != nil {
|
||||
log.Println("error writing data")
|
||||
return err
|
||||
}
|
||||
|
||||
log.Println("writing data - done")
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// genData generates new random data with either normal or uniform
|
||||
// distribution. if normal is set, normal distribution is set with sigma and mu
|
||||
// values corresponding to the standard normal distribution and min/max values
|
||||
// are ignored.
|
||||
func genData(normal bool, size int, min, max float64) [][]string {
|
||||
col1 := make([]float64, size)
|
||||
col2 := make([]float64, size)
|
||||
|
||||
switch {
|
||||
case !normal:
|
||||
uniform := &distuv.Uniform{
|
||||
Min: min,
|
||||
Max: max,
|
||||
Src: rand.NewSource(uint64(
|
||||
time.Now().UnixNano(),
|
||||
)),
|
||||
}
|
||||
|
||||
for i := 0; i < size; i++ {
|
||||
col1[i] = uniform.Rand()
|
||||
col2[i] = uniform.Rand()
|
||||
}
|
||||
|
||||
case normal:
|
||||
stdnorm := &distuv.Normal{
|
||||
Sigma: 1,
|
||||
Mu: 0,
|
||||
Src: rand.NewSource(uint64(
|
||||
time.Now().UnixNano(),
|
||||
)),
|
||||
}
|
||||
|
||||
for i := 0; i < size; i++ {
|
||||
col1[i] = stdnorm.Rand()
|
||||
col2[i] = stdnorm.Rand()
|
||||
}
|
||||
}
|
||||
|
||||
for i := 0; i < outliers; i++ {
|
||||
col1 = append(col1, float64(rand.Int63()))
|
||||
col2 = append(col2, float64(rand.Int63()))
|
||||
}
|
||||
|
||||
data := make([][]string, 0, size+outliers)
|
||||
|
||||
for i := 0; i < size+outliers; i++ {
|
||||
r1 := strconv.FormatFloat(col1[i], 'f', -1, 64)
|
||||
r2 := strconv.FormatFloat(col2[i], 'f', -1, 64)
|
||||
|
||||
data = append(data, []string{r1, r2})
|
||||
}
|
||||
|
||||
return data
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user