go: break out data handling funcs to its own file

This commit is contained in:
leo 2023-05-14 21:30:22 +02:00
parent 938adb802d
commit 2b0ee8c693
Signed by: wanderer
SSH Key Fingerprint: SHA256:Dp8+iwKHSlrMEHzE3bJnPng70I7LEsa3IJXRH/U+idQ
2 changed files with 103 additions and 95 deletions

103
data.go Normal file

@ -0,0 +1,103 @@
package main
import (
"encoding/csv"
"log"
"os"
"strconv"
"time"
"golang.org/x/exp/rand"
"gonum.org/v1/gonum/stat/distuv"
)
// prepData generates random data (along with some known outliers) and saves
// it to the file at path in CSV format. The file is created, or truncated if
// it already exists.
//
// It returns the first error encountered while creating, writing or closing
// the file, and nil on success. Progress and failures are also logged.
func prepData(path string) (err error) {
	log.Println("generating data")
	data := genData(true, randomDataSize, -1.0, 1.0)
	log.Println("generating data - done")

	log.Printf("saving data to file at '%s'\n", path)

	f, err := os.Create(path)
	if err != nil {
		log.Printf("could not save data to file at '%s'\n", path)
		return err
	}
	// A failed Close on a file we have just written can mean the data never
	// reached the disk, so surface it unless an earlier error is already
	// being returned.
	defer func() {
		if cerr := f.Close(); cerr != nil && err == nil {
			log.Printf("could not close file at '%s'\n", path)
			err = cerr
		}
	}()

	log.Println("writing data")

	// WriteAll flushes the writer itself and reports any flush error, so no
	// separate Flush call is needed.
	w := csv.NewWriter(f)
	if err = w.WriteAll(data); err != nil {
		log.Println("error writing data")
		return err
	}

	log.Println("writing data - done")

	return nil
}
// genData produces a two-column table of random samples, each value rendered
// as a decimal string, ready to be handed to a CSV writer.
//
// When normal is true the samples follow the standard normal distribution
// (mu 0, sigma 1) and min/max are ignored; otherwise they are drawn uniformly
// using min and max as the distribution bounds. The first size rows hold
// these samples. They are followed by `outliers` extra rows (a package-level
// value) whose cells come from the rand package's top-level Int63.
func genData(normal bool, size int, min, max float64) [][]string {
	xs := make([]float64, size)
	ys := make([]float64, size)

	// Pick the sampler once; the fill loop below does not care which
	// distribution backs it. The source is seeded from the wall clock.
	var sample func() float64
	if normal {
		dist := &distuv.Normal{
			Mu:    0,
			Sigma: 1,
			Src:   rand.NewSource(uint64(time.Now().UnixNano())),
		}
		sample = dist.Rand
	} else {
		dist := &distuv.Uniform{
			Min: min,
			Max: max,
			Src: rand.NewSource(uint64(time.Now().UnixNano())),
		}
		sample = dist.Rand
	}

	// Draw order matters for reproducing the same stream: first column, then
	// second column, row by row.
	for i := range xs {
		xs[i] = sample()
		ys[i] = sample()
	}

	// Tack the outlier rows onto the end of both columns.
	for left := outliers; left > 0; left-- {
		xs = append(xs, float64(rand.Int63()))
		ys = append(ys, float64(rand.Int63()))
	}

	// 'f' with precision -1: no exponent, shortest representation that
	// round-trips the float64.
	format := func(v float64) string {
		return strconv.FormatFloat(v, 'f', -1, 64)
	}

	rows := make([][]string, size+outliers)
	for i := range rows {
		rows[i] = []string{format(xs[i]), format(ys[i])}
	}

	return rows
}

95
main.go

@ -1,17 +1,12 @@
package main package main
import ( import (
"encoding/csv"
"fmt" "fmt"
"log" "log"
"os" "os"
"strconv"
"time"
"github.com/sjwhitworth/golearn/base" "github.com/sjwhitworth/golearn/base"
"github.com/sjwhitworth/golearn/trees" "github.com/sjwhitworth/golearn/trees"
"golang.org/x/exp/rand"
"gonum.org/v1/gonum/stat/distuv"
) )
var ( var (
@ -93,93 +88,3 @@ func loadData() (*base.DenseInstances, error) {
return data, nil return data, nil
} }
// prepData generates random data (along with some known outliers) and saves
// it to the file at path in CSV format. The file is created, or truncated if
// it already exists.
//
// It returns the first error encountered while creating, writing or closing
// the file, and nil on success. Progress and failures are also logged.
func prepData(path string) (err error) {
	log.Println("generating data")
	data := genData(true, randomDataSize, -1.0, 1.0)
	log.Println("generating data - done")

	log.Printf("saving data to file at '%s'\n", path)

	f, err := os.Create(path)
	if err != nil {
		log.Printf("could not save data to file at '%s'\n", path)
		return err
	}
	// A failed Close on a file we have just written can mean the data never
	// reached the disk, so surface it unless an earlier error is already
	// being returned.
	defer func() {
		if cerr := f.Close(); cerr != nil && err == nil {
			log.Printf("could not close file at '%s'\n", path)
			err = cerr
		}
	}()

	log.Println("writing data")

	// WriteAll flushes the writer itself and reports any flush error, so no
	// separate Flush call is needed.
	w := csv.NewWriter(f)
	if err = w.WriteAll(data); err != nil {
		log.Println("error writing data")
		return err
	}

	log.Println("writing data - done")

	return nil
}
// genData produces a two-column table of random samples, each value rendered
// as a decimal string, ready to be handed to a CSV writer.
//
// When normal is true the samples follow the standard normal distribution
// (mu 0, sigma 1) and min/max are ignored; otherwise they are drawn uniformly
// using min and max as the distribution bounds. The first size rows hold
// these samples. They are followed by `outliers` extra rows (a package-level
// value) whose cells come from the rand package's top-level Int63.
func genData(normal bool, size int, min, max float64) [][]string {
	xs := make([]float64, size)
	ys := make([]float64, size)

	// Pick the sampler once; the fill loop below does not care which
	// distribution backs it. The source is seeded from the wall clock.
	var sample func() float64
	if normal {
		dist := &distuv.Normal{
			Mu:    0,
			Sigma: 1,
			Src:   rand.NewSource(uint64(time.Now().UnixNano())),
		}
		sample = dist.Rand
	} else {
		dist := &distuv.Uniform{
			Min: min,
			Max: max,
			Src: rand.NewSource(uint64(time.Now().UnixNano())),
		}
		sample = dist.Rand
	}

	// Draw order matters for reproducing the same stream: first column, then
	// second column, row by row.
	for i := range xs {
		xs[i] = sample()
		ys[i] = sample()
	}

	// Tack the outlier rows onto the end of both columns.
	for left := outliers; left > 0; left-- {
		xs = append(xs, float64(rand.Int63()))
		ys = append(ys, float64(rand.Int63()))
	}

	// 'f' with precision -1: no exponent, shortest representation that
	// round-trips the float64.
	format := func(v float64) string {
		return strconv.FormatFloat(v, 'f', -1, 64)
	}

	rows := make([][]string, size+outliers)
	for i := range rows {
		rows[i] = []string{format(xs[i]), format(ys[i])}
	}

	return rows
}