From 2b0ee8c693dab64cb724b6f781e61c41a961d590 Mon Sep 17 00:00:00 2001
From: leo
Date: Sun, 14 May 2023 21:30:22 +0200
Subject: [PATCH] go: break out data handling funcs to its own file

---
 data.go | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 main.go |  95 ---------------------------------------------------
 2 files changed, 103 insertions(+), 95 deletions(-)
 create mode 100644 data.go

diff --git a/data.go b/data.go
new file mode 100644
index 0000000..36aacc7
--- /dev/null
+++ b/data.go
@@ -0,0 +1,103 @@
+package main
+
+import (
+	"encoding/csv"
+	"log"
+	"os"
+	"strconv"
+	"time"
+
+	"golang.org/x/exp/rand"
+
+	"gonum.org/v1/gonum/stat/distuv"
+)
+
+// prepData generates random data (along with some known outliers) and saves
+// it to path in CSV format, returning any create, write or close error.
+func prepData(path string) (err error) {
+	log.Println("generating data")
+
+	data := genData(true, randomDataSize, -1.0, 1.0)
+
+	log.Println("generating data - done")
+	log.Printf("saving data to file at '%s'\n", path)
+
+	f, err := os.Create(path)
+	if err != nil {
+		log.Printf("could not save data to file at '%s'\n", path)
+		return err
+	}
+
+	// Surface a failed Close: rows may not have reached the disk.
+	defer func() {
+		if cerr := f.Close(); cerr != nil && err == nil {
+			err = cerr
+		}
+	}()
+
+	log.Println("writing data")
+	// WriteAll flushes the writer itself and returns any flush error.
+	if err = csv.NewWriter(f).WriteAll(data); err != nil {
+		log.Println("error writing data")
+		return err
+	}
+
+	log.Println("writing data - done")
+
+	return nil
+}
+
+// genData generates new random data with either normal or uniform
+// distribution. if normal is set, normal distribution is set with sigma and mu
+// values corresponding to the standard normal distribution and min/max values
+// are ignored.
+func genData(normal bool, size int, min, max float64) [][]string {
+	// One time-seeded source feeds whichever distribution gets selected.
+	src := rand.NewSource(uint64(time.Now().UnixNano()))
+
+	// Drawing a float64 is all that is needed from either distribution.
+	var sampler interface{ Rand() float64 }
+
+	if normal {
+		// Standard normal; min and max are not consulted on this path.
+		sampler = &distuv.Normal{
+			Mu:    0,
+			Sigma: 1,
+			Src:   src,
+		}
+	} else {
+		sampler = &distuv.Uniform{Min: min, Max: max, Src: src}
+	}
+
+	xs := make([]float64, size)
+	ys := make([]float64, size)
+
+	// Draw the first column's value, then the second's, row by row.
+	for i := range xs {
+		xs[i] = sampler.Rand()
+		ys[i] = sampler.Rand()
+	}
+
+	// Outliers are drawn with the package-level rand.Int63, not from src,
+	// and are appended after the regular samples.
+	for n := outliers; n > 0; n-- {
+		xs = append(xs, float64(rand.Int63()))
+		ys = append(ys, float64(rand.Int63()))
+	}
+
+	// Precision -1 asks for the fewest digits that identify the value.
+	format := func(v float64) string {
+		return strconv.FormatFloat(v, 'f', -1, 64)
+	}
+
+	// Each row pairs the two columns as text, in the [][]string shape that
+	// csv.Writer.WriteAll accepts.
+	total := size + outliers
+	rows := make([][]string, 0, total)
+
+	for i := 0; i < total; i++ {
+		rows = append(rows, []string{format(xs[i]), format(ys[i])})
+	}
+
+	return rows
+}
diff --git a/main.go b/main.go
index 3ef785d..bad0a48 100644
--- a/main.go
+++ b/main.go
@@ -1,17 +1,12 @@
 package main
 
 import (
-	"encoding/csv"
 	"fmt"
 	"log"
 	"os"
-	"strconv"
-	"time"
 
 	"github.com/sjwhitworth/golearn/base"
 	"github.com/sjwhitworth/golearn/trees"
-	"golang.org/x/exp/rand"
-	"gonum.org/v1/gonum/stat/distuv"
 )
 
 var (
@@ -93,93 +88,3 @@ func loadData() (*base.DenseInstances, error) {
 
 	return data, nil
 }
-
-// prepData generates and saves random data (along with some known outliers) to
-// a file in CSV format.
-func prepData(path string) error {
-	log.Println("generating data")
-
-	data := genData(true, randomDataSize, -1.0, 1.0)
-
-	log.Println("generating data - done")
-	log.Printf("saving data to file at '%s'\n", path)
-
-	f, err := os.Create(path)
-	if err != nil {
-		log.Printf("could not save data to file at '%s'\n", path)
-		return err
-	}
-
-	defer f.Close()
-
-	w := csv.NewWriter(f)
-
-	defer w.Flush()
-
-	log.Println("writing data")
-
-	err = w.WriteAll(data)
-	if err != nil {
-		log.Println("error writing data")
-		return err
-	}
-
-	log.Println("writing data - done")
-
-	return nil
-}
-
-// genData generates new random data with either normal or uniform
-// distribution. if normal is set, normal distribution is set with sigma and mu
-// values corresponding to the standard normal distribution and min/max values
-// are ignored.
-func genData(normal bool, size int, min, max float64) [][]string {
-	col1 := make([]float64, size)
-	col2 := make([]float64, size)
-
-	switch {
-	case !normal:
-		uniform := &distuv.Uniform{
-			Min: min,
-			Max: max,
-			Src: rand.NewSource(uint64(
-				time.Now().UnixNano(),
-			)),
-		}
-
-		for i := 0; i < size; i++ {
-			col1[i] = uniform.Rand()
-			col2[i] = uniform.Rand()
-		}
-
-	case normal:
-		stdnorm := &distuv.Normal{
-			Sigma: 1,
-			Mu:    0,
-			Src: rand.NewSource(uint64(
-				time.Now().UnixNano(),
-			)),
-		}
-
-		for i := 0; i < size; i++ {
-			col1[i] = stdnorm.Rand()
-			col2[i] = stdnorm.Rand()
-		}
-	}
-
-	for i := 0; i < outliers; i++ {
-		col1 = append(col1, float64(rand.Int63()))
-		col2 = append(col2, float64(rand.Int63()))
-	}
-
-	data := make([][]string, 0, size+outliers)
-
-	for i := 0; i < size+outliers; i++ {
-		r1 := strconv.FormatFloat(col1[i], 'f', -1, 64)
-		r2 := strconv.FormatFloat(col2[i], 'f', -1, 64)
-
-		data = append(data, []string{r1, r2})
-	}
-
-	return data
-}