2017-06-28 13:01:36 +02:00
|
|
|
package enry
|
|
|
|
|
|
|
|
import (
|
|
|
|
"flag"
|
2018-12-26 21:21:39 +01:00
|
|
|
"fmt"
|
2017-06-28 13:01:36 +02:00
|
|
|
"io/ioutil"
|
|
|
|
"log"
|
|
|
|
"os"
|
2017-07-18 13:29:46 +02:00
|
|
|
"os/exec"
|
2017-06-28 13:01:36 +02:00
|
|
|
"path/filepath"
|
|
|
|
"testing"
|
|
|
|
|
2020-03-19 17:31:29 +01:00
|
|
|
"github.com/go-enry/go-enry/v2/data"
|
2017-07-18 13:29:46 +02:00
|
|
|
)
|
2017-06-28 13:01:36 +02:00
|
|
|
|
|
|
|
// sample is one file loaded from the samples directory, kept in memory so the
// benchmarks do not touch the disk while they are being timed.
type sample struct {
	// filename is the path the file was read from.
	filename string
	// content holds the raw bytes of the file.
	content []byte
}
|
|
|
|
|
|
|
|
var (
|
|
|
|
slow bool
|
|
|
|
overcomeLanguage string
|
|
|
|
overcomeLanguages []string
|
|
|
|
samples []*sample
|
2017-07-18 13:29:46 +02:00
|
|
|
samplesDir string
|
2017-10-26 15:37:18 +02:00
|
|
|
cloned bool
|
2017-06-28 13:01:36 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
func TestMain(m *testing.M) {
|
|
|
|
flag.BoolVar(&slow, "slow", false, "run benchmarks per sample for strategies too")
|
|
|
|
flag.Parse()
|
2017-07-18 13:29:46 +02:00
|
|
|
|
|
|
|
if err := cloneLinguist(linguistURL); err != nil {
|
|
|
|
log.Fatal(err)
|
|
|
|
}
|
2017-10-26 15:37:18 +02:00
|
|
|
if cloned {
|
|
|
|
defer os.RemoveAll(filepath.Dir(samplesDir))
|
|
|
|
}
|
2017-07-18 13:29:46 +02:00
|
|
|
|
2017-06-28 13:01:36 +02:00
|
|
|
var err error
|
|
|
|
samples, err = getSamples(samplesDir)
|
|
|
|
if err != nil {
|
|
|
|
log.Fatal(err)
|
|
|
|
}
|
|
|
|
|
2019-02-14 12:47:45 +01:00
|
|
|
os.Exit(m.Run())
|
2017-07-18 13:29:46 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
func cloneLinguist(linguistURL string) error {
|
2017-10-26 15:37:18 +02:00
|
|
|
repoLinguist := os.Getenv(linguistClonedEnvVar)
|
|
|
|
cloned = repoLinguist == ""
|
|
|
|
if cloned {
|
|
|
|
var err error
|
|
|
|
repoLinguist, err = ioutil.TempDir("", "linguist-")
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2017-07-18 13:29:46 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
samplesDir = filepath.Join(repoLinguist, "samples")
|
|
|
|
|
2017-10-26 15:37:18 +02:00
|
|
|
if cloned {
|
|
|
|
cmd := exec.Command("git", "clone", linguistURL, repoLinguist)
|
|
|
|
if err := cmd.Run(); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2017-07-18 13:29:46 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
cwd, err := os.Getwd()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
if err = os.Chdir(repoLinguist); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2017-10-26 15:37:18 +02:00
|
|
|
cmd := exec.Command("git", "checkout", data.LinguistCommit)
|
2017-07-18 13:29:46 +02:00
|
|
|
if err := cmd.Run(); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
if err = os.Chdir(cwd); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
2017-06-28 13:01:36 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
func getSamples(dir string) ([]*sample, error) {
|
|
|
|
samples := make([]*sample, 0, 2000)
|
|
|
|
err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
if info.IsDir() {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
content, err := ioutil.ReadFile(path)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
s := &sample{
|
|
|
|
filename: path,
|
|
|
|
content: content,
|
|
|
|
}
|
|
|
|
samples = append(samples, s)
|
|
|
|
return nil
|
|
|
|
})
|
|
|
|
return samples, err
|
|
|
|
}
|
|
|
|
|
|
|
|
func BenchmarkGetLanguageTotal(b *testing.B) {
|
|
|
|
if slow {
|
|
|
|
b.SkipNow()
|
|
|
|
}
|
|
|
|
|
|
|
|
var o string
|
|
|
|
b.Run("GetLanguage()_TOTAL", func(b *testing.B) {
|
|
|
|
for n := 0; n < b.N; n++ {
|
|
|
|
for _, sample := range samples {
|
|
|
|
o = GetLanguage(sample.filename, sample.content)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
overcomeLanguage = o
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
func BenchmarkClassifyTotal(b *testing.B) {
|
|
|
|
if slow {
|
|
|
|
b.SkipNow()
|
|
|
|
}
|
|
|
|
|
|
|
|
var o []string
|
|
|
|
b.Run("Classify()_TOTAL", func(b *testing.B) {
|
|
|
|
for n := 0; n < b.N; n++ {
|
|
|
|
for _, sample := range samples {
|
2019-10-29 17:56:13 +01:00
|
|
|
o = defaultClassifier.classify(sample.content, nil)
|
2017-06-28 13:01:36 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
overcomeLanguages = o
|
|
|
|
}
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
func BenchmarkStrategiesTotal(b *testing.B) {
|
|
|
|
if slow {
|
|
|
|
b.SkipNow()
|
|
|
|
}
|
|
|
|
|
2018-12-26 21:21:39 +01:00
|
|
|
benchmarks := benchmarkForAllStrategies("TOTAL")
|
2017-06-28 13:01:36 +02:00
|
|
|
|
|
|
|
var o []string
|
|
|
|
for _, benchmark := range benchmarks {
|
|
|
|
b.Run(benchmark.name, func(b *testing.B) {
|
|
|
|
for n := 0; n < b.N; n++ {
|
|
|
|
for _, sample := range samples {
|
|
|
|
o = benchmark.strategy(sample.filename, sample.content, benchmark.candidates)
|
|
|
|
}
|
|
|
|
|
|
|
|
overcomeLanguages = o
|
|
|
|
}
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func BenchmarkGetLanguagePerSample(b *testing.B) {
|
|
|
|
if !slow {
|
|
|
|
b.SkipNow()
|
|
|
|
}
|
|
|
|
|
|
|
|
var o string
|
|
|
|
for _, sample := range samples {
|
|
|
|
b.Run("GetLanguage()_SAMPLE_"+sample.filename, func(b *testing.B) {
|
|
|
|
for n := 0; n < b.N; n++ {
|
|
|
|
o = GetLanguage(sample.filename, sample.content)
|
|
|
|
}
|
|
|
|
|
|
|
|
overcomeLanguage = o
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func BenchmarkClassifyPerSample(b *testing.B) {
|
|
|
|
if !slow {
|
|
|
|
b.SkipNow()
|
|
|
|
}
|
|
|
|
|
|
|
|
var o []string
|
|
|
|
for _, sample := range samples {
|
|
|
|
b.Run("Classify()_SAMPLE_"+sample.filename, func(b *testing.B) {
|
|
|
|
for n := 0; n < b.N; n++ {
|
2019-10-29 17:56:13 +01:00
|
|
|
o = defaultClassifier.classify(sample.content, nil)
|
2017-06-28 13:01:36 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
overcomeLanguages = o
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func BenchmarkStrategiesPerSample(b *testing.B) {
|
|
|
|
if !slow {
|
|
|
|
b.SkipNow()
|
|
|
|
}
|
|
|
|
|
2018-12-26 21:21:39 +01:00
|
|
|
benchmarks := benchmarkForAllStrategies("SAMPLE")
|
2017-06-28 13:01:36 +02:00
|
|
|
|
|
|
|
var o []string
|
|
|
|
for _, benchmark := range benchmarks {
|
|
|
|
for _, sample := range samples {
|
|
|
|
b.Run(benchmark.name+sample.filename, func(b *testing.B) {
|
|
|
|
for n := 0; n < b.N; n++ {
|
|
|
|
o = benchmark.strategy(sample.filename, sample.content, benchmark.candidates)
|
|
|
|
}
|
|
|
|
|
|
|
|
overcomeLanguages = o
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2018-12-26 21:21:39 +01:00
|
|
|
|
|
|
|
type strategyName struct {
|
|
|
|
name string
|
|
|
|
strategy Strategy
|
|
|
|
candidates []string
|
|
|
|
}
|
|
|
|
|
|
|
|
func benchmarkForAllStrategies(class string) []strategyName {
|
|
|
|
return []strategyName{
|
|
|
|
{name: fmt.Sprintf("GetLanguagesByModeline()_%s_", class), strategy: GetLanguagesByModeline},
|
|
|
|
{name: fmt.Sprintf("GetLanguagesByFilename()_%s_", class), strategy: GetLanguagesByFilename},
|
|
|
|
{name: fmt.Sprintf("GetLanguagesByShebang()_%s_", class), strategy: GetLanguagesByShebang},
|
|
|
|
{name: fmt.Sprintf("GetLanguagesByExtension()_%s_", class), strategy: GetLanguagesByExtension},
|
|
|
|
{name: fmt.Sprintf("GetLanguagesByContent()_%s_", class), strategy: GetLanguagesByContent},
|
|
|
|
}
|
|
|
|
}
|