2017-06-13 13:56:07 +02:00
package enry
2016-07-13 22:21:18 +02:00
import (
2017-05-29 10:05:16 +02:00
"fmt"
"io/ioutil"
"os"
2017-06-21 15:18:27 +02:00
"os/exec"
2017-05-29 10:05:16 +02:00
"path/filepath"
2016-07-13 22:21:18 +02:00
"testing"
2020-03-19 17:31:29 +01:00
"github.com/go-enry/go-enry/v2/data"
2017-06-28 10:58:58 +02:00
2017-05-29 10:05:16 +02:00
"github.com/stretchr/testify/assert"
2019-02-14 12:47:45 +01:00
"github.com/stretchr/testify/require"
2017-05-29 10:05:16 +02:00
"github.com/stretchr/testify/suite"
2016-07-13 22:21:18 +02:00
)
2017-06-21 15:18:27 +02:00
// Settings that control where the Linguist checkout used by the suite comes from.
const (
	// linguistURL is the Git remote passed to `git clone` when no local
	// checkout has been supplied.
	linguistURL = "https://github.com/github/linguist.git"
	// linguistClonedEnvVar is the name of the environment variable read to
	// locate an already-cloned Linguist repository.
	linguistClonedEnvVar = "ENRY_TEST_REPO"
)
2017-06-21 15:18:27 +02:00
type EnryTestSuite struct {
2017-05-29 10:05:16 +02:00
suite . Suite
2021-03-07 17:34:08 +01:00
tmpLinguist string
needToClone bool
samplesDir string
2020-11-15 14:43:37 +01:00
testFixturesDir string
2019-02-14 12:47:45 +01:00
}
func ( s * EnryTestSuite ) TestRegexpEdgeCases ( ) {
var regexpEdgeCases = [ ] struct {
lang string
filename string
} {
{ lang : "ActionScript" , filename : "FooBar.as" } ,
{ lang : "Forth" , filename : "asm.fr" } ,
{ lang : "X PixMap" , filename : "cc-public_domain_mark_white.pm" } ,
//{lang: "SQL", filename: "drop_stuff.sql"}, // https://github.com/src-d/enry/issues/194
{ lang : "Fstar" , filename : "Hacl.Spec.Bignum.Fmul.fst" } ,
{ lang : "C++" , filename : "Types.h" } ,
}
for _ , r := range regexpEdgeCases {
2020-03-19 19:47:22 +01:00
filename := filepath . Join ( s . tmpLinguist , "samples" , r . lang , r . filename )
2019-02-14 12:47:45 +01:00
content , err := ioutil . ReadFile ( filename )
require . NoError ( s . T ( ) , err )
lang := GetLanguage ( r . filename , content )
s . T ( ) . Logf ( "File:%s, lang:%s" , filename , lang )
expLang , _ := data . LanguageByAlias ( r . lang )
require . EqualValues ( s . T ( ) , expLang , lang )
}
2017-06-21 15:18:27 +02:00
}
2018-12-27 11:55:34 +01:00
func Test_EnryTestSuite ( t * testing . T ) {
2017-06-21 15:18:27 +02:00
suite . Run ( t , new ( EnryTestSuite ) )
}
func ( s * EnryTestSuite ) SetupSuite ( ) {
var err error
2019-02-14 12:47:45 +01:00
s . tmpLinguist = os . Getenv ( linguistClonedEnvVar )
s . needToClone = s . tmpLinguist == ""
if s . needToClone {
s . tmpLinguist , err = ioutil . TempDir ( "" , "linguist-" )
require . NoError ( s . T ( ) , err )
s . T ( ) . Logf ( "Cloning Linguist repo to '%s' as %s was not set\n" ,
s . tmpLinguist , linguistClonedEnvVar )
cmd := exec . Command ( "git" , "clone" , linguistURL , s . tmpLinguist )
2017-09-28 20:58:13 +02:00
err = cmd . Run ( )
2019-02-14 12:47:45 +01:00
require . NoError ( s . T ( ) , err )
2017-09-28 20:58:13 +02:00
}
2019-02-14 12:47:45 +01:00
s . samplesDir = filepath . Join ( s . tmpLinguist , "samples" )
s . T ( ) . Logf ( "using samples from %s" , s . samplesDir )
2017-06-21 15:18:27 +02:00
2020-11-15 14:43:37 +01:00
s . testFixturesDir = filepath . Join ( s . tmpLinguist , "test" , "fixtures" )
s . T ( ) . Logf ( "using test fixtures from %s" , s . samplesDir )
2017-06-21 15:18:27 +02:00
cwd , err := os . Getwd ( )
assert . NoError ( s . T ( ) , err )
2019-02-14 12:47:45 +01:00
err = os . Chdir ( s . tmpLinguist )
2017-06-21 15:18:27 +02:00
assert . NoError ( s . T ( ) , err )
2017-09-28 20:58:13 +02:00
cmd := exec . Command ( "git" , "checkout" , data . LinguistCommit )
2017-06-21 15:18:27 +02:00
err = cmd . Run ( )
assert . NoError ( s . T ( ) , err )
err = os . Chdir ( cwd )
assert . NoError ( s . T ( ) , err )
2017-05-29 10:05:16 +02:00
}
2017-06-21 15:18:27 +02:00
func ( s * EnryTestSuite ) TearDownSuite ( ) {
2019-02-14 12:47:45 +01:00
if s . needToClone {
err := os . RemoveAll ( s . tmpLinguist )
2017-09-28 20:58:13 +02:00
assert . NoError ( s . T ( ) , err )
}
2017-05-29 10:05:16 +02:00
}
2017-06-21 15:18:27 +02:00
func ( s * EnryTestSuite ) TestGetLanguage ( ) {
2017-05-29 10:05:16 +02:00
tests := [ ] struct {
name string
filename string
content [ ] byte
expected string
2017-06-15 10:26:14 +02:00
safe bool
2017-05-29 10:05:16 +02:00
} {
2019-03-14 13:26:00 +01:00
{ name : "TestGetLanguage_0" , filename : "foo.h" , content : [ ] byte { } , expected : "C" } ,
2017-05-29 10:05:16 +02:00
{ name : "TestGetLanguage_1" , filename : "foo.py" , content : [ ] byte { } , expected : "Python" } ,
{ name : "TestGetLanguage_2" , filename : "foo.m" , content : [ ] byte ( ":- module" ) , expected : "Mercury" } ,
2019-03-14 13:26:00 +01:00
{ name : "TestGetLanguage_3" , filename : "foo.m" , content : nil , expected : "MATLAB" } ,
2017-07-04 11:22:01 +02:00
{ name : "TestGetLanguage_4" , filename : "foo.mo" , content : [ ] byte { 0xDE , 0x12 , 0x04 , 0x95 , 0x00 , 0x00 , 0x00 , 0x00 } , expected : OtherLanguage } ,
2017-12-07 16:42:02 +01:00
{ name : "TestGetLanguage_5" , filename : "" , content : nil , expected : OtherLanguage } ,
2017-05-29 10:05:16 +02:00
}
for _ , test := range tests {
language := GetLanguage ( test . filename , test . content )
2017-05-31 12:07:46 +02:00
assert . Equal ( s . T ( ) , test . expected , language , fmt . Sprintf ( "%v: %v, expected: %v" , test . name , language , test . expected ) )
2017-05-29 10:05:16 +02:00
}
}
2021-04-13 21:02:15 +02:00
// TestGetLanguages checks the full candidate list returned by GetLanguages
// for a table of file names and contents.
func (s *EnryTestSuite) TestGetLanguages() {
	type testCase struct {
		name     string
		filename string
		content  []byte
		expected []string
	}
	cases := []testCase{
		// With no content or filename, no language can be detected
		{name: "TestGetLanguages_0", filename: "", content: []byte{}, expected: nil},
		// The strategy that will match is GetLanguagesByExtension. Lacking content, it will return those results.
		{name: "TestGetLanguages_1", filename: "foo.h", content: []byte{}, expected: []string{"C", "C++", "Objective-C"}},
		// GetLanguagesByExtension will return an unambiguous match when there is a single result.
		{name: "TestGetLanguages_2", filename: "foo.groovy", content: []byte{}, expected: []string{"Groovy"}},
		// GetLanguagesByExtension will return "Rust", "RenderScript" for .rs,
		// then GetLanguagesByContent will take the first rule that matches (in this case Rust)
		{name: "TestGetLanguages_3", filename: "foo.rs", content: []byte("use \n#include"), expected: []string{"Rust"}},
		// .. and in this case, RenderScript (no content that matches a Rust regex can be included, because it runs first.)
		{name: "TestGetLanguages_4", filename: "foo.rs", content: []byte("#include"), expected: []string{"RenderScript"}},
		// GetLanguagesByExtension will return "AMPL", "Linux Kernel Module", "Modula-2", "XML",
		// then GetLanguagesByContent will ALWAYS return Linux Kernel Module and AMPL when there is no content,
		// and no further classifier can do anything without content
		{name: "TestGetLanguages_5", filename: "foo.mod", content: []byte{}, expected: []string{"Linux Kernel Module", "AMPL"}},
		// ...with some AMPL tokens, the DefaultClassifier will pick AMPL as the most likely language.
		{name: "TestGetLanguages_6", filename: "foo.mod", content: []byte("BEAMS ROWS - TotalWeight"), expected: []string{"AMPL", "Linux Kernel Module"}},
	}

	t := s.T()
	for _, tc := range cases {
		got := GetLanguages(tc.filename, tc.content)
		msg := fmt.Sprintf("%v: %v, expected: %v", tc.name, got, tc.expected)
		assert.Equal(t, tc.expected, got, msg)
	}
}
2017-06-21 15:18:27 +02:00
func ( s * EnryTestSuite ) TestGetLanguagesByModelineLinguist ( ) {
2020-03-19 19:47:22 +01:00
var modelinesDir = filepath . Join ( s . tmpLinguist , "test" , "fixtures" , "Data" , "Modelines" )
2017-05-29 10:05:16 +02:00
tests := [ ] struct {
2017-06-12 13:42:20 +02:00
name string
filename string
candidates [ ] string
expected [ ] string
2017-05-29 10:05:16 +02:00
} {
// Emacs
2017-06-12 13:42:20 +02:00
{ name : "TestGetLanguagesByModelineLinguist_1" , filename : filepath . Join ( modelinesDir , "example_smalltalk.md" ) , expected : [ ] string { "Smalltalk" } } ,
{ name : "TestGetLanguagesByModelineLinguist_2" , filename : filepath . Join ( modelinesDir , "fundamentalEmacs.c" ) , expected : [ ] string { "Text" } } ,
{ name : "TestGetLanguagesByModelineLinguist_3" , filename : filepath . Join ( modelinesDir , "iamphp.inc" ) , expected : [ ] string { "PHP" } } ,
{ name : "TestGetLanguagesByModelineLinguist_4" , filename : filepath . Join ( modelinesDir , "seeplusplusEmacs1" ) , expected : [ ] string { "C++" } } ,
{ name : "TestGetLanguagesByModelineLinguist_5" , filename : filepath . Join ( modelinesDir , "seeplusplusEmacs2" ) , expected : [ ] string { "C++" } } ,
{ name : "TestGetLanguagesByModelineLinguist_6" , filename : filepath . Join ( modelinesDir , "seeplusplusEmacs3" ) , expected : [ ] string { "C++" } } ,
{ name : "TestGetLanguagesByModelineLinguist_7" , filename : filepath . Join ( modelinesDir , "seeplusplusEmacs4" ) , expected : [ ] string { "C++" } } ,
{ name : "TestGetLanguagesByModelineLinguist_8" , filename : filepath . Join ( modelinesDir , "seeplusplusEmacs5" ) , expected : [ ] string { "C++" } } ,
{ name : "TestGetLanguagesByModelineLinguist_9" , filename : filepath . Join ( modelinesDir , "seeplusplusEmacs6" ) , expected : [ ] string { "C++" } } ,
{ name : "TestGetLanguagesByModelineLinguist_10" , filename : filepath . Join ( modelinesDir , "seeplusplusEmacs7" ) , expected : [ ] string { "C++" } } ,
{ name : "TestGetLanguagesByModelineLinguist_11" , filename : filepath . Join ( modelinesDir , "seeplusplusEmacs9" ) , expected : [ ] string { "C++" } } ,
{ name : "TestGetLanguagesByModelineLinguist_12" , filename : filepath . Join ( modelinesDir , "seeplusplusEmacs10" ) , expected : [ ] string { "C++" } } ,
{ name : "TestGetLanguagesByModelineLinguist_13" , filename : filepath . Join ( modelinesDir , "seeplusplusEmacs11" ) , expected : [ ] string { "C++" } } ,
{ name : "TestGetLanguagesByModelineLinguist_14" , filename : filepath . Join ( modelinesDir , "seeplusplusEmacs12" ) , expected : [ ] string { "C++" } } ,
2017-05-29 10:05:16 +02:00
// Vim
2017-06-12 13:42:20 +02:00
{ name : "TestGetLanguagesByModelineLinguist_15" , filename : filepath . Join ( modelinesDir , "seeplusplus" ) , expected : [ ] string { "C++" } } ,
{ name : "TestGetLanguagesByModelineLinguist_16" , filename : filepath . Join ( modelinesDir , "iamjs.pl" ) , expected : [ ] string { "JavaScript" } } ,
{ name : "TestGetLanguagesByModelineLinguist_17" , filename : filepath . Join ( modelinesDir , "iamjs2.pl" ) , expected : [ ] string { "JavaScript" } } ,
{ name : "TestGetLanguagesByModelineLinguist_18" , filename : filepath . Join ( modelinesDir , "not_perl.pl" ) , expected : [ ] string { "Prolog" } } ,
{ name : "TestGetLanguagesByModelineLinguist_19" , filename : filepath . Join ( modelinesDir , "ruby" ) , expected : [ ] string { "Ruby" } } ,
{ name : "TestGetLanguagesByModelineLinguist_20" , filename : filepath . Join ( modelinesDir , "ruby2" ) , expected : [ ] string { "Ruby" } } ,
{ name : "TestGetLanguagesByModelineLinguist_21" , filename : filepath . Join ( modelinesDir , "ruby3" ) , expected : [ ] string { "Ruby" } } ,
{ name : "TestGetLanguagesByModelineLinguist_22" , filename : filepath . Join ( modelinesDir , "ruby4" ) , expected : [ ] string { "Ruby" } } ,
{ name : "TestGetLanguagesByModelineLinguist_23" , filename : filepath . Join ( modelinesDir , "ruby5" ) , expected : [ ] string { "Ruby" } } ,
{ name : "TestGetLanguagesByModelineLinguist_24" , filename : filepath . Join ( modelinesDir , "ruby6" ) , expected : [ ] string { "Ruby" } } ,
{ name : "TestGetLanguagesByModelineLinguist_25" , filename : filepath . Join ( modelinesDir , "ruby7" ) , expected : [ ] string { "Ruby" } } ,
{ name : "TestGetLanguagesByModelineLinguist_26" , filename : filepath . Join ( modelinesDir , "ruby8" ) , expected : [ ] string { "Ruby" } } ,
{ name : "TestGetLanguagesByModelineLinguist_27" , filename : filepath . Join ( modelinesDir , "ruby9" ) , expected : [ ] string { "Ruby" } } ,
{ name : "TestGetLanguagesByModelineLinguist_28" , filename : filepath . Join ( modelinesDir , "ruby10" ) , expected : [ ] string { "Ruby" } } ,
{ name : "TestGetLanguagesByModelineLinguist_29" , filename : filepath . Join ( modelinesDir , "ruby11" ) , expected : [ ] string { "Ruby" } } ,
{ name : "TestGetLanguagesByModelineLinguist_30" , filename : filepath . Join ( modelinesDir , "ruby12" ) , expected : [ ] string { "Ruby" } } ,
2017-07-11 16:58:41 +02:00
{ name : "TestGetLanguagesByModelineLinguist_31" , filename : filepath . Join ( s . samplesDir , "C/main.c" ) , expected : nil } ,
2017-12-07 16:42:02 +01:00
{ name : "TestGetLanguagesByModelineLinguist_32" , filename : "" , expected : nil } ,
2017-05-29 10:05:16 +02:00
}
for _ , test := range tests {
2017-12-07 16:42:02 +01:00
var content [ ] byte
var err error
if test . filename != "" {
content , err = ioutil . ReadFile ( test . filename )
assert . NoError ( s . T ( ) , err )
}
2017-05-29 10:05:16 +02:00
2017-06-12 13:42:20 +02:00
languages := GetLanguagesByModeline ( test . filename , content , test . candidates )
assert . Equal ( s . T ( ) , test . expected , languages , fmt . Sprintf ( "%v: languages = %v, expected: %v" , test . name , languages , test . expected ) )
2017-05-29 10:05:16 +02:00
}
}
2017-06-21 15:18:27 +02:00
func ( s * EnryTestSuite ) TestGetLanguagesByModeline ( ) {
2017-05-29 10:05:16 +02:00
const (
wrongVim = ` # vim: set syntax=ruby ft =python filetype=perl : `
rightVim = ` /* vim: set syntax=python ft =python filetype=python */ `
noLangVim = ` /* vim: set shiftwidth=4 softtabstop=0 cindent cinoptions= { 1s: */ `
)
tests := [ ] struct {
2017-06-12 13:42:20 +02:00
name string
filename string
content [ ] byte
candidates [ ] string
expected [ ] string
2017-05-29 10:05:16 +02:00
} {
2017-06-12 13:42:20 +02:00
{ name : "TestGetLanguagesByModeline_1" , content : [ ] byte ( wrongVim ) , expected : nil } ,
{ name : "TestGetLanguagesByModeline_2" , content : [ ] byte ( rightVim ) , expected : [ ] string { "Python" } } ,
{ name : "TestGetLanguagesByModeline_3" , content : [ ] byte ( noLangVim ) , expected : nil } ,
2017-11-08 13:16:40 +01:00
{ name : "TestGetLanguagesByModeline_4" , content : nil , expected : nil } ,
{ name : "TestGetLanguagesByModeline_5" , content : [ ] byte { } , expected : nil } ,
2017-05-29 10:05:16 +02:00
}
for _ , test := range tests {
2017-06-12 13:42:20 +02:00
languages := GetLanguagesByModeline ( test . filename , test . content , test . candidates )
assert . Equal ( s . T ( ) , test . expected , languages , fmt . Sprintf ( "%v: languages = %v, expected: %v" , test . name , languages , test . expected ) )
2017-05-29 10:05:16 +02:00
}
}
2017-06-21 15:18:27 +02:00
func ( s * EnryTestSuite ) TestGetLanguagesByFilename ( ) {
2017-05-29 10:05:16 +02:00
tests := [ ] struct {
2017-06-12 13:42:20 +02:00
name string
filename string
content [ ] byte
candidates [ ] string
expected [ ] string
2017-05-29 10:05:16 +02:00
} {
2017-06-12 13:42:20 +02:00
{ name : "TestGetLanguagesByFilename_1" , filename : "unknown.interpreter" , expected : nil } ,
{ name : "TestGetLanguagesByFilename_2" , filename : ".bashrc" , expected : [ ] string { "Shell" } } ,
{ name : "TestGetLanguagesByFilename_3" , filename : "Dockerfile" , expected : [ ] string { "Dockerfile" } } ,
{ name : "TestGetLanguagesByFilename_4" , filename : "Makefile.frag" , expected : [ ] string { "Makefile" } } ,
{ name : "TestGetLanguagesByFilename_5" , filename : "makefile" , expected : [ ] string { "Makefile" } } ,
{ name : "TestGetLanguagesByFilename_6" , filename : "Vagrantfile" , expected : [ ] string { "Ruby" } } ,
{ name : "TestGetLanguagesByFilename_7" , filename : "_vimrc" , expected : [ ] string { "Vim script" } } ,
{ name : "TestGetLanguagesByFilename_8" , filename : "pom.xml" , expected : [ ] string { "Maven POM" } } ,
2017-12-07 16:42:02 +01:00
{ name : "TestGetLanguagesByFilename_9" , filename : "" , expected : nil } ,
2017-05-29 10:05:16 +02:00
}
for _ , test := range tests {
2017-06-12 13:42:20 +02:00
languages := GetLanguagesByFilename ( test . filename , test . content , test . candidates )
assert . Equal ( s . T ( ) , test . expected , languages , fmt . Sprintf ( "%v: languages = %v, expected: %v" , test . name , languages , test . expected ) )
2017-05-29 10:05:16 +02:00
}
}
2017-06-21 15:18:27 +02:00
func ( s * EnryTestSuite ) TestGetLanguagesByShebang ( ) {
2017-05-29 10:05:16 +02:00
const (
multilineExecHack = ` # ! / bin / sh
# Next line is comment in Tcl , but not in sh ... \
exec tclsh "$0" $ { 1 + "$@" } `
multilineNoExecHack = ` # ! / bin / sh
# << < #
echo "A shell script in a zkl program ($0)"
echo "Now run zkl <this file> with Hello World as args"
zkl $ 0 Hello World !
exit
# << < #
println ( "The shell script says " , vm . arglist . concat ( " " ) ) ; `
)
tests := [ ] struct {
2017-06-12 13:42:20 +02:00
name string
filename string
content [ ] byte
candidates [ ] string
expected [ ] string
2017-05-29 10:05:16 +02:00
} {
2017-06-12 13:42:20 +02:00
{ name : "TestGetLanguagesByShebang_1" , content : [ ] byte ( ` #!/unknown/interpreter ` ) , expected : nil } ,
{ name : "TestGetLanguagesByShebang_2" , content : [ ] byte ( ` no shebang ` ) , expected : nil } ,
{ name : "TestGetLanguagesByShebang_3" , content : [ ] byte ( ` #!/usr/bin/env ` ) , expected : nil } ,
{ name : "TestGetLanguagesByShebang_4" , content : [ ] byte ( ` #!/usr/bin/python -tt ` ) , expected : [ ] string { "Python" } } ,
{ name : "TestGetLanguagesByShebang_5" , content : [ ] byte ( ` #!/usr/bin/env python2.6 ` ) , expected : [ ] string { "Python" } } ,
2017-11-08 13:14:40 +01:00
{ name : "TestGetLanguagesByShebang_6" , content : [ ] byte ( ` #!/usr/bin/env perl ` ) , expected : [ ] string { "Perl" , "Pod" } } ,
2017-06-12 13:42:20 +02:00
{ name : "TestGetLanguagesByShebang_7" , content : [ ] byte ( ` #! /bin/sh ` ) , expected : [ ] string { "Shell" } } ,
{ name : "TestGetLanguagesByShebang_8" , content : [ ] byte ( ` #!bash ` ) , expected : [ ] string { "Shell" } } ,
{ name : "TestGetLanguagesByShebang_9" , content : [ ] byte ( multilineExecHack ) , expected : [ ] string { "Tcl" } } ,
{ name : "TestGetLanguagesByShebang_10" , content : [ ] byte ( multilineNoExecHack ) , expected : [ ] string { "Shell" } } ,
2017-07-25 10:25:43 +02:00
{ name : "TestGetLanguagesByShebang_11" , content : [ ] byte ( ` #! ` ) , expected : nil } ,
2017-05-29 10:05:16 +02:00
}
for _ , test := range tests {
2017-06-12 13:42:20 +02:00
languages := GetLanguagesByShebang ( test . filename , test . content , test . candidates )
assert . Equal ( s . T ( ) , test . expected , languages , fmt . Sprintf ( "%v: languages = %v, expected: %v" , test . name , languages , test . expected ) )
2017-05-29 10:05:16 +02:00
}
}
2017-06-21 15:18:27 +02:00
func ( s * EnryTestSuite ) TestGetLanguagesByExtension ( ) {
2017-05-29 10:05:16 +02:00
tests := [ ] struct {
2017-06-12 13:42:20 +02:00
name string
filename string
content [ ] byte
candidates [ ] string
expected [ ] string
2017-05-29 10:05:16 +02:00
} {
2019-03-14 13:26:00 +01:00
{ name : "TestGetLanguagesByExtension_0" , filename : "foo.h" , expected : [ ] string { "C" , "C++" , "Objective-C" } } ,
2017-06-12 13:42:20 +02:00
{ name : "TestGetLanguagesByExtension_1" , filename : "foo.foo" , expected : nil } ,
{ name : "TestGetLanguagesByExtension_2" , filename : "foo.go" , expected : [ ] string { "Go" } } ,
{ name : "TestGetLanguagesByExtension_3" , filename : "foo.go.php" , expected : [ ] string { "Hack" , "PHP" } } ,
2017-12-07 16:42:02 +01:00
{ name : "TestGetLanguagesByExtension_4" , filename : "" , expected : nil } ,
2017-05-29 10:05:16 +02:00
}
2016-07-13 22:21:18 +02:00
2017-05-29 10:05:16 +02:00
for _ , test := range tests {
2017-06-12 13:42:20 +02:00
languages := GetLanguagesByExtension ( test . filename , test . content , test . candidates )
assert . Equal ( s . T ( ) , test . expected , languages , fmt . Sprintf ( "%v: languages = %v, expected: %v" , test . name , languages , test . expected ) )
2017-05-29 10:05:16 +02:00
}
}
2020-09-17 10:27:28 +02:00
// TestGetLanguagesByManpage checks GetLanguagesByManpage against a table of
// manpage-style file names, a non-manpage name and the empty name.
func (s *EnryTestSuite) TestGetLanguagesByManpage() {
	type testCase struct {
		name       string
		filename   string
		content    []byte
		candidates []string
		expected   []string
	}
	cases := []testCase{
		{name: "TestGetLanguagesByManpage_1", filename: "bsdmalloc.3malloc", expected: []string{"Roff Manpage", "Roff"}},
		{name: "TestGetLanguagesByManpage_2", filename: "dirent.h.0p", expected: []string{"Roff Manpage", "Roff"}},
		{name: "TestGetLanguagesByManpage_3", filename: "linguist.1gh", expected: []string{"Roff Manpage", "Roff"}},
		{name: "TestGetLanguagesByManpage_4", filename: "test.1.in", expected: []string{"Roff Manpage", "Roff"}},
		{name: "TestGetLanguagesByManpage_5", filename: "test.man.in", expected: []string{"Roff Manpage", "Roff"}},
		{name: "TestGetLanguagesByManpage_6", filename: "test.mdoc.in", expected: []string{"Roff Manpage", "Roff"}},
		{name: "TestGetLanguagesByManpage_7", filename: "foo.h", expected: nil},
		{name: "TestGetLanguagesByManpage_8", filename: "", expected: nil},
	}

	t := s.T()
	for _, tc := range cases {
		got := GetLanguagesByManpage(tc.filename, tc.content, tc.candidates)
		msg := fmt.Sprintf("%v: languages = %v, expected: %v", tc.name, got, tc.expected)
		assert.Equal(t, tc.expected, got, msg)
	}
}
2020-11-15 14:43:37 +01:00
// TestGetLanguagesByXML reads fixture and sample files from the Linguist
// checkout and checks what GetLanguagesByXML reports for each of them.
func (s *EnryTestSuite) TestGetLanguagesByXML() {
	type testCase struct {
		name       string
		filename   string
		candidates []string
		expected   []string
	}
	cases := []testCase{
		{name: "TestGetLanguagesByXML_1", filename: filepath.Join(s.testFixturesDir, "XML/app.config"), expected: []string{"XML"}},
		{name: "TestGetLanguagesByXML_2", filename: filepath.Join(s.testFixturesDir, "XML/AssertionIDRequestOptionalAttributes.xml.svn-base"), expected: []string{"XML"}},
		// no XML header so should not be identified by this strategy
		{name: "TestGetLanguagesByXML_3", filename: filepath.Join(s.samplesDir, "XML/libsomething.dll.config"), expected: nil},
		{name: "TestGetLanguagesByXML_4", filename: filepath.Join(s.samplesDir, "Eagle/Eagle.sch"), candidates: []string{"Eagle"}, expected: []string{"Eagle"}},
	}

	t := s.T()
	for _, tc := range cases {
		content, err := ioutil.ReadFile(tc.filename)
		assert.NoError(t, err)

		got := GetLanguagesByXML(tc.filename, content, tc.candidates)
		msg := fmt.Sprintf("%v: languages = %v, expected: %v", tc.name, got, tc.expected)
		assert.Equal(t, tc.expected, got, msg)
	}
}
2017-06-21 15:18:27 +02:00
func ( s * EnryTestSuite ) TestGetLanguagesByClassifier ( ) {
2017-05-29 10:05:16 +02:00
test := [ ] struct {
name string
filename string
2017-06-12 13:42:20 +02:00
candidates [ ] string
2017-05-29 10:05:16 +02:00
expected string
} {
2017-07-11 16:58:41 +02:00
{ name : "TestGetLanguagesByClassifier_1" , filename : filepath . Join ( s . samplesDir , "C/blob.c" ) , candidates : [ ] string { "python" , "ruby" , "c" , "c++" } , expected : "C" } ,
{ name : "TestGetLanguagesByClassifier_2" , filename : filepath . Join ( s . samplesDir , "C/blob.c" ) , candidates : nil , expected : OtherLanguage } ,
{ name : "TestGetLanguagesByClassifier_3" , filename : filepath . Join ( s . samplesDir , "C/main.c" ) , candidates : [ ] string { } , expected : OtherLanguage } ,
{ name : "TestGetLanguagesByClassifier_4" , filename : filepath . Join ( s . samplesDir , "C/blob.c" ) , candidates : [ ] string { "python" , "ruby" , "c++" } , expected : "C++" } ,
{ name : "TestGetLanguagesByClassifier_5" , filename : filepath . Join ( s . samplesDir , "C/blob.c" ) , candidates : [ ] string { "ruby" } , expected : "Ruby" } ,
{ name : "TestGetLanguagesByClassifier_6" , filename : filepath . Join ( s . samplesDir , "Python/django-models-base.py" ) , candidates : [ ] string { "python" , "ruby" , "c" , "c++" } , expected : "Python" } ,
2019-03-14 13:26:00 +01:00
{ name : "TestGetLanguagesByClassifier_7" , filename : "" , candidates : [ ] string { "python" } , expected : "Python" } ,
2017-05-29 10:05:16 +02:00
}
for _ , test := range test {
2017-12-07 16:42:02 +01:00
var content [ ] byte
var err error
if test . filename != "" {
content , err = ioutil . ReadFile ( test . filename )
assert . NoError ( s . T ( ) , err )
}
2017-05-29 10:05:16 +02:00
2017-06-12 13:42:20 +02:00
languages := GetLanguagesByClassifier ( test . filename , content , test . candidates )
var language string
if len ( languages ) == 0 {
language = OtherLanguage
} else {
language = languages [ 0 ]
}
assert . Equal ( s . T ( ) , test . expected , language , fmt . Sprintf ( "%v: language = %v, expected: %v" , test . name , language , test . expected ) )
}
}
2017-06-21 15:18:27 +02:00
func ( s * EnryTestSuite ) TestGetLanguagesBySpecificClassifier ( ) {
2017-06-12 13:42:20 +02:00
test := [ ] struct {
name string
filename string
candidates [ ] string
2019-10-29 17:56:13 +01:00
classifier classifier
2017-06-12 13:42:20 +02:00
expected string
} {
2019-10-29 16:03:50 +01:00
{ name : "TestGetLanguagesByClassifier_1" , filename : filepath . Join ( s . samplesDir , "C/blob.c" ) , candidates : [ ] string { "python" , "ruby" , "c" , "c++" } , classifier : defaultClassifier , expected : "C" } ,
{ name : "TestGetLanguagesByClassifier_2" , filename : filepath . Join ( s . samplesDir , "C/blob.c" ) , candidates : nil , classifier : defaultClassifier , expected : "C" } ,
{ name : "TestGetLanguagesByClassifier_3" , filename : filepath . Join ( s . samplesDir , "C/main.c" ) , candidates : [ ] string { } , classifier : defaultClassifier , expected : "C" } ,
{ name : "TestGetLanguagesByClassifier_4" , filename : filepath . Join ( s . samplesDir , "C/blob.c" ) , candidates : [ ] string { "python" , "ruby" , "c++" } , classifier : defaultClassifier , expected : "C++" } ,
{ name : "TestGetLanguagesByClassifier_5" , filename : filepath . Join ( s . samplesDir , "C/blob.c" ) , candidates : [ ] string { "ruby" } , classifier : defaultClassifier , expected : "Ruby" } ,
{ name : "TestGetLanguagesByClassifier_6" , filename : filepath . Join ( s . samplesDir , "Python/django-models-base.py" ) , candidates : [ ] string { "python" , "ruby" , "c" , "c++" } , classifier : defaultClassifier , expected : "Python" } ,
{ name : "TestGetLanguagesByClassifier_7" , filename : os . DevNull , candidates : nil , classifier : defaultClassifier , expected : "XML" } ,
2017-06-12 13:42:20 +02:00
}
for _ , test := range test {
content , err := ioutil . ReadFile ( test . filename )
assert . NoError ( s . T ( ) , err )
2019-10-29 17:56:13 +01:00
languages := getLanguagesBySpecificClassifier ( content , test . candidates , test . classifier )
2017-06-12 13:42:20 +02:00
var language string
if len ( languages ) == 0 {
language = OtherLanguage
} else {
language = languages [ 0 ]
}
assert . Equal ( s . T ( ) , test . expected , language , fmt . Sprintf ( "%v: language = %v, expected: %v" , test . name , language , test . expected ) )
2017-05-29 10:05:16 +02:00
}
}
2017-06-21 15:18:27 +02:00
func ( s * EnryTestSuite ) TestGetLanguageExtensions ( ) {
2017-05-29 10:05:16 +02:00
tests := [ ] struct {
name string
language string
expected [ ] string
} {
{ name : "TestGetLanguageExtensions_1" , language : "foo" , expected : nil } ,
{ name : "TestGetLanguageExtensions_2" , language : "COBOL" , expected : [ ] string { ".cob" , ".cbl" , ".ccp" , ".cobol" , ".cpy" } } ,
{ name : "TestGetLanguageExtensions_3" , language : "Maven POM" , expected : nil } ,
}
for _ , test := range tests {
extensions := GetLanguageExtensions ( test . language )
2017-05-31 12:07:46 +02:00
assert . EqualValues ( s . T ( ) , test . expected , extensions , fmt . Sprintf ( "%v: extensions = %v, expected: %v" , test . name , extensions , test . expected ) )
2017-05-29 10:05:16 +02:00
}
}
2017-06-21 15:18:27 +02:00
func ( s * EnryTestSuite ) TestGetLanguageType ( ) {
2017-05-29 10:05:16 +02:00
tests := [ ] struct {
name string
language string
expected Type
} {
{ name : "TestGetLanguageType_1" , language : "BestLanguageEver" , expected : Unknown } ,
{ name : "TestGetLanguageType_2" , language : "JSON" , expected : Data } ,
{ name : "TestGetLanguageType_3" , language : "COLLADA" , expected : Data } ,
{ name : "TestGetLanguageType_4" , language : "Go" , expected : Programming } ,
{ name : "TestGetLanguageType_5" , language : "Brainfuck" , expected : Programming } ,
{ name : "TestGetLanguageType_6" , language : "HTML" , expected : Markup } ,
{ name : "TestGetLanguageType_7" , language : "Sass" , expected : Markup } ,
{ name : "TestGetLanguageType_8" , language : "AsciiDoc" , expected : Prose } ,
{ name : "TestGetLanguageType_9" , language : "Textile" , expected : Prose } ,
}
for _ , test := range tests {
langType := GetLanguageType ( test . language )
2017-05-31 12:07:46 +02:00
assert . Equal ( s . T ( ) , test . expected , langType , fmt . Sprintf ( "%v: langType = %v, expected: %v" , test . name , langType , test . expected ) )
2017-05-29 10:05:16 +02:00
}
}
2016-07-13 22:21:18 +02:00
2020-03-21 14:37:39 +01:00
func ( s * EnryTestSuite ) TestGetLanguageGroup ( ) {
tests := [ ] struct {
name string
language string
expected string
} {
{ name : "TestGetLanguageGroup_1" , language : "BestLanguageEver" , expected : "" } ,
2021-03-07 17:34:08 +01:00
{ name : "TestGetLanguageGroup_2" , language : "Bison" , expected : "Yacc" } ,
2020-03-21 14:37:39 +01:00
{ name : "TestGetLanguageGroup_3" , language : "HTML+PHP" , expected : "HTML" } ,
{ name : "TestGetLanguageGroup_4" , language : "HTML" , expected : "" } ,
}
for _ , test := range tests {
langGroup := GetLanguageGroup ( test . language )
assert . Equal ( s . T ( ) , test . expected , langGroup , fmt . Sprintf ( "%v: langGroup = %v, expected: %v" , test . name , langGroup , test . expected ) )
}
}
2017-06-21 15:18:27 +02:00
func ( s * EnryTestSuite ) TestGetLanguageByAlias ( ) {
2017-05-29 10:05:16 +02:00
tests := [ ] struct {
name string
alias string
expectedLang string
expectedOk bool
} {
{ name : "TestGetLanguageByAlias_1" , alias : "BestLanguageEver" , expectedLang : OtherLanguage , expectedOk : false } ,
2020-09-17 09:33:08 +02:00
{ name : "TestGetLanguageByAlias_2" , alias : "aspx-vb" , expectedLang : "ASP.NET" , expectedOk : true } ,
2017-05-29 10:05:16 +02:00
{ name : "TestGetLanguageByAlias_3" , alias : "C++" , expectedLang : "C++" , expectedOk : true } ,
{ name : "TestGetLanguageByAlias_4" , alias : "c++" , expectedLang : "C++" , expectedOk : true } ,
{ name : "TestGetLanguageByAlias_5" , alias : "objc" , expectedLang : "Objective-C" , expectedOk : true } ,
{ name : "TestGetLanguageByAlias_6" , alias : "golang" , expectedLang : "Go" , expectedOk : true } ,
{ name : "TestGetLanguageByAlias_7" , alias : "GOLANG" , expectedLang : "Go" , expectedOk : true } ,
{ name : "TestGetLanguageByAlias_8" , alias : "bsdmake" , expectedLang : "Makefile" , expectedOk : true } ,
{ name : "TestGetLanguageByAlias_9" , alias : "xhTmL" , expectedLang : "HTML" , expectedOk : true } ,
{ name : "TestGetLanguageByAlias_10" , alias : "python" , expectedLang : "Python" , expectedOk : true } ,
}
2016-07-18 16:20:12 +02:00
2017-05-29 10:05:16 +02:00
for _ , test := range tests {
lang , ok := GetLanguageByAlias ( test . alias )
2017-05-31 12:07:46 +02:00
assert . Equal ( s . T ( ) , test . expectedLang , lang , fmt . Sprintf ( "%v: lang = %v, expected: %v" , test . name , lang , test . expectedLang ) )
assert . Equal ( s . T ( ) , test . expectedOk , ok , fmt . Sprintf ( "%v: ok = %v, expected: %v" , test . name , ok , test . expectedOk ) )
2017-05-29 10:05:16 +02:00
}
2016-07-18 16:20:12 +02:00
}
2017-06-09 15:49:07 +02:00
2017-06-21 15:18:27 +02:00
func ( s * EnryTestSuite ) TestLinguistCorpus ( ) {
const filenamesDir = "filenames"
2017-06-09 15:49:07 +02:00
var cornerCases = map [ string ] bool {
2019-02-14 12:47:45 +01:00
"drop_stuff.sql" : true , // https://github.com/src-d/enry/issues/194
2020-09-17 09:33:08 +02:00
"modeline.txt" : true , // Because of unsupported negative lookahead RE syntax (https://github.com/github/linguist/blob/8083cb5a89cee2d99f5a988f165994d0243f0d1e/lib/linguist/heuristics.yml#L521)
2019-02-14 12:47:45 +01:00
// .es and .ice fail heuristics parsing, but do not fail any tests
2017-06-09 15:49:07 +02:00
}
var total , failed , ok , other int
var expected string
2017-07-11 16:58:41 +02:00
filepath . Walk ( s . samplesDir , func ( path string , f os . FileInfo , err error ) error {
2017-06-09 15:49:07 +02:00
if f . IsDir ( ) {
if f . Name ( ) != filenamesDir {
2019-02-14 12:47:45 +01:00
expected , _ = data . LanguageByAlias ( f . Name ( ) )
2017-06-09 15:49:07 +02:00
}
return nil
}
filename := filepath . Base ( path )
content , _ := ioutil . ReadFile ( path )
total ++
obtained := GetLanguage ( filename , content )
if obtained == OtherLanguage {
2017-06-12 13:42:20 +02:00
obtained = "Other"
2017-06-09 15:49:07 +02:00
other ++
}
var status string
if expected == obtained {
status = "ok"
ok ++
} else {
status = "failed"
failed ++
}
if _ , ok := cornerCases [ filename ] ; ok {
2019-02-14 12:47:45 +01:00
s . T ( ) . Logf ( "\t\t[considered corner case] %s\texpected: %s\tobtained: %s\tstatus: %s\n" , filename , expected , obtained , status )
2017-06-09 15:49:07 +02:00
} else {
2017-06-12 13:42:20 +02:00
assert . Equal ( s . T ( ) , expected , obtained , fmt . Sprintf ( "%s\texpected: %s\tobtained: %s\tstatus: %s\n" , filename , expected , obtained , status ) )
2017-06-09 15:49:07 +02:00
}
return nil
} )
2019-02-14 12:47:45 +01:00
s . T ( ) . Logf ( "\t\ttotal files: %d, ok: %d, failed: %d, other: %d\n" , total , ok , failed , other )
2017-06-09 15:49:07 +02:00
}
2021-04-13 20:49:21 +02:00
func ( s * EnryTestSuite ) TestGetLanguageID ( ) {
tests := [ ] struct {
name string
language string
expectedID int
2021-04-23 01:55:42 +02:00
found bool
2021-04-13 20:49:21 +02:00
} {
2021-04-23 01:55:42 +02:00
{ name : "TestGetLanguageID_1" , language : "1C Enterprise" , expectedID : 0 , found : true } ,
{ name : "TestGetLanguageID_2" , language : "BestLanguageEver" , expectedID : 0 , found : false } ,
{ name : "TestGetLanguageID_3" , language : "C++" , expectedID : 43 , found : true } ,
{ name : "TestGetLanguageID_5" , language : "Objective-C" , expectedID : 257 , found : true } ,
{ name : "TestGetLanguageID_6" , language : "golang" , expectedID : 0 , found : false } , // Aliases are not supported
{ name : "TestGetLanguageID_7" , language : "Go" , expectedID : 132 , found : true } ,
{ name : "TestGetLanguageID_8" , language : "Makefile" , expectedID : 220 , found : true } ,
2021-04-13 20:49:21 +02:00
}
for _ , test := range tests {
2021-04-23 01:55:42 +02:00
id , found := GetLanguageID ( test . language )
2021-04-13 20:49:21 +02:00
assert . Equal ( s . T ( ) , test . expectedID , id , fmt . Sprintf ( "%v: id = %v, expected: %v" , test . name , id , test . expectedID ) )
2021-04-23 01:55:42 +02:00
assert . Equal ( s . T ( ) , test . found , found , fmt . Sprintf ( "%v: found = %t, expected: %t" , test . name , found , test . found ) )
2021-04-13 20:49:21 +02:00
}
}