tokenizer: hide flex-based impl, avoid build failures on win

TestPlan:
 - go test -run TestTokenize ./internal/tokenizer
 - go test -tags flex -run TestTokenize ./internal/tokenizer
   (should fail, as the default fixtures are from the regex-based tokenizer)
This commit is contained in:
Alexander Bezzubov 2020-03-19 19:58:48 +01:00
parent 1ab8148c10
commit 78d8f43a88
No known key found for this signature in database
GPG Key ID: 8039F5787EFCD05D
2 changed files with 4 additions and 1 deletions

@ -1,3 +1,5 @@
// +build flex
package flex
// #include <stdlib.h>

@ -4,6 +4,7 @@ import (
"fmt"
"testing"
"github.com/go-enry/go-enry/v2/regex"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
@ -136,7 +137,7 @@ func TestRegexpOnInvalidUtf8(t *testing.T) {
{"th\u0100 filling", []string{"th", "filling"}}, // `thĀ filling`
{"привет, как дела?", []string{}}, // empty, no ASCII tokens
}
re := reRegularToken
re := regex.MustCompile(`[0-9A-Za-z_\.@#\/\*]+`) // a reRegularToken from tokenizer.go
for _, content := range origContent {
t.Run("", func(t *testing.T) {