go-enry/utils.go

package enry

import (
	"bytes"
	"path/filepath"
	"regexp"
	"sort"
	"strings"

	"github.com/go-enry/go-enry/v2/data"
	"github.com/go-enry/go-enry/v2/regex"
)

// binSniffLen is the maximum number of leading bytes that IsBinary inspects
// when looking for a NUL byte.
const binSniffLen = 8000

// configurationLanguages is the set of language names that IsConfiguration
// treats as configuration formats. Only the keys matter; the empty struct
// values carry no data.
var configurationLanguages = map[string]struct{}{
	"XML":  {},
	"JSON": {},
	"TOML": {},
	"YAML": {},
	"INI":  {},
	"SQL":  {},
}

// IsConfiguration reports whether the language that GetLanguageByExtension
// returns for path belongs to the configurationLanguages set.
func IsConfiguration(path string) bool {
	// Only the language name is needed for the set lookup, so the second
	// result is intentionally discarded.
	lang, _ := GetLanguageByExtension(path)
	if _, found := configurationLanguages[lang]; found {
		return true
	}

	return false
}

// IsImage reports whether path carries one of the recognised image
// extensions: .png, .jpg, .jpeg or .gif. The comparison is case-sensitive.
func IsImage(path string) bool {
	switch filepath.Ext(path) {
	case ".png", ".jpg", ".jpeg", ".gif":
		return true
	default:
		return false
	}
}

// GetMIMEType returns the MIME type for a file. The language's entry in
// data.LanguagesMime takes precedence; otherwise an image path (as decided by
// IsImage) yields "image/" followed by its extension without the leading dot;
// anything else falls back to "text/plain".
func GetMIMEType(path string, language string) string {
	mime, known := data.LanguagesMime[language]
	switch {
	case known:
		return mime
	case IsImage(path):
		ext := filepath.Ext(path)
		// IsImage only accepts non-empty extensions, so dropping the dot is safe.
		return "image/" + ext[1:]
	default:
		return "text/plain"
	}
}

// IsDocumentation returns whether or not path is a documentation path.
func IsDocumentation(path string) bool {
	return matchRegexSlice(data.DocumentationMatchers, path)
}

// IsDotFile reports whether the last element of the cleaned path starts with a
// dot. The current-directory element "." itself is not considered a dot file.
func IsDotFile(path string) bool {
	name := filepath.Base(filepath.Clean(path))
	if name == "." {
		return false
	}

	return strings.HasPrefix(name, ".")
}

// isVendorRegExp is the single collated regexp that IsVendor matches paths
// against. It is compiled once in init from data.VendorMatchers.
var isVendorRegExp *regexp.Regexp

// IsVendor reports whether path matches the collated vendor regexp held in
// isVendorRegExp.
func IsVendor(path string) bool {
	matched := isVendorRegExp.MatchString(path)
	return matched
}

// IsTest returns whether or not path is a test path.
func IsTest(path string) bool {
	return matchRegexSlice(data.TestMatchers, path)
}

// IsBinary reports whether data looks binary: it returns true when a NUL byte
// occurs within the first binSniffLen bytes. The heuristic is based on:
// http://git.kernel.org/cgit/git/git.git/tree/xdiff-interface.c?id=HEAD#n198
func IsBinary(data []byte) bool {
	// Only the head of the buffer is sniffed; anything beyond it is ignored.
	head := data
	if len(head) > binSniffLen {
		head = head[:binSniffLen]
	}

	return bytes.IndexByte(head, 0) != -1
}

// GetColor returns the HTML color code for language. When the language has no
// entry in data.LanguagesColor, the color of its group (GetLanguageGroup) is
// used instead; if neither has an entry the result is "#cccccc".
func GetColor(language string) string {
	color, ok := data.LanguagesColor[language]
	if !ok {
		// The group is only resolved when the language itself has no color.
		color, ok = data.LanguagesColor[GetLanguageGroup(language)]
	}
	if !ok {
		return "#cccccc"
	}

	return color
}

// matchRegexSlice reports whether str matches at least one of exprs. The
// expressions are tried in order and the scan stops at the first match; an
// empty slice never matches.
func matchRegexSlice(exprs []regex.EnryRegexp, str string) bool {
	for i := 0; i < len(exprs); i++ {
		if !exprs[i].MatchString(str) {
			continue
		}
		return true
	}

	return false
}

// IsGenerated reports whether the file with the given path and content is a
// generated file. Three checks run in order and the first hit wins:
//
//  1. the lower-cased extension is listed in data.GeneratedCodeExtensions;
//  2. a data.GeneratedCodeNameMatchers entry accepts the path as given;
//  3. a data.GeneratedCodeMatchers entry accepts the lower-cased path, the
//     lower-cased extension and the content.
func IsGenerated(path string, content []byte) bool {
	ext := strings.ToLower(filepath.Ext(path))

	_, generatedExt := data.GeneratedCodeExtensions[ext]
	if generatedExt {
		return true
	}

	// Name matchers receive the path with its original casing.
	for _, matchName := range data.GeneratedCodeNameMatchers {
		if matchName(path) {
			return true
		}
	}

	// Content matchers receive the lower-cased path.
	lowered := strings.ToLower(path)
	for _, matchContent := range data.GeneratedCodeMatchers {
		if matchContent(lowered, ext, content) {
			return true
		}
	}

	return false
}

// init compiles isVendorRegExp, the single collated regexp used by IsVendor,
// from the individual regexps in data.VendorMatchers.
func init() {
	// We collate the individual regexps that make up the VendorMatchers to
	// produce a single large regexp which is faster to test than simply
	// iterating through all the regexps or naïvely collating the regexps.
	//
	// ---
	//
	// data.VendorMatchers here is a slice containing individual regexps that
	// match a vendor file therefore if we want to test if a filename is a
	// Vendor we need to test whether that filename matches one or more of
	// those regexps.
	//
	// Now we could test each matcher in turn using a shortcircuiting test i.e.
	//
	//	func IsVendor(filename string) bool {
	//		for _, matcher := range data.VendorMatchers {
	//			if matcher.Match(filename) {
	//				return true
	//			}
	//		}
	//		return false
	//	}
	//
	// Or concatenate all these regexps using groups i.e.
	//
	//	`(regexp1)|(regexp2)|(regexp3)|...`
	//
	// However both of these are relatively slow and they don't take advantage
	// of the inherent structure within our regexps...
	//
	// If we look at our regexps there are essentially three types of regexp:
	//
	// 1. Those that start with `^`
	// 2. Those that start with `(^|/)`
	// 3. Others
	//
	// If we collate our regexps into these groups that will significantly
	// reduce the likelihood of backtracking within the regexp trie matcher.
	//
	// A further improvement is to use non-capturing groups as otherwise the
	// regexp parser, whilst matching, will have to allocate slices for
	// matching positions. (A future improvement here could be in the use of
	// enforcing non-capturing groups within the sub-regexps too.)
	//
	// Finally if we sort the segments we can help the matcher build a more
	// efficient matcher and trie.
	//
	// The actual collation lives in collateVendorPatterns.

	// Collect the string representation of every matcher.
	patterns := make([]string, 0, len(data.VendorMatchers))
	for _, matcher := range data.VendorMatchers {
		patterns = append(patterns, matcher.String())
	}

	// Compile the whole thing as the isVendorRegExp
	isVendorRegExp = regexp.MustCompile(collateVendorPatterns(patterns))
}

// collateVendorPatterns merges patterns into the source of one regexp that
// matches whenever at least one of the individual patterns would match.
//
// Patterns are split into three groups by their prefix (`^`, `(^|/)`, anything
// else), the shared prefix is factored out of the first two groups, and each
// group is sorted. For the patterns `^a`, `^b`, `(^|/)c`, `(^|/)d`, `e` the
// result is:
//
//	(?:^(?:(?:a)|(?:b)))|(?:(?:^|/)(?:(?:c)|(?:d)))|(?:e)
//
// The alternatives of a prefixed group are wrapped in a group of their own:
// alternation binds more loosely than concatenation, so without that wrapper
// the prefix would only apply to the first alternative and every other
// pattern of the group would silently lose its anchor.
//
// Groups without any pattern are omitted. When patterns is empty the returned
// regexp never matches.
//
// NOTE(review): factoring out the prefix assumes that no pattern contains a
// top-level alternation (e.g. `^a|b`) — confirm against the generated data.
func collateVendorPatterns(patterns []string) string {
	const segmentPrefix = "(^|/)"

	// Three temporary string slices for the three groups - prefixes removed.
	caretStrings := make([]string, 0, 10)
	caretSegmentStrings := make([]string, 0, 10)
	matcherStrings := make([]string, 0, len(patterns))

	// HasPrefix (rather than indexing or slicing) keeps patterns that are
	// empty or shorter than a prefix from causing an out-of-range panic.
	for _, str := range patterns {
		switch {
		case strings.HasPrefix(str, "^"):
			caretStrings = append(caretStrings, str[1:])
		case strings.HasPrefix(str, segmentPrefix):
			caretSegmentStrings = append(caretSegmentStrings, str[len(segmentPrefix):])
		default:
			matcherStrings = append(matcherStrings, str)
		}
	}

	// Sort the strings within each group - a potential further improvement
	// could be in simplifying within these groups.
	sort.Strings(caretSegmentStrings)
	sort.Strings(caretStrings)
	sort.Strings(matcherStrings)

	groups := make([]string, 0, 3)

	// Group 1 - those that started with `^`
	if len(caretStrings) > 0 {
		groups = append(groups, "(?:^(?:"+joinAlternatives(caretStrings)+"))")
	}

	// Group 2 - those that started with `(^|/)`
	if len(caretSegmentStrings) > 0 {
		groups = append(groups, "(?:(?:^|/)(?:"+joinAlternatives(caretSegmentStrings)+"))")
	}

	// Group 3 - the rest, which need no shared prefix
	if len(matcherStrings) > 0 {
		groups = append(groups, joinAlternatives(matcherStrings))
	}

	if len(groups) == 0 {
		// An empty regexp would match every path; `\z.` demands a character
		// after the end of the text and therefore matches nothing.
		return `\z.`
	}

	return strings.Join(groups, "|")
}

// joinAlternatives wraps each pattern in its own non-capturing group and joins
// the groups with `|`, e.g. `(?:a)|(?:b)`. It returns "" for an empty slice.
func joinAlternatives(patterns []string) string {
	sb := &strings.Builder{}
	for i, pattern := range patterns {
		if i > 0 {
			sb.WriteString("|")
		}
		sb.WriteString("(?:")
		sb.WriteString(pattern)
		sb.WriteString(")")
	}

	return sb.String()
}
renamed package and cli to enry 2017-06-13 13:56:07 +02:00			`package enry`
code from domain 2016-07-13 19:05:09 +02:00
			`import (`
			`"bytes"`
			`"path/filepath"`
Make IsVendor quicker Although iterating across the regexps is quicker than naively concatenating them, it is still quite slow. This PR proposes a slightly cleverer solution. First instead of just concatenating with groups this PR uses non-capturing groups. This speeds up the regexp processing. Secondly we group the regexps in to 3 groups - those that have to be at the start, those that are segments or at the start and the rest. This makes a considerable speed improvement. Thirdly the regexps are sorted within those groups - which also speeds things up. All in all for a non-vendored file this makes IsVendor around twice as fast. Signed-off-by: Andrew Thornton <art27@cantab.net> 2021-03-31 21:34:43 +02:00			`"regexp"`
			`"sort"`
code from domain 2016-07-13 19:05:09 +02:00			`"strings"`
moved generated data to data subpackage 2017-06-08 12:28:36 +02:00
*: module rename to go-enry/go-enry/v4 2020-03-19 17:31:29 +01:00			`"github.com/go-enry/go-enry/v2/data"`
data: replace substring package with regex package 2020-04-15 17:27:48 +02:00			`"github.com/go-enry/go-enry/v2/regex"`
code from domain 2016-07-13 19:05:09 +02:00			`)`

refactoring: remove un-used code, add go doc, fix ci (#199) Refactoring, consisting of - remove unused method `isAuxiliaryLanguage` and `FileCountList` in order to reduce public API surfaces (go/java) - add GoDoc to public APIs - ci: java profile use latest go src It also now mimics https://docs.travis-ci.com/user/languages/go/#go-import-path for non-go build image, as code relies on internal imports. TEST PLAN: - make test 2019-02-05 22:54:14 +01:00			`const binSniffLen = 8000`
Rearranged code 2017-05-29 10:05:16 +02:00
implement IsGenerated helper to filter out generated files Closes #17 Implements the IsGenerated helper function to filter out generated files using the rules and matchers in: - https://github.com/github/linguist/blob/master/lib/linguist/generated.rb Since the vast majority of matchers have very different logic, it cannot be autogenerated directly from linguist like other logics in enry, so it's translated by hand. There are three different types of matchers in this implementation: - By extension, which mark as generated based only in the extension. These are the fastest matchers, so they're done first. - By file name, which matches patterns against the filename. These are performed in second place. Unlike linguist, we try to use string functions instead of regexps as much as possible. - Finally, the rest of the matchers, which go into the content and try to identify if they're generated or not based on the content. Unlike linguist, we try to only read the content we need and not split it all unless it's necessary and use byte functions instead of regexps as much as possible. Signed-off-by: Miguel Molina <miguel@erizocosmi.co> 2020-05-27 15:07:57 +02:00			`var configurationLanguages = map[string]struct{}{`
			`"XML": {},`
			`"JSON": {},`
			`"TOML": {},`
			`"YAML": {},`
			`"INI": {},`
			`"SQL": {},`
unified GetLanguage function 2016-07-18 16:20:12 +02:00			`}`

refactoring: remove un-used code, add go doc, fix ci (#199) Refactoring, consisting of - remove unused method `isAuxiliaryLanguage` and `FileCountList` in order to reduce public API surfaces (go/java) - add GoDoc to public APIs - ci: java profile use latest go src It also now mimics https://docs.travis-ci.com/user/languages/go/#go-import-path for non-go build image, as code relies on internal imports. TEST PLAN: - make test 2019-02-05 22:54:14 +01:00			`// IsConfiguration tells if filename is in one of the configuration languages.`
tests 2016-07-13 22:21:18 +02:00			`func IsConfiguration(path string) bool {`
changed signatures for strategies 2017-06-12 13:42:20 +02:00			`language, _ := GetLanguageByExtension(path)`
			`_, is := configurationLanguages[language]`
tests 2016-07-13 22:21:18 +02:00			`return is`
			`}`

refactoring: remove un-used code, add go doc, fix ci (#199) Refactoring, consisting of - remove unused method `isAuxiliaryLanguage` and `FileCountList` in order to reduce public API surfaces (go/java) - add GoDoc to public APIs - ci: java profile use latest go src It also now mimics https://docs.travis-ci.com/user/languages/go/#go-import-path for non-go build image, as code relies on internal imports. TEST PLAN: - make test 2019-02-05 22:54:14 +01:00			`// IsImage tells if a given file is an image (PNG, JPEG or GIF format).`
Fixed output text 2017-07-11 12:27:48 +02:00			`func IsImage(path string) bool {`
			`extension := filepath.Ext(path)`
Added mymeType.gold 2017-07-11 11:13:49 +02:00			`if extension == ".png" \|\| extension == ".jpg" \|\| extension == ".jpeg" \|\| extension == ".gif" {`
			`return true`
			`}`
Fixed output text 2017-07-11 12:27:48 +02:00
Added mymeType.gold 2017-07-11 11:13:49 +02:00			`return false`
			`}`

refactoring: remove un-used code, add go doc, fix ci (#199) Refactoring, consisting of - remove unused method `isAuxiliaryLanguage` and `FileCountList` in order to reduce public API surfaces (go/java) - add GoDoc to public APIs - ci: java profile use latest go src It also now mimics https://docs.travis-ci.com/user/languages/go/#go-import-path for non-go build image, as code relies on internal imports. TEST PLAN: - make test 2019-02-05 22:54:14 +01:00			`// GetMIMEType returns a MIME type of a given file based on its languages.`
			`func GetMIMEType(path string, language string) string {`
Returns text/plain when mime it's undefined 2017-07-10 12:59:39 +02:00			`if mime, ok := data.LanguagesMime[language]; ok {`
			`return mime`
			`}`

Fixed output text 2017-07-11 12:27:48 +02:00			`if IsImage(path) {`
			`return "image/" + filepath.Ext(path)[1:]`
			`}`

Returns text/plain when mime it's undefined 2017-07-10 12:59:39 +02:00			`return "text/plain"`
Now generates mime file 2017-07-10 12:50:52 +02:00			`}`

refactoring: remove un-used code, add go doc, fix ci (#199) Refactoring, consisting of - remove unused method `isAuxiliaryLanguage` and `FileCountList` in order to reduce public API surfaces (go/java) - add GoDoc to public APIs - ci: java profile use latest go src It also now mimics https://docs.travis-ci.com/user/languages/go/#go-import-path for non-go build image, as code relies on internal imports. TEST PLAN: - make test 2019-02-05 22:54:14 +01:00			`// IsDocumentation returns whether or not path is a documentation path.`
			`func IsDocumentation(path string) bool {`
data: replace substring package with regex package 2020-04-15 17:27:48 +02:00			`return matchRegexSlice(data.DocumentationMatchers, path)`
refactoring: remove un-used code, add go doc, fix ci (#199) Refactoring, consisting of - remove unused method `isAuxiliaryLanguage` and `FileCountList` in order to reduce public API surfaces (go/java) - add GoDoc to public APIs - ci: java profile use latest go src It also now mimics https://docs.travis-ci.com/user/languages/go/#go-import-path for non-go build image, as code relies on internal imports. TEST PLAN: - make test 2019-02-05 22:54:14 +01:00			`}`

			`// IsDotFile returns whether or not path has dot as a prefix.`
			`func IsDotFile(path string) bool {`
			`base := filepath.Base(filepath.Clean(path))`
			`return strings.HasPrefix(base, ".") && base != "."`
			`}`

Make IsVendor quicker Although iterating across the regexps is quicker than naively concatenating them, it is still quite slow. This PR proposes a slightly cleverer solution. First instead of just concatenating with groups this PR uses non-capturing groups. This speeds up the regexp processing. Secondly we group the regexps in to 3 groups - those that have to be at the start, those that are segments or at the start and the rest. This makes a considerable speed improvement. Thirdly the regexps are sorted within those groups - which also speeds things up. All in all for a non-vendored file this makes IsVendor around twice as fast. Signed-off-by: Andrew Thornton <art27@cantab.net> 2021-03-31 21:34:43 +02:00			`var isVendorRegExp *regexp.Regexp`

refactoring: remove un-used code, add go doc, fix ci (#199) Refactoring, consisting of - remove unused method `isAuxiliaryLanguage` and `FileCountList` in order to reduce public API surfaces (go/java) - add GoDoc to public APIs - ci: java profile use latest go src It also now mimics https://docs.travis-ci.com/user/languages/go/#go-import-path for non-go build image, as code relies on internal imports. TEST PLAN: - make test 2019-02-05 22:54:14 +01:00			`// IsVendor returns whether or not path is a vendor path.`
			`func IsVendor(path string) bool {`
Make IsVendor quicker Although iterating across the regexps is quicker than naively concatenating them, it is still quite slow. This PR proposes a slightly cleverer solution. First instead of just concatenating with groups this PR uses non-capturing groups. This speeds up the regexp processing. Secondly we group the regexps in to 3 groups - those that have to be at the start, those that are segments or at the start and the rest. This makes a considerable speed improvement. Thirdly the regexps are sorted within those groups - which also speeds things up. All in all for a non-vendored file this makes IsVendor around twice as fast. Signed-off-by: Andrew Thornton <art27@cantab.net> 2021-03-31 21:34:43 +02:00			`return isVendorRegExp.MatchString(path)`
refactoring: remove un-used code, add go doc, fix ci (#199) Refactoring, consisting of - remove unused method `isAuxiliaryLanguage` and `FileCountList` in order to reduce public API surfaces (go/java) - add GoDoc to public APIs - ci: java profile use latest go src It also now mimics https://docs.travis-ci.com/user/languages/go/#go-import-path for non-go build image, as code relies on internal imports. TEST PLAN: - make test 2019-02-05 22:54:14 +01:00			`}`
tests 2016-07-13 22:21:18 +02:00
IsTest function for top 10 languages 2020-04-06 16:23:48 +02:00			`// IsTest returns whether or not path is a test path.`
			`func IsTest(path string) bool {`
data: replace substring package with regex package 2020-04-15 17:27:48 +02:00			`return matchRegexSlice(data.TestMatchers, path)`
IsTest function for top 10 languages 2020-04-06 16:23:48 +02:00			`}`

Rearranged code 2017-05-29 10:05:16 +02:00			`// IsBinary detects if data is a binary value based on:`
			`// http://git.kernel.org/cgit/git/git.git/tree/xdiff-interface.c?id=HEAD#n198`
tests 2016-07-13 22:21:18 +02:00			`func IsBinary(data []byte) bool {`
refactoring: remove un-used code, add go doc, fix ci (#199) Refactoring, consisting of - remove unused method `isAuxiliaryLanguage` and `FileCountList` in order to reduce public API surfaces (go/java) - add GoDoc to public APIs - ci: java profile use latest go src It also now mimics https://docs.travis-ci.com/user/languages/go/#go-import-path for non-go build image, as code relies on internal imports. TEST PLAN: - make test 2019-02-05 22:54:14 +01:00			`if len(data) > binSniffLen {`
			`data = data[:binSniffLen]`
tests 2016-07-13 22:21:18 +02:00			`}`

			`if bytes.IndexByte(data, byte(0)) == -1 {`
			`return false`
			`}`

			`return true`
			`}`
Implement getting color code for languages Signed-off-by: Lauris Bukšis-Haberkorns <lauris@nix.lv> 2019-07-19 22:28:57 +02:00
			`// GetColor returns a HTML color code of a given language.`
			`func GetColor(language string) string {`
			`if color, ok := data.LanguagesColor[language]; ok {`
			`return color`
			`}`

Return group color if language has none 2020-03-21 14:37:39 +01:00			`if color, ok := data.LanguagesColor[GetLanguageGroup(language)]; ok {`
			`return color`
			`}`

Implement getting color code for languages Signed-off-by: Lauris Bukšis-Haberkorns <lauris@nix.lv> 2019-07-19 22:28:57 +02:00			`return "#cccccc"`
			`}`
data: replace substring package with regex package 2020-04-15 17:27:48 +02:00
			`func matchRegexSlice(exprs []regex.EnryRegexp, str string) bool {`
			`for _, expr := range exprs {`
			`if expr.MatchString(str) {`
			`return true`
			`}`
			`}`

			`return false`
			`}`
implement IsGenerated helper to filter out generated files Closes #17 Implements the IsGenerated helper function to filter out generated files using the rules and matchers in: - https://github.com/github/linguist/blob/master/lib/linguist/generated.rb Since the vast majority of matchers have very different logic, it cannot be autogenerated directly from linguist like other logics in enry, so it's translated by hand. There are three different types of matchers in this implementation: - By extension, which mark as generated based only in the extension. These are the fastest matchers, so they're done first. - By file name, which matches patterns against the filename. These are performed in second place. Unlike linguist, we try to use string functions instead of regexps as much as possible. - Finally, the rest of the matchers, which go into the content and try to identify if they're generated or not based on the content. Unlike linguist, we try to only read the content we need and not split it all unless it's necessary and use byte functions instead of regexps as much as possible. Signed-off-by: Miguel Molina <miguel@erizocosmi.co> 2020-05-27 15:07:57 +02:00
			`// IsGenerated returns whether the file with the given path and content is a`
			`// generated file.`
			`func IsGenerated(path string, content []byte) bool {`
			`ext := strings.ToLower(filepath.Ext(path))`
			`if _, ok := data.GeneratedCodeExtensions[ext]; ok {`
			`return true`
			`}`

			`for _, m := range data.GeneratedCodeNameMatchers {`
			`if m(path) {`
			`return true`
			`}`
			`}`

			`path = strings.ToLower(path)`
			`for _, m := range data.GeneratedCodeMatchers {`
			`if m(path, ext, content) {`
			`return true`
			`}`
			`}`

			`return false`
			`}`
Make IsVendor quicker Although iterating across the regexps is quicker than naively concatenating them, it is still quite slow. This PR proposes a slightly cleverer solution. First instead of just concatenating with groups this PR uses non-capturing groups. This speeds up the regexp processing. Secondly we group the regexps in to 3 groups - those that have to be at the start, those that are segments or at the start and the rest. This makes a considerable speed improvement. Thirdly the regexps are sorted within those groups - which also speeds things up. All in all for a non-vendored file this makes IsVendor around twice as fast. Signed-off-by: Andrew Thornton <art27@cantab.net> 2021-03-31 21:34:43 +02:00
			`func init() {`
			`// We now collate the individual regexps that make up the VendorMatchers to`
			`// produce a single large regexp which is around twice as fast to test than`
			`// simply iterating through all the regexps or naïvely collating the`
			`// regexps.`
			`//`
			`// ---`
			`//`
			`// data.VendorMatchers here is a slice containing individual regexps that`
			`// match a vendor file therefore if we want to test if a filename is a`
			`// Vendor we need to test whether that filename matches one or more of`
			`// those regexps.`
			`//`
			`// Now we could test each matcher in turn using a shortcircuiting test i.e.`
			`//`
			`// func IsVendor(filename string) bool {`
			`// for _, matcher := range data.VendorMatchers {`
			`// if matcher.Match(filename) {`
			`// return true`
			`// }`
			`// }`
			`// return false`
			`// }`
			`//`
			`// Or concatentate all these regexps using groups i.e.`
			`//`
			// `(regexp1)\|(regexp2)\|(regexp3)\|...`
			`//`
			`// However both of these are relatively slow and they don't take advantage`
			`// of the inherent structure within our regexps...`
			`//`
			`// If we look at our regexps there are essentially three types of regexp:`
			`//`
			// 1. Those that start with `^`
			// 2. Those that start with `(^\|/)`
			`// 3. Others`
			`//`
			`// If we collate our regexps into these groups that will significantly`
			`// reduce the likelihood of backtracking within the regexp trie matcher.`
			`//`
			`// A further improvement is to use non-capturing groups as otherwise the`
			`// regexp parser, whilst matching, will have to allocate slices for`
			`// matching positions. (A future improvement here could be in the use of`
			`// enforcing non-capturing groups within the sub-regexps too.)`
			`//`
			`// Finally if we sort the segments we can help the matcher build a more`
			`// efficient matcher and trie.`

			`// alias the VendorMatchers to simplify things`
			`matchers := data.VendorMatchers`

			`// Create three temporary string slices for our three groups above - prefixes removed`
			`caretStrings := make([]string, 0, 10)`
			`caretSegmentStrings := make([]string, 0, 10)`
			`matcherStrings := make([]string, 0, len(matchers))`

			`// Walk the matchers and check their string representation for each group prefix, remove it and add to the respective group slices`
			`for _, matcher := range matchers {`
			`str := matcher.String()`
			`if str[0] == '^' {`
			`caretStrings = append(caretStrings, str[1:])`
			`} else if str[0:5] == "(^\|/)" {`
			`caretSegmentStrings = append(caretSegmentStrings, str[5:])`
			`} else {`
			`matcherStrings = append(matcherStrings, str)`
			`}`
			`}`

			`// Sort the strings within each group - a potential further improvement could be in simplifying within these groups`
			`sort.Strings(caretSegmentStrings)`
			`sort.Strings(caretStrings)`
			`sort.Strings(matcherStrings)`

			`// Now build the collated regexp`
			`sb := &strings.Builder{}`

			// Start with group 1 - those that started with `^`
			`sb.WriteString("(?:^(?:")`
			`sb.WriteString(caretStrings[0])`
			`for _, matcher := range caretStrings[1:] {`
			`sb.WriteString(")\|(?:")`
			`sb.WriteString(matcher)`
			`}`
			`sb.WriteString("))")`
			`sb.WriteString("\|")`

			// Now add group 2 - those that started with `(^\|/)`
			`sb.WriteString("(?:(?:^\|/)(?:")`
			`sb.WriteString(caretSegmentStrings[0])`
			`for _, matcher := range caretSegmentStrings[1:] {`
			`sb.WriteString(")\|(?:")`
			`sb.WriteString(matcher)`
			`}`
			`sb.WriteString("))")`
			`sb.WriteString("\|")`

			`// Finally add the rest`
			`sb.WriteString("(?:")`
			`sb.WriteString(matcherStrings[0])`
			`for _, matcher := range matcherStrings[1:] {`
			`sb.WriteString(")\|(?:")`
			`sb.WriteString(matcher)`
			`}`
			`sb.WriteString(")")`

			`// Compile the whole thing as the isVendorRegExp`
			`isVendorRegExp = regexp.MustCompile(sb.String())`
			`}`