Merge pull request #73 from dpaz/issue72

Cli to analyze a single file
This commit is contained in:
Alfredo Beaumont 2017-07-20 13:40:56 +02:00 committed by GitHub
commit 4d42cb06d3
9 changed files with 557 additions and 1 deletions

@ -9,6 +9,7 @@ import (
"log"
"os"
"path/filepath"
"strings"
"gopkg.in/src-d/enry.v1"
"gopkg.in/src-d/enry.v1/data"
@ -48,6 +49,7 @@ func main() {
}
if relativePath == "." {
fmt.Print(printFileAnalysis(root))
return nil
}
@ -154,6 +156,44 @@ func printPercents(out map[string][]string, buff *bytes.Buffer) {
}
}
// printFileAnalysis reads file and returns a short multi-line report with its
// base name, total and non-blank line counts, file type, MIME type and
// detected language.
//
// If the file cannot be read the error is printed and an empty report is
// returned, so no statistics are produced for content that was never loaded.
func printFileAnalysis(file string) string {
	content, err := ioutil.ReadFile(file)
	if err != nil {
		// Stop here: analysing nil content would report a bogus
		// "0 lines" text file for something that could not be read.
		fmt.Println(err)
		return ""
	}

	totalLines, nonBlank := getLines(file, string(content))
	fileType := getFileType(file, content)
	language := enry.GetLanguage(file, content)
	mimeType := enry.GetMimeType(file, language)

	// The raw string below is emitted verbatim, so its continuation lines
	// must stay unindented.
	return fmt.Sprintf(
		`%s: %d lines (%d sloc)
type: %s
mime_type: %s
language: %s
`,
		filepath.Base(file), totalLines, nonBlank, fileType, mimeType, language,
	)
}
// getLines returns the total number of lines in content and how many of them
// are non-blank, i.e. contain at least one non-whitespace character.
//
// A final line without a trailing newline is counted, and empty content has
// zero lines. The file argument is not used by the computation.
func getLines(file string, content string) (int, int) {
	if content == "" {
		return 0, 0
	}

	// Drop a single trailing newline so it terminates the last line instead
	// of introducing an extra empty one after splitting.
	lines := strings.Split(strings.TrimSuffix(content, "\n"), "\n")

	nonBlank := 0
	for _, line := range lines {
		// TrimSpace also strips the "\r" left behind by CRLF line endings.
		if strings.TrimSpace(line) != "" {
			nonBlank++
		}
	}

	return len(lines), nonBlank
}
// getFileType classifies a file as "Image", "Binary" or "Text".
//
// The path is checked with enry.IsImage first; only when that fails is the
// content checked with enry.IsBinary. Anything else is reported as "Text".
func getFileType(file string, content []byte) string {
	if enry.IsImage(file) {
		return "Image"
	}

	if enry.IsBinary(content) {
		return "Binary"
	}

	return "Text"
}
func writeStringLn(s string, buff *bytes.Buffer) {
buff.WriteString(s)
buff.WriteByte('\n')

204
data/mimeType.go Normal file

@ -0,0 +1,204 @@
package data
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/enry.v1/internal/code-generator
// THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: 37979b26b04e10868017469e5cc56263b0a39c84
var LanguagesMime = map[string]string{
"AGS Script": "text/x-c++src",
"APL": "text/apl",
"ASN.1": "text/x-ttcn-asn",
"ASP": "application/x-aspx",
"Alpine Abuild": "text/x-sh",
"Ant Build System": "application/xml",
"Apex": "text/x-java",
"Arduino": "text/x-c++src",
"Brainfuck": "text/x-brainfuck",
"C": "text/x-csrc",
"C#": "text/x-csharp",
"C++": "text/x-c++src",
"C2hs Haskell": "text/x-haskell",
"CMake": "text/x-cmake",
"COBOL": "text/x-cobol",
"COLLADA": "text/xml",
"CSON": "text/x-coffeescript",
"CSS": "text/css",
"ChucK": "text/x-java",
"Clojure": "text/x-clojure",
"Closure Templates": "text/x-soy",
"CoffeeScript": "text/x-coffeescript",
"Common Lisp": "text/x-common-lisp",
"Component Pascal": "text/x-pascal",
"Crystal": "text/x-crystal",
"Cuda": "text/x-c++src",
"Cycript": "text/javascript",
"Cython": "text/x-cython",
"D": "text/x-d",
"DTrace": "text/x-csrc",
"Dart": "application/dart",
"Diff": "text/x-diff",
"Dockerfile": "text/x-dockerfile",
"Dylan": "text/x-dylan",
"EBNF": "text/x-ebnf",
"ECL": "text/x-ecl",
"EQ": "text/x-csharp",
"Eagle": "text/xml",
"Easybuild": "text/x-python",
"Ecere Projects": "application/json",
"Eiffel": "text/x-eiffel",
"Elm": "text/x-elm",
"Emacs Lisp": "text/x-common-lisp",
"EmberScript": "text/x-coffeescript",
"Erlang": "text/x-erlang",
"F#": "text/x-fsharp",
"Factor": "text/x-factor",
"Forth": "text/x-forth",
"Fortran": "text/x-fortran",
"GCC Machine Description": "text/x-common-lisp",
"GN": "text/x-python",
"Game Maker Language": "text/x-c++src",
"Genshi": "text/xml",
"Gentoo Ebuild": "text/x-sh",
"Gentoo Eclass": "text/x-sh",
"Glyph": "text/x-tcl",
"Go": "text/x-go",
"Grammatical Framework": "text/x-haskell",
"Groovy": "text/x-groovy",
"Groovy Server Pages": "application/x-jsp",
"HCL": "text/x-ruby",
"HTML": "text/html",
"HTML+Django": "text/x-django",
"HTML+ECR": "text/html",
"HTML+EEX": "text/html",
"HTML+ERB": "application/x-erb",
"HTML+PHP": "application/x-httpd-php",
"HTTP": "message/http",
"Hack": "application/x-httpd-php",
"Haml": "text/x-haml",
"Haskell": "text/x-haskell",
"Haxe": "text/x-haxe",
"IDL": "text/x-idl",
"INI": "text/x-properties",
"IRC log": "text/mirc",
"JSON": "application/json",
"JSON5": "application/json",
"JSONiq": "application/json",
"JSX": "text/jsx",
"Java": "text/x-java",
"Java Server Pages": "application/x-jsp",
"JavaScript": "text/javascript",
"Julia": "text/x-julia",
"Jupyter Notebook": "application/json",
"Kit": "text/html",
"Kotlin": "text/x-kotlin",
"LFE": "text/x-common-lisp",
"LabVIEW": "text/xml",
"Latte": "text/x-smarty",
"Less": "text/css",
"Literate Haskell": "text/x-literate-haskell",
"LiveScript": "text/x-livescript",
"LookML": "text/x-yaml",
"Lua": "text/x-lua",
"M": "text/x-mumps",
"MTML": "text/html",
"MUF": "text/x-forth",
"Makefile": "text/x-cmake",
"Markdown": "text/x-gfm",
"Marko": "text/html",
"Mathematica": "text/x-mathematica",
"Matlab": "text/x-octave",
"Maven POM": "text/xml",
"Max": "application/json",
"Metal": "text/x-c++src",
"Mirah": "text/x-ruby",
"Modelica": "text/x-modelica",
"NSIS": "text/x-nsis",
"NetLogo": "text/x-common-lisp",
"NewLisp": "text/x-common-lisp",
"Nginx": "text/x-nginx-conf",
"Nu": "text/x-scheme",
"NumPy": "text/x-python",
"OCaml": "text/x-ocaml",
"Objective-C": "text/x-objectivec",
"Objective-C++": "text/x-objectivec",
"OpenCL": "text/x-csrc",
"OpenRC runscript": "text/x-sh",
"Oz": "text/x-oz",
"PHP": "application/x-httpd-php",
"PLSQL": "text/x-plsql",
"PLpgSQL": "text/x-sql",
"Pascal": "text/x-pascal",
"Perl": "text/x-perl",
"Perl 6": "text/x-perl",
"Pic": "text/troff",
"Pod": "text/x-perl",
"PowerShell": "application/x-powershell",
"Protocol Buffer": "text/x-protobuf",
"Public Key": "application/pgp",
"Pug": "text/x-pug",
"Puppet": "text/x-puppet",
"PureScript": "text/x-haskell",
"Python": "text/x-python",
"R": "text/x-rsrc",
"RAML": "text/x-yaml",
"RHTML": "application/x-erb",
"RMarkdown": "text/x-gfm",
"RPM Spec": "text/x-rpm-spec",
"Reason": "text/x-rustsrc",
"Roff": "text/troff",
"Rouge": "text/x-clojure",
"Ruby": "text/x-ruby",
"Rust": "text/x-rustsrc",
"SAS": "text/x-sas",
"SCSS": "text/x-scss",
"SPARQL": "application/sparql-query",
"SQL": "text/x-sql",
"SQLPL": "text/x-sql",
"SRecode Template": "text/x-common-lisp",
"SVG": "text/xml",
"Sage": "text/x-python",
"SaltStack": "text/x-yaml",
"Sass": "text/x-sass",
"Scala": "text/x-scala",
"Scheme": "text/x-scheme",
"Shell": "text/x-sh",
"ShellSession": "text/x-sh",
"Slim": "text/x-slim",
"Smalltalk": "text/x-stsrc",
"Smarty": "text/x-smarty",
"Squirrel": "text/x-c++src",
"Standard ML": "text/x-ocaml",
"Sublime Text Config": "text/javascript",
"Swift": "text/x-swift",
"SystemVerilog": "text/x-systemverilog",
"TOML": "text/x-toml",
"Tcl": "text/x-tcl",
"Tcsh": "text/x-sh",
"TeX": "text/x-stex",
"Terra": "text/x-lua",
"Textile": "text/x-textile",
"Turtle": "text/turtle",
"Twig": "text/x-twig",
"TypeScript": "application/typescript",
"Unified Parallel C": "text/x-csrc",
"Unity3D Asset": "text/x-yaml",
"Uno": "text/x-csharp",
"UnrealScript": "text/x-java",
"VHDL": "text/x-vhdl",
"Verilog": "text/x-verilog",
"Visual Basic": "text/x-vb",
"Volt": "text/x-d",
"WebAssembly": "text/x-common-lisp",
"WebIDL": "text/x-webidl",
"XC": "text/x-csrc",
"XML": "text/xml",
"XPages": "text/xml",
"XProc": "text/xml",
"XQuery": "application/xquery",
"XS": "text/x-csrc",
"XSLT": "text/xml",
"YAML": "text/x-yaml",
"edn": "text/x-clojure",
"reStructuredText": "text/x-rst",
"wisp": "text/x-clojure",
}

@ -0,0 +1,11 @@
package data
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/enry.v1/internal/code-generator
// THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: {{ getCommit }}
var LanguagesMime = map[string]string{
{{range $language, $mime := . -}}
"{{$language}}": "{{$mime -}}",
{{end -}}
}

@ -70,6 +70,11 @@ const (
commitGold = "test_files/commit.gold"
commitTestTmplPath = "../assets/commit.go.tmpl"
commitTestTmplName = "commit.go.tmpl"
// mime test
mimeTypeGold = "test_files/mimeType.gold"
mimeTypeTestTmplPath = "../assets/mimeType.go.tmpl"
mimeTypeTestTmplName = "mimeType.go.tmpl"
)
type GeneratorTestSuite struct {
@ -218,6 +223,16 @@ func (s *GeneratorTestSuite) TestGenerationFiles() {
generate: Commit,
wantOut: commitGold,
},
{
name: "MimeType()",
fileToParse: filepath.Join(s.tmpLinguist, languagesFile),
samplesDir: "",
tmplPath: mimeTypeTestTmplPath,
tmplName: mimeTypeTestTmplName,
commit: commit,
generate: MimeType,
wantOut: mimeTypeGold,
},
}
for _, test := range tests {
@ -227,7 +242,6 @@ func (s *GeneratorTestSuite) TestGenerationFiles() {
outPath, err := ioutil.TempFile("/tmp", "generator-test-")
assert.NoError(s.T(), err)
defer os.Remove(outPath.Name())
err = test.generate(test.fileToParse, test.samplesDir, outPath.Name(), test.tmplPath, test.tmplName, test.commit)
assert.NoError(s.T(), err)
out, err := ioutil.ReadFile(outPath.Name())

@ -8,6 +8,7 @@ type languageInfo struct {
Extensions []string `yaml:"extensions,omitempty,flow"`
Interpreters []string `yaml:"interpreters,omitempty,flow"`
Filenames []string `yaml:"filenames,omitempty,flow"`
MimeType string `yaml:"codemirror_mime_type,omitempty,flow"`
}
func getAlphabeticalOrderedKeys(languages map[string]*languageInfo) []string {

@ -0,0 +1,55 @@
package generator
import (
"bytes"
"io"
"io/ioutil"
"text/template"
yaml "gopkg.in/yaml.v2"
)
// MimeType reads the YAML document at fileToParse, collects the MIME type
// declared for each language, renders the template tmplName found at tmplPath
// with that mapping and passes the rendered bytes to formatedWrite together
// with outPath.
//
// samplesDir is accepted but not used here. The first error encountered while
// reading, decoding, rendering or writing is returned.
func MimeType(fileToParse, samplesDir, outPath, tmplPath, tmplName, commit string) error {
	raw, err := ioutil.ReadFile(fileToParse)
	if err != nil {
		return err
	}

	parsed := make(map[string]*languageInfo)
	if err = yaml.Unmarshal(raw, &parsed); err != nil {
		return err
	}

	var rendered bytes.Buffer
	err = executeMimeTemplate(&rendered, buildLanguageMimeMap(parsed), tmplPath, tmplName, commit)
	if err != nil {
		return err
	}

	return formatedWrite(outPath, rendered.Bytes())
}
// buildLanguageMimeMap returns a map from language name to MIME type holding
// only the languages whose MimeType field is non-empty.
//
// The result is never nil, even when languages is nil or empty.
func buildLanguageMimeMap(languages map[string]*languageInfo) map[string]string {
	langMimeMap := make(map[string]string)
	for lang, info := range languages {
		// Entries may be nil (for instance a key decoded from a null
		// value); skip them rather than dereferencing a nil pointer.
		if info == nil || info.MimeType == "" {
			continue
		}
		langMimeMap[lang] = info.MimeType
	}

	return langMimeMap
}
// executeMimeTemplate parses the template file at tmplPath and executes the
// template named tmplName against langMimeMap, writing the result to out.
//
// The template can call getCommit, which returns commit. Failures to read or
// parse the template file are returned as errors rather than raised as
// panics, as are execution errors.
func executeMimeTemplate(out io.Writer, langMimeMap map[string]string, tmplPath, tmplName, commit string) error {
	fmap := template.FuncMap{
		"getCommit": func() string { return commit },
	}

	// Funcs must be registered before parsing so the template body can
	// reference getCommit.
	t, err := template.New(tmplName).Funcs(fmap).ParseFiles(tmplPath)
	if err != nil {
		return err
	}

	return t.Execute(out, langMimeMap)
}

@ -0,0 +1,204 @@
package data
// CODE GENERATED AUTOMATICALLY WITH gopkg.in/src-d/enry.v1/internal/code-generator
// THIS FILE SHOULD NOT BE EDITED BY HAND
// Extracted from github/linguist commit: d5c8db3fb91963c4b2762ca2ea2ff7cfac109f68
var LanguagesMime = map[string]string{
"AGS Script": "text/x-c++src",
"APL": "text/apl",
"ASN.1": "text/x-ttcn-asn",
"ASP": "application/x-aspx",
"Alpine Abuild": "text/x-sh",
"Ant Build System": "application/xml",
"Apex": "text/x-java",
"Arduino": "text/x-c++src",
"Brainfuck": "text/x-brainfuck",
"C": "text/x-csrc",
"C#": "text/x-csharp",
"C++": "text/x-c++src",
"C2hs Haskell": "text/x-haskell",
"CMake": "text/x-cmake",
"COBOL": "text/x-cobol",
"COLLADA": "text/xml",
"CSON": "text/x-coffeescript",
"CSS": "text/css",
"ChucK": "text/x-java",
"Clojure": "text/x-clojure",
"Closure Templates": "text/x-soy",
"CoffeeScript": "text/x-coffeescript",
"Common Lisp": "text/x-common-lisp",
"Component Pascal": "text/x-pascal",
"Crystal": "text/x-crystal",
"Cuda": "text/x-c++src",
"Cycript": "text/javascript",
"Cython": "text/x-cython",
"D": "text/x-d",
"DTrace": "text/x-csrc",
"Dart": "application/dart",
"Diff": "text/x-diff",
"Dockerfile": "text/x-dockerfile",
"Dylan": "text/x-dylan",
"EBNF": "text/x-ebnf",
"ECL": "text/x-ecl",
"EQ": "text/x-csharp",
"Eagle": "text/xml",
"Easybuild": "text/x-python",
"Ecere Projects": "application/json",
"Eiffel": "text/x-eiffel",
"Elm": "text/x-elm",
"Emacs Lisp": "text/x-common-lisp",
"EmberScript": "text/x-coffeescript",
"Erlang": "text/x-erlang",
"F#": "text/x-fsharp",
"Factor": "text/x-factor",
"Forth": "text/x-forth",
"Fortran": "text/x-fortran",
"GCC Machine Description": "text/x-common-lisp",
"GN": "text/x-python",
"Game Maker Language": "text/x-c++src",
"Genshi": "text/xml",
"Gentoo Ebuild": "text/x-sh",
"Gentoo Eclass": "text/x-sh",
"Glyph": "text/x-tcl",
"Go": "text/x-go",
"Grammatical Framework": "text/x-haskell",
"Groovy": "text/x-groovy",
"Groovy Server Pages": "application/x-jsp",
"HCL": "text/x-ruby",
"HTML": "text/html",
"HTML+Django": "text/x-django",
"HTML+ECR": "text/html",
"HTML+EEX": "text/html",
"HTML+ERB": "application/x-erb",
"HTML+PHP": "application/x-httpd-php",
"HTTP": "message/http",
"Hack": "application/x-httpd-php",
"Haml": "text/x-haml",
"Haskell": "text/x-haskell",
"Haxe": "text/x-haxe",
"IDL": "text/x-idl",
"INI": "text/x-properties",
"IRC log": "text/mirc",
"JSON": "application/json",
"JSON5": "application/json",
"JSONiq": "application/json",
"JSX": "text/jsx",
"Java": "text/x-java",
"Java Server Pages": "application/x-jsp",
"JavaScript": "text/javascript",
"Julia": "text/x-julia",
"Jupyter Notebook": "application/json",
"Kit": "text/html",
"Kotlin": "text/x-kotlin",
"LFE": "text/x-common-lisp",
"LabVIEW": "text/xml",
"Latte": "text/x-smarty",
"Less": "text/css",
"Literate Haskell": "text/x-literate-haskell",
"LiveScript": "text/x-livescript",
"LookML": "text/x-yaml",
"Lua": "text/x-lua",
"M": "text/x-mumps",
"MTML": "text/html",
"MUF": "text/x-forth",
"Makefile": "text/x-cmake",
"Markdown": "text/x-gfm",
"Marko": "text/html",
"Mathematica": "text/x-mathematica",
"Matlab": "text/x-octave",
"Maven POM": "text/xml",
"Max": "application/json",
"Metal": "text/x-c++src",
"Mirah": "text/x-ruby",
"Modelica": "text/x-modelica",
"NSIS": "text/x-nsis",
"NetLogo": "text/x-common-lisp",
"NewLisp": "text/x-common-lisp",
"Nginx": "text/x-nginx-conf",
"Nu": "text/x-scheme",
"NumPy": "text/x-python",
"OCaml": "text/x-ocaml",
"Objective-C": "text/x-objectivec",
"Objective-C++": "text/x-objectivec",
"OpenCL": "text/x-csrc",
"OpenRC runscript": "text/x-sh",
"Oz": "text/x-oz",
"PHP": "application/x-httpd-php",
"PLSQL": "text/x-plsql",
"PLpgSQL": "text/x-sql",
"Pascal": "text/x-pascal",
"Perl": "text/x-perl",
"Perl 6": "text/x-perl",
"Pic": "text/troff",
"Pod": "text/x-perl",
"PowerShell": "application/x-powershell",
"Protocol Buffer": "text/x-protobuf",
"Public Key": "application/pgp",
"Pug": "text/x-pug",
"Puppet": "text/x-puppet",
"PureScript": "text/x-haskell",
"Python": "text/x-python",
"R": "text/x-rsrc",
"RAML": "text/x-yaml",
"RHTML": "application/x-erb",
"RMarkdown": "text/x-gfm",
"RPM Spec": "text/x-rpm-spec",
"Reason": "text/x-rustsrc",
"Roff": "text/troff",
"Rouge": "text/x-clojure",
"Ruby": "text/x-ruby",
"Rust": "text/x-rustsrc",
"SAS": "text/x-sas",
"SCSS": "text/x-scss",
"SPARQL": "application/sparql-query",
"SQL": "text/x-sql",
"SQLPL": "text/x-sql",
"SRecode Template": "text/x-common-lisp",
"SVG": "text/xml",
"Sage": "text/x-python",
"SaltStack": "text/x-yaml",
"Sass": "text/x-sass",
"Scala": "text/x-scala",
"Scheme": "text/x-scheme",
"Shell": "text/x-sh",
"ShellSession": "text/x-sh",
"Slim": "text/x-slim",
"Smalltalk": "text/x-stsrc",
"Smarty": "text/x-smarty",
"Squirrel": "text/x-c++src",
"Standard ML": "text/x-ocaml",
"Sublime Text Config": "text/javascript",
"Swift": "text/x-swift",
"SystemVerilog": "text/x-systemverilog",
"TOML": "text/x-toml",
"Tcl": "text/x-tcl",
"Tcsh": "text/x-sh",
"TeX": "text/x-stex",
"Terra": "text/x-lua",
"Textile": "text/x-textile",
"Turtle": "text/turtle",
"Twig": "text/x-twig",
"TypeScript": "application/typescript",
"Unified Parallel C": "text/x-csrc",
"Unity3D Asset": "text/x-yaml",
"Uno": "text/x-csharp",
"UnrealScript": "text/x-java",
"VHDL": "text/x-vhdl",
"Verilog": "text/x-verilog",
"Visual Basic": "text/x-vb",
"Volt": "text/x-d",
"WebAssembly": "text/x-common-lisp",
"WebIDL": "text/x-webidl",
"XC": "text/x-csrc",
"XML": "text/xml",
"XPages": "text/xml",
"XProc": "text/xml",
"XQuery": "application/xquery",
"XS": "text/x-csrc",
"XSLT": "text/xml",
"YAML": "text/x-yaml",
"edn": "text/x-clojure",
"reStructuredText": "text/x-rst",
"wisp": "text/x-clojure",
}

@ -67,6 +67,11 @@ const (
commitTmplPath = "internal/code-generator/assets/commit.go.tmpl"
commitTmpl = "commit.go.tmpl"
// mimeType.go generation
mimeTypeFile = "data/mimeType.go"
mimeTypeTmplPath = "internal/code-generator/assets/mimeType.go.tmpl"
mimeTypeTmpl = "mimeType.go.tmpl"
commitPath = ".linguist/.git/HEAD"
)
@ -97,6 +102,7 @@ func main() {
&generatorFiles{generator.Aliases, languagesYAML, "", aliasesFile, aliasesTmplPath, aliasesTmpl, commit},
&generatorFiles{generator.Frequencies, "", samplesDir, frequenciesFile, frequenciesTmplPath, frequenciesTmpl, commit},
&generatorFiles{generator.Commit, "", "", commitFile, commitTmplPath, commitTmpl, commit},
&generatorFiles{generator.MimeType, languagesYAML, "", mimeTypeFile, mimeTypeTmplPath, mimeTypeTmpl, commit},
}
for _, file := range fileList {

@ -53,6 +53,27 @@ func IsDocumentation(path string) bool {
return data.DocumentationMatchers.Match(path)
}
// IsImage reports whether path has a PNG, JPEG or GIF file extension
// (".png", ".jpg", ".jpeg" or ".gif"), compared case-insensitively.
// Only the extension is inspected; the file is never opened.
func IsImage(path string) bool {
	ext := []byte(filepath.Ext(path))

	// ASCII folding is enough here: every recognised extension is ASCII, so
	// an extension with any other byte cannot match anyway.
	for i, c := range ext {
		if 'A' <= c && c <= 'Z' {
			ext[i] = c + ('a' - 'A')
		}
	}

	switch string(ext) {
	case ".png", ".jpg", ".jpeg", ".gif":
		return true
	}

	return false
}
// GetMimeType returns a MIME type for the file at path given its detected
// language.
//
// The type registered for language in data.LanguagesMime takes precedence.
// Otherwise, if IsImage accepts path, an "image/<subtype>" type is derived
// from the extension, with ".jpg" mapped to the registered "image/jpeg".
// Everything else is reported as "text/plain".
func GetMimeType(path string, language string) string {
	if mime, ok := data.LanguagesMime[language]; ok {
		return mime
	}

	if IsImage(path) {
		// IsImage only accepts paths with a non-empty extension, so
		// dropping the leading dot cannot go out of range.
		subtype := []byte(filepath.Ext(path)[1:])

		// Fold to lower case so the subtype is canonical whatever the
		// case of the extension on disk.
		for i, c := range subtype {
			if 'A' <= c && c <= 'Z' {
				subtype[i] = c + ('a' - 'A')
			}
		}

		// "jpg" is a file extension, not a media subtype.
		if string(subtype) == "jpg" {
			return "image/jpeg"
		}
		return "image/" + string(subtype)
	}

	return "text/plain"
}
const sniffLen = 8000
// IsBinary detects if data is a binary value based on: