22cb6f602e
updated .gitignore
127 lines
2.9 KiB
Ruby
Executable File
127 lines
2.9 KiB
Ruby
Executable File
#!/usr/bin/env ruby
|
|
|
|
require 'benchmark'
|
|
require 'linguist'
|
|
|
|
# Number of times each benchmarked call is repeated inside a single report.
# Read from the first command-line argument; defaults to 1 when absent.
iterations = ARGV.fetch(0, 1).to_i
|
|
|
|
# BenchBlob is a Linguist::FileBlob that reads its file contents once, at
# construction time, and can reset selected instance variables between
# benchmark iterations via #clean.
class BenchBlob < Linguist::FileBlob
  # Contents of the file, read eagerly in #initialize.
  attr_accessor :data

  # path      - passed through unchanged to Linguist::FileBlob#initialize.
  # base_path - passed through unchanged to Linguist::FileBlob#initialize
  #             (default: nil).
  def initialize(path, base_path = nil)
    super(path, base_path)
    # @fullpath is not assigned in this class; presumably it is set by
    # FileBlob#initialize -- confirm against the Linguist version in use.
    @data = File.read(@fullpath)
  end

  # Sets @_mime_type, @detect_encoding and @lines back to nil. These look
  # like values cached by Linguist during detection (assumption -- verify
  # against Linguist::BlobHelper). @data is deliberately left untouched.
  #
  # Returns nil.
  def clean
    @_mime_type = @detect_encoding = @lines = nil
  end
end
|
|
|
|
# Recursively collects every non-directory entry below +root+ as a BenchBlob.
#
# Entries of each directory are visited in sorted name order so that the
# resulting list (and therefore the order of the benchmark reports) is the
# same on every run and every filesystem; the raw directory listing order is
# not guaranteed.
#
# root - String path of the directory to walk.
#
# Returns a flat Array of BenchBlob, one per file found; empty when +root+
# contains no files. Raises whatever Dir.entries raises (e.g. Errno::ENOENT)
# when +root+ cannot be read.
def get_samples(root)
  names = Dir.entries(root).reject { |name| name == '.' || name == '..' }.sort

  names.flat_map do |name|
    path = File.join(root, name)
    # Wrap the single blob in an Array so flat_map treats both branches alike.
    File.directory?(path) ? get_samples(path) : [BenchBlob.new(path)]
  end
end
|
|
|
|
# Sample files to benchmark, and the language list handed to every strategy
# that takes a candidate set.
samples = get_samples('.linguist/samples')
languages = Linguist::Language.all

# Report-label prefix => the single Linguist call that is timed for one blob.
# Hash insertion order is the order in which the benchmark groups run.
# Every group used to be its own copy-pasted loop; they differed only in the
# label prefix and the call below.
strategies = {
  'GetLanguage()' =>
    ->(blob) { Linguist.detect(blob) },
  'Classify()' =>
    ->(blob) { Linguist::Classifier.classify(Linguist::Samples.cache, blob.data) },
  'GetLanguagesByModeline()' =>
    ->(blob) { Linguist::Strategy::Modeline.call(blob, languages) },
  'GetLanguagesByFilename()' =>
    ->(blob) { Linguist::Strategy::Filename.call(blob, languages) },
  'GetLanguagesByShebang()' =>
    ->(blob) { Linguist::Shebang.call(blob, languages) },
  'GetLanguagesByExtension()' =>
    ->(blob) { Linguist::Strategy::Extension.call(blob, languages) },
  'GetLanguagesByContent()' =>
    ->(blob) { Linguist::Heuristics.call(blob, languages) }
}

# For each strategy, benchmark every sample on its own: one bmbm run
# (rehearsal + measurement) per sample, each repeating the call
# `iterations` times and resetting the blob after every call.
strategies.each do |label, strategy|
  samples.each do |blob|
    # Whitespace in the path is replaced with underscores in the report label.
    sample_name = blob.path.gsub(/\s/, '_')

    Benchmark.bmbm do |bm|
      bm.report("#{label}_SAMPLE_#{sample_name} #{iterations}") do
        iterations.times do
          strategy.call(blob)
          blob.clean
        end
      end
    end
  end
end
|