initial commit
This commit is contained in:
commit
c68db63fc8
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
freqs/
|
26
da_detector.py
Normal file
26
da_detector.py
Normal file
@ -0,0 +1,26 @@
|
||||
"""
|
||||
This file holds a class which implements a "detector" (a language classifier)
|
||||
that learns from predefined, frequency-analysed sets of ngrams
|
||||
"""
|
||||
|
||||
|
||||
class da_detector:
    """Language "detector" (classifier) configured with a set of languages.

    Besides holding the list of language codes to check, the class offers
    two helpers that clean n-gram strings by stripping punctuation and
    digits.
    """

    # Language codes used when the caller does not supply a selection.
    DEFAULT_LANGS = ("cz", "sk", "de", "en", "fr")

    def __init__(self, langs_to_check=None):
        """Store the language codes this detector should consider.

        Args:
            langs_to_check: Iterable of language codes. When ``None``, the
                codes in ``DEFAULT_LANGS`` are used.
        """
        if langs_to_check is None:
            langs_to_check = self.DEFAULT_LANGS
        # Copy so later changes to the caller's sequence do not leak in.
        self.langs_to_check = list(langs_to_check)

    @staticmethod
    def rm_interpunction(data):
        """Return the n-grams from *data* with ASCII punctuation removed.

        Args:
            data: Iterable of n-gram strings.

        Returns:
            A new list with every character of ``string.punctuation``
            stripped from each n-gram; *data* itself is not modified.
        """
        import string

        # Build the table once; str.translate then strips in a single pass.
        table = str.maketrans("", "", string.punctuation)
        return [ngram.translate(table) for ngram in data]

    @staticmethod
    def rm_digits(data):
        """Return the n-grams from *data* with ASCII digits removed.

        Args:
            data: Iterable of n-gram strings.

        Returns:
            A new list with every character of ``string.digits`` stripped
            from each n-gram; *data* itself is not modified.
        """
        from string import digits

        # Python 3 str.translate takes a single table argument; the
        # two-argument ``translate(None, digits)`` form is Python 2 only.
        table = str.maketrans("", "", digits)
        return [ngram.translate(table) for ngram in data]
|
Reference in New Issue
Block a user