initial commit

This commit is contained in:
surtur 2021-12-16 02:12:42 +01:00
commit c68db63fc8
Signed by: wanderer
GPG Key ID: 19CE1EC1D9E0486D
2 changed files with 27 additions and 0 deletions

1
.gitignore vendored Normal file

@ -0,0 +1 @@
freqs/

26
da_detector.py Normal file

@ -0,0 +1,26 @@
"""
This module defines a class implementing a "detector" (a language classifier)
that learns from predefined, frequency-analysed sets of n-grams.
"""
class da_detector:
    """Language "detector" scaffold: language list plus n-gram cleaning helpers.

    The class currently stores the language codes to check and offers two
    static helpers that strip punctuation or digits from a collection of
    n-gram strings.
    """

    # Language codes used when the caller does not supply its own selection.
    DEFAULT_LANGS = ("cz", "sk", "de", "en", "fr")

    def __init__(self, langs_to_check=None):
        """Remember which languages the detector should consider.

        Args:
            langs_to_check: iterable of language codes, or a single code as a
                string. When ``None``, ``DEFAULT_LANGS`` is used.
        """
        if langs_to_check is None:
            langs_to_check = self.DEFAULT_LANGS
        elif isinstance(langs_to_check, str):
            # A bare string would otherwise be split into single characters.
            langs_to_check = [langs_to_check]
        # Copy so later changes to the caller's list do not leak in here.
        self.langs_to_check = list(langs_to_check)

    @staticmethod
    def rm_interpunction(data):
        """Return the n-grams in *data* with ASCII punctuation removed.

        Args:
            data: iterable of n-gram strings.

        Returns:
            A new list holding one cleaned string per input n-gram, in the
            same order. The input is not modified.

        Raises:
            AttributeError: if an element has no ``translate`` method
                (i.e. is not a string).
        """
        import string

        # Build the deletion table once instead of once per n-gram.
        table = str.maketrans("", "", string.punctuation)
        return [ngram.translate(table) for ngram in data]

    @staticmethod
    def rm_digits(data):
        """Return the n-grams in *data* with ASCII digits (0-9) removed.

        Args:
            data: iterable of n-gram strings.

        Returns:
            A new list holding one cleaned string per input n-gram, in the
            same order. The input is not modified.

        Raises:
            AttributeError: if an element has no ``translate`` method
                (i.e. is not a string).
        """
        from string import digits

        # Python 3 ``str.translate`` takes a single mapping table; the
        # two-argument ``translate(None, digits)`` form is Python 2 only.
        table = str.maketrans("", "", digits)
        return [ngram.translate(table) for ngram in data]