fix: have stable n-grams picked once per pass

...so that detection is performed on the same set of n-grams for every language.
This commit is contained in:
surtur 2021-12-20 02:35:56 +01:00
parent bda5b336ed
commit 21a87bbfec
Signed by: wanderer
GPG Key ID: 19CE1EC1D9E0486D

@ -9,6 +9,7 @@ class da_detector:
def __init__(self, langs_to_check: list = ["sk", "en"]):
# langs to check
# to be picked from ["cz", "sk", "de", "en", "fr"]
self.da_ngrams = []
if not isinstance(langs_to_check, list):
raise TypeError("not a list, bailing")
@ -134,11 +135,13 @@ class da_detector:
probabs = []
try:
# only pick n-grams once per pass
self.da_ngrams = self.pick_ngrams(what_grams, how_many, txt)
for lang in langs:
probabs.append(
self.gimme_probabilities(
lang,
self.pick_ngrams(what_grams, how_many, txt)
self.da_ngrams
)
)
except Exception as e: