fix: pick a stable set of n-grams once per pass
so that detection is performed on the same n-gram set for every language
This commit is contained in:
parent
bda5b336ed
commit
21a87bbfec
@@ -9,6 +9,7 @@ class da_detector:
|
||||
def __init__(self, langs_to_check: list = ["sk", "en"]):
|
||||
# langs to check
|
||||
# to be picked from ["cz", "sk", "de", "en", "fr"]
|
||||
+self.da_ngrams = []
|
||||
|
||||
if not isinstance(langs_to_check, list):
|
||||
raise TypeError("not a list, bailing")
|
||||
@@ -134,11 +135,13 @@ class da_detector:
|
||||
|
||||
probabs = []
|
||||
try:
|
||||
+# only pick n-grams once per pass
|
||||
+self.da_ngrams = self.pick_ngrams(what_grams, how_many, txt)
|
||||
for lang in langs:
|
||||
probabs.append(
|
||||
self.gimme_probabilities(
|
||||
lang,
|
||||
-self.pick_ngrams(what_grams, how_many, txt)
|
||||
+self.da_ngrams
|
||||
)
|
||||
)
|
||||
except Exception as e:
|
||||
|
Reference in New Issue
Block a user