From 21a87bbfeccc6901ddf30f75cd8ecf31d8364d22 Mon Sep 17 00:00:00 2001 From: surtur Date: Mon, 20 Dec 2021 02:35:56 +0100 Subject: [PATCH] fix: have stable n-grams picked once per pass ..so that the detection is performed on the same set for every lang --- da_detector.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/da_detector.py b/da_detector.py index 945f749..58ad4d0 100755 --- a/da_detector.py +++ b/da_detector.py @@ -9,6 +9,7 @@ class da_detector: def __init__(self, langs_to_check: list = ["sk", "en"]): # langs to check # to be picked from ["cz", "sk", "de", "en", "fr"] + self.da_ngrams = [] if not isinstance(langs_to_check, list): raise TypeError("not a list, bailing") @@ -134,11 +135,13 @@ class da_detector: probabs = [] try: + # only pick n-grams once per pass + self.da_ngrams = self.pick_ngrams(what_grams, how_many, txt) for lang in langs: probabs.append( self.gimme_probabilities( lang, - self.pick_ngrams(what_grams, how_many, txt) + self.da_ngrams ) ) except Exception as e: