From 789ccd5856bca8ae77eb73b26489b8d3e3af91b0 Mon Sep 17 00:00:00 2001 From: surtur Date: Mon, 20 Dec 2021 01:20:22 +0100 Subject: [PATCH] add gimme_probabilities and replace_nones methods for "converting" a list of ngrams into a list of probabilities these ngrams occur in a given language, replacing "None" values on the fly with a very low "probability of occurrence" value --- da_detector.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/da_detector.py b/da_detector.py index 8fde623..5638369 100755 --- a/da_detector.py +++ b/da_detector.py @@ -90,6 +90,32 @@ class da_detector: print(r_ngrams) return r_ngrams + # return a probability list for a list of ngrams and a given language + def gimme_probabilities(self, lang_probs: dict, ngrams: list): + if not isinstance(lang_probs, dict): + raise TypeError("lang_probs has to be a dict") + if not isinstance(ngrams, list): + raise TypeError("ngrams has to be a list") + + if len(lang_probs) == 0: + raise ValueError("empty lang_probs dict") + if len(ngrams) == 0: + raise ValueError("empty ngrams list") + + # may contain None values if not found, hence uncleansed + uncleansed_probabilities = [] + for ngram in ngrams: + uncleansed_probabilities.append(lang_probs.get(ngram)) + return self.replace_nones(uncleansed_probabilities) + + def replace_nones(self, probabilities: list): + if not isinstance(probabilities, list): + raise TypeError("not a list, bailing") + if len(probabilities) == 0: + raise ValueError("empty list, bailing") + # a pretty good bogus probability is a one close™ reasonably to zero + return [0.000000123 if n is None else n for n in probabilities] + freqs_folder = "./freqs/" test_str = "what freaking ever, nobody cares one bit of a heck"