# Reconstructed from a diff hunk against da_detector.py
# (index 8fde623..5638369, hunk "@@ -90,6 +90,32 @@ class da_detector:").
# The hunk had been collapsed onto a single line; only the part of the class
# visible in that hunk is reproduced here.
class da_detector:
    # Tail of the preceding method; its beginning lies outside this hunk:
    #     print(r_ngrams)
    #     return r_ngrams

    # Stand-in probability for n-grams that have no known probability: a
    # bogus value that is reasonably close to zero.
    FALLBACK_PROBABILITY = 0.000000123

    def gimme_probabilities(self, lang_probs: dict, ngrams: list,
                            default: float = FALLBACK_PROBABILITY) -> list:
        """Return a probability list for a list of n-grams and a given language.

        Each n-gram is looked up in ``lang_probs``; n-grams that are missing
        (or mapped to ``None``) get ``default`` instead.

        Args:
            lang_probs: Mapping from n-gram to its probability.
            ngrams: The n-grams to look up, in the order the result should have.
            default: Value used for n-grams without a probability.

        Returns:
            A list with one entry per element of ``ngrams``, in the same order.

        Raises:
            TypeError: If ``lang_probs`` is not a dict or ``ngrams`` is not a list.
            ValueError: If ``lang_probs`` or ``ngrams`` is empty.
        """
        if not isinstance(lang_probs, dict):
            raise TypeError("lang_probs has to be a dict")
        if not isinstance(ngrams, list):
            raise TypeError("ngrams has to be a list")

        if not lang_probs:
            raise ValueError("empty lang_probs dict")
        if not ngrams:
            raise ValueError("empty ngrams list")

        # dict.get yields None for unknown n-grams, hence "uncleansed";
        # replace_nones also covers keys explicitly mapped to None.
        uncleansed_probabilities = [lang_probs.get(ngram) for ngram in ngrams]
        return self.replace_nones(uncleansed_probabilities, default)

    def replace_nones(self, probabilities: list,
                      default: float = FALLBACK_PROBABILITY) -> list:
        """Return a copy of ``probabilities`` with every ``None`` replaced.

        Only ``None`` is replaced (identity check); other falsy values such
        as ``0`` are kept as they are.

        Args:
            probabilities: List of probabilities, possibly containing ``None``.
            default: Replacement for ``None`` entries.

        Returns:
            A new list of the same length; the input list is not modified.

        Raises:
            TypeError: If ``probabilities`` is not a list.
            ValueError: If ``probabilities`` is empty.
        """
        if not isinstance(probabilities, list):
            raise TypeError("not a list, bailing")
        if not probabilities:
            raise ValueError("empty list, bailing")
        return [default if p is None else p for p in probabilities]


# NOTE(review): trailing context lines of the hunk; their original indentation
# was lost, so module level is assumed here - confirm against the real file.
freqs_folder = "./freqs/"
test_str = "what freaking ever, nobody cares one bit of a heck"