diff --git a/da_detector.py b/da_detector.py
index 0cd1eea..de0b93d 100755
--- a/da_detector.py
+++ b/da_detector.py
@@ -46,6 +46,7 @@ class da_detector:
         return obj
 
     def pick_ngrams(self, what_grams: int, how_many: int, text: str):
+        from random import randint
         if not isinstance(what_grams, int):
             raise TypeError("what_grams has to be an int")
         if not isinstance(how_many, int):
@@ -62,14 +63,40 @@ class da_detector:
         if (len(text) <= 10):
             raise ValueError("not doing anything with text shorter than 10 characters")
 
-        # TODO(me): complete n-gram picking method implementation
+        t_len = len(text)
+        # list of random n-grams
+        r_ngrams = []
+        # how many times to attempt to skin the cat
+        insanity_threshold = 1000
+        sanity_ctr = 0
+
+        while (len(r_ngrams) < how_many and sanity_ctr < insanity_threshold):
+            # count every attempt (rejected ones too) so the loop always ends;
+            # note "++x" is a no-op in Python (two unary pluses), use "+= 1"
+            sanity_ctr += 1
+            # not truly random, but hey..
+            r_position = randint(0, t_len - 1)
+            # reject start positions that cannot hold a full n-gram
+            if (r_position + what_grams > t_len):
+                continue
+            candidate_ngram = text[r_position:r_position + what_grams]
+            if (candidate_ngram not in r_ngrams):
+                r_ngrams.append(candidate_ngram)
+        print(r_ngrams)
 
 
 freqs_folder = "./freqs/"
 test_str = "what freaking ever, nobody cares one bit of a heck"
 
 detector = da_detector()
-detector.pick_ngrams(3, 10, test_str)
+detector.pick_ngrams(
+    what_grams=3,
+    how_many=10,
+    text=test_str)
+detector.pick_ngrams(
+    what_grams=2,
+    how_many=10,
+    text=test_str)
 
 sk_json = detector.parse_freqs("sk.json")
 cz_json = detector.parse_freqs("cz.json")