pick_ngrams: complete the implementation
This commit is contained in:
parent
b56662dd87
commit
225b683541
@ -46,6 +46,7 @@ class da_detector:
|
|||||||
return obj
|
return obj
|
||||||
|
|
||||||
def pick_ngrams(self, what_grams: int, how_many: int, text: str):
|
def pick_ngrams(self, what_grams: int, how_many: int, text: str):
|
||||||
|
from random import randint
|
||||||
if not isinstance(what_grams, int):
|
if not isinstance(what_grams, int):
|
||||||
raise TypeError("what_grams has to be an int")
|
raise TypeError("what_grams has to be an int")
|
||||||
if not isinstance(how_many, int):
|
if not isinstance(how_many, int):
|
||||||
@ -62,14 +63,40 @@ class da_detector:
|
|||||||
if (len(text) <= 10):
|
if (len(text) <= 10):
|
||||||
raise ValueError("not doing anything with text shorter than 10 characters")
|
raise ValueError("not doing anything with text shorter than 10 characters")
|
||||||
|
|
||||||
# TODO(me): complete n-gram picking method implementation
|
t_len = len(text)
|
||||||
|
# list of random n-grams
|
||||||
|
r_ngrams = []
|
||||||
|
# how many times to attempt to skin the cat
|
||||||
|
insanity_threshold = 1000
|
||||||
|
sanity_ctr = 0
|
||||||
|
|
||||||
|
while (len(r_ngrams) < how_many and sanity_ctr < insanity_threshold):
|
||||||
|
# not truly random, but hey..
|
||||||
|
r_position = randint(0, t_len - 1)
|
||||||
|
if (r_position + what_grams >= (t_len - 1)):
|
||||||
|
continue
|
||||||
|
|
||||||
|
# this is the block where we start counting how many times we've
|
||||||
|
# been there
|
||||||
|
++sanity_ctr
|
||||||
|
candidate_ngram = text[r_position:r_position + what_grams]
|
||||||
|
if (candidate_ngram not in r_ngrams):
|
||||||
|
r_ngrams.append(candidate_ngram)
|
||||||
|
print(r_ngrams)
|
||||||
|
|
||||||
|
|
||||||
freqs_folder = "./freqs/"
|
freqs_folder = "./freqs/"
|
||||||
test_str = "what freaking ever, nobody cares one bit of a heck"
|
test_str = "what freaking ever, nobody cares one bit of a heck"
|
||||||
|
|
||||||
detector = da_detector()
|
detector = da_detector()
|
||||||
detector.pick_ngrams(3, 10, test_str)
|
detector.pick_ngrams(
|
||||||
|
what_grams=3,
|
||||||
|
how_many=10,
|
||||||
|
text=test_str)
|
||||||
|
detector.pick_ngrams(
|
||||||
|
what_grams=2,
|
||||||
|
how_many=10,
|
||||||
|
text=test_str)
|
||||||
|
|
||||||
sk_json = detector.parse_freqs("sk.json")
|
sk_json = detector.parse_freqs("sk.json")
|
||||||
cz_json = detector.parse_freqs("cz.json")
|
cz_json = detector.parse_freqs("cz.json")
|
||||||
|
Reference in New Issue
Block a user