From 3160dcba407e14db5aeb98a637a13cc9f68f936c Mon Sep 17 00:00:00 2001 From: 2EEEB <192235@vutbr.cz> Date: Mon, 6 Dec 2021 00:22:40 +0100 Subject: [PATCH] New procedure using contour detection --- ocr.py | 49 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 14 deletions(-) diff --git a/ocr.py b/ocr.py index b2ef9e4..8ea8ca6 100644 --- a/ocr.py +++ b/ocr.py @@ -6,21 +6,42 @@ import cv2 from PIL import Image def process(image): + # Read image img = cv2.imread(image) - norm_img = np.zeros((img.shape[0], img.shape[1])) - # Normalize - img = cv2.normalize(img, norm_img, 0, 255, cv2.NORM_MINMAX) - # Crop - img = img[165:-120, 100:-100] - # Threshold - img = cv2.threshold(img, 80, 255, cv2.THRESH_BINARY)[1] - # Blur - img = cv2.GaussianBlur(img,(9,9),cv2.BORDER_DEFAULT) - # Write modified image - cv2.imwrite("output/{}_tmp.png".format(image.split("/")[1].split(".")[0]),img) - # Actual ocr in one call - return pytesseract.image_to_string(img) - + # Convert to single-channel grayscale + img_gs = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + # Binary threshold to get rid of blemishes + _, img_gs = cv2.threshold(img_gs, 50, 255, cv2.THRESH_BINARY) + # OTSU threshold for contour detection + _, thr = cv2.threshold(img_gs, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV) + # Rectangular kernel for contour detection + rect_k = cv2.getStructuringElement(cv2.MORPH_RECT, (32, 32)) + # Dilation + dil = cv2.dilate(thr, rect_k, iterations = 1) + # Finding contours + conts, _ = cv2.findContours(dil, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) + txt="" + # Iterate over found contours and attemt to ocr text within + for cnt in conts: + x, y, w, h = cv2.boundingRect(cnt) # location and size of found feature + if x == 0 or y == 0: + # Do not do anything if the feature is on image edges, expected text is more or less centered + pass + else: + cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2) + # Crop only the text feature with some padding in horizontal direction to help with ocr + txt_ft = img_gs[y:y+h, x:x+w] + try: + # Write image with the detected text bounding box + cv2.imwrite("output/{}_tmp.png".format(image.split("/")[1].split(".")[0]),img) + # Apply blur to remove sharp edges and help with ocr + txt_ft = cv2.GaussianBlur(txt_ft,(5,5),cv2.BORDER_DEFAULT) + # Actual ocr in one call + txt += pytesseract.image_to_string(txt_ft) + except: + # Fail silently, only the emtpy string will be returned + pass + return txt if __name__ == "__main__": if not os.path.exists("input"):