New procedure using contour detection

2021-12-06 00:22:40 +01:00 · 2021-12-06 00:22:40 +01:00 · 3160dcba40
parent 074678eaa0
commit 3160dcba40
1 changed files with 35 additions and 14 deletions
--- a/ocr.py
+++ b/ocr.py
@@ -6,21 +6,42 @@ import cv2
 from PIL import Image

 def process(image):
+    # Read the image file at the given path
    img = cv2.imread(image)
-    norm_img = np.zeros((img.shape[0], img.shape[1]))
-    # Normalize
-    img = cv2.normalize(img, norm_img, 0, 255, cv2.NORM_MINMAX)
-    # Crop
-    img = img[165:-120, 100:-100]
-    # Threshold
-    img = cv2.threshold(img, 80, 255, cv2.THRESH_BINARY)[1]
-    # Blur
-    img = cv2.GaussianBlur(img,(9,9),cv2.BORDER_DEFAULT)
-    # Write modified image
-    cv2.imwrite("output/{}_tmp.png".format(image.split("/")[1].split(".")[0]),img)
-    # Actual ocr in one call
-    return pytesseract.image_to_string(img)
-
+    # Convert to single-channel grayscale
+    img_gs = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+    # Binary threshold (cut-off 50) to get rid of blemishes
+    _, img_gs = cv2.threshold(img_gs, 50, 255, cv2.THRESH_BINARY)
+    # Inverted Otsu threshold, used only for contour detection
+    _, thr = cv2.threshold(img_gs, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV)
+    # 32x32 rectangular kernel for contour detection
+    rect_k = cv2.getStructuringElement(cv2.MORPH_RECT, (32, 32))
+    # Dilate the thresholded mask once with the rectangular kernel
+    dil = cv2.dilate(thr, rect_k, iterations = 1)
+    # Find the external contours of the dilated mask
+    conts, _ = cv2.findContours(dil, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
+    txt=""
+    # Iterate over the found contours and attempt to OCR the text within each one
+    for cnt in conts:
+        x, y, w, h = cv2.boundingRect(cnt) # top-left corner and size of the feature's bounding box
+        if x == 0 or y == 0:
+            # Skip features touching the left or top image edge; expected text is more or less centered
+            pass
+        else:
+            cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
+            # Crop the text feature from the thresholded grayscale image (exact bounding box, no padding is applied)
+            txt_ft = img_gs[y:y+h, x:x+w]
+            try:
+                # Write the image with the detected bounding boxes drawn so far (same path is overwritten every iteration)
+                cv2.imwrite("output/{}_tmp.png".format(image.split("/")[1].split(".")[0]),img)
+                # Apply blur to remove sharp edges and help with OCR
+                txt_ft = cv2.GaussianBlur(txt_ft,(5,5),cv2.BORDER_DEFAULT)
+                # Actual OCR in one call; the recognized text is appended to the result
+                txt += pytesseract.image_to_string(txt_ft)
+            except:
+                # Fail silently: the bare except swallows any error and this feature contributes no text
+                pass
+    return txt

 if __name__ == "__main__":
    if not os.path.exists("input"):