"""OCR the text found in every image of the ``input`` directory.

For each image the script prints the image path followed by the recognised
text, and writes a copy of the image with the detected text regions outlined
to the ``output`` directory.
"""
import os
import sys

import cv2
import numpy as np
import pytesseract
from PIL import Image

INPUT_DIR = "input"
OUTPUT_DIR = "output"
# Compared against the lower-cased extension returned by os.path.splitext.
IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png")


def _warn(message):
    """Write a diagnostic line to stderr so stdout only carries OCR results."""
    sys.stderr.write("warning: {}\n".format(message))


def _is_image_file(name):
    """Return True when *name* ends with one of IMAGE_EXTENSIONS (any case)."""
    return os.path.splitext(name)[1].lower() in IMAGE_EXTENSIONS


def _write_debug_image(image, img):
    """Best-effort save of the annotated image as ``output/<stem>_tmp.png``."""
    stem = os.path.splitext(os.path.basename(image))[0]
    target = "output/{}_tmp.png".format(stem)
    try:
        written = cv2.imwrite(target, img)
    except cv2.error as err:
        _warn("could not write {}: {}".format(target, err))
        return
    if not written:
        _warn("could not write {}".format(target))


def process(image):
    """Detect text regions in an image file and OCR each of them.

    Args:
        image: Path of the image file to read.

    Returns:
        The concatenated text recognised in all detected regions, in contour
        order. An empty string is returned when the image cannot be read or
        no text is recognised.

    Side effects:
        When at least one region is detected, an annotated copy of the image
        (green rectangles around the regions) is written to
        ``output/<stem>_tmp.png``. Failures to read, write or OCR are
        reported on stderr and never raised.
    """
    img = cv2.imread(image)
    if img is None:
        # imread signals an unreadable/unsupported file by returning None.
        _warn("could not read image {!r}".format(image))
        return ""

    # Convert to single-channel grayscale.
    img_gs = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Binary threshold to get rid of blemishes.
    _, img_gs = cv2.threshold(img_gs, 50, 255, cv2.THRESH_BINARY)
    # Inverted Otsu threshold so that text becomes foreground for contours.
    _, thr = cv2.threshold(
        img_gs, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV
    )
    # Dilate with a large rectangular kernel so that neighbouring glyphs
    # merge into one blob per text feature.
    rect_k = cv2.getStructuringElement(cv2.MORPH_RECT, (32, 32))
    dil = cv2.dilate(thr, rect_k, iterations=1)
    conts, _ = cv2.findContours(
        dil, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE
    )

    pieces = []
    annotated = False
    for cnt in conts:
        x, y, w, h = cv2.boundingRect(cnt)  # location and size of the feature
        if x == 0 or y == 0:
            # Skip features touching the top/left image edge; the expected
            # text is more or less centered.
            continue

        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
        annotated = True

        # Crop exactly the bounding box of the feature (no padding).
        txt_ft = img_gs[y:y + h, x:x + w]
        try:
            # Blur to remove sharp edges and help the OCR engine.
            # NOTE: the third positional argument of GaussianBlur is sigmaX,
            # so the integer value of cv2.BORDER_DEFAULT is what is used as
            # the blur sigma here. Kept as-is so OCR output does not change.
            txt_ft = cv2.GaussianBlur(txt_ft, (5, 5), cv2.BORDER_DEFAULT)
            pieces.append(pytesseract.image_to_string(txt_ft))
        except Exception as err:
            # Best effort: one failing region must not abort the others.
            _warn("OCR failed for a region of {!r}: {}".format(image, err))

    if annotated:
        # Written once, after all rectangles have been drawn.
        _write_debug_image(image, img)

    return "".join(pieces)


def main():
    """Run OCR on every supported image in ``input`` and print the results."""
    if not os.path.isdir(INPUT_DIR):
        sys.stderr.write(
            "error: directory {!r} not found\n".format(INPUT_DIR)
        )
        sys.exit(1)
    if not os.path.exists(OUTPUT_DIR):
        os.mkdir(OUTPUT_DIR)

    items = [
        "input/" + name
        for name in os.listdir(INPUT_DIR)
        if _is_image_file(name)
    ]
    for item in items:
        print(item)
        txt = process(item)
        print(txt.strip())


if __name__ == "__main__":
    main()