使用OpenCV对pytesseract OCR进行预处理

问题描述

我想使用OCR(PyTesseract)来识别位于此类图像中的文本:

enter image description here

enter image description here

enter image description here

我有成千上万张这样的箭头图片。到目前为止,处理流程如下:我首先调整图像的大小(供另一个处理步骤使用)。然后,我裁剪图像以去掉箭头的大部分。接下来,我绘制一个白色矩形作为边框,以消除更多的噪点,同时在文本和图像边界之间仍保留一定距离,以获得更好的文本识别效果。我再次调整图像大小,以确保大写字母的高度约为30 px(参见 https://groups.google.com/forum/#!msg/tesseract-ocr/Wdh_JJwnw94/24JHDYQbBQAJ)。最后,我以150为阈值对图像进行二值化。

完整代码

import cv2

image_file = '001.jpg'

# load the input image as grayscale and grab the image dimensions
image = cv2.imread(image_file, cv2.IMREAD_GRAYSCALE)
if image is None:
    # cv2.imread returns None instead of raising when the file cannot be read
    raise FileNotFoundError("could not read image: " + image_file)
(h_1, w_1) = image.shape[:2]

# resize the image to a height of 320 px (keeping the aspect ratio)
# and grab the new image dimensions
image = cv2.resize(image, (int(w_1 * 320 / h_1), 320))
(h_1, w_1) = image.shape

# crop image: drop 70 px from top/bottom and 20 px from left/right
image_2 = image[70:h_1 - 70, 20:w_1 - 20]

# get image_2 height, width
(h_2, w_2) = image_2.shape

# draw white rectangle as a frame around the number -> remove noise
cv2.rectangle(image_2, (0, 0), (w_2, h_2), (255, 255, 255), 40)

# resize image, that capital letters are ~ 30 px in height
image_2 = cv2.resize(image_2, (int(w_2 * 50 / h_2), 50))

# image binarization: cv2.threshold takes (src, thresh, maxval, type);
# pixels above 150 become 255, everything else becomes 0
ret, image_2 = cv2.threshold(image_2, 150, 255, cv2.THRESH_BINARY)

# save image to file
cv2.imwrite('processed_' + image_file, image_2)

# tesseract part can be commented out
import pytesseract  # the module name is all lowercase
config_7 = ("-c tessedit_char_whitelist=0123456789AB --oem 1 --psm 7")
text = pytesseract.image_to_string(image_2, config=config_7)
print("OCR TEXT: " + "{}\n".format(text))

问题在于箭头中的文本从来不是居中的。有时上述方法会把部分文字也一并去掉(例如图片50A)。

在图像处理中是否有更优雅的方法来去除箭头?例如使用轮廓检测并将其删除?与tesseract部分相比,我对OpenCV部分更感兴趣,目的是让文本更容易被识别。

感谢您的帮助。

解决方法

如果查看图片,您会看到图像中有一个白色箭头,它也是最大的轮廓(尤其是在图像上绘制黑色边框之后)。如果创建一个空白蒙版并在其上绘制箭头(图像中最大的轮廓),然后稍微腐蚀一下,就可以对实际图像和腐蚀后的蒙版进行逐元素的按位与运算。如果不清楚,请查看下面的代码和注释,您会发现它实际上非常简单。

# imports
import cv2
import numpy as np

img = cv2.imread("number.png")  # read image
if img is None:
    # cv2.imread returns None instead of raising when the file cannot be read
    raise FileNotFoundError("could not read image: number.png")
# you can resize the image here if you like - it should still work for both sizes
h, w = img.shape[:2]  # get the actual image's height and width
img = cv2.resize(img, (int(w * 320 / h), 320))
h, w = img.shape[:2]

gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # transform to grayscale
# perform OTSU threshold: the thresh argument (0) is ignored because OTSU
# picks it automatically; 255 is the value given to pixels above it
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
# draw a black, 2 px wide border so the white region is detached from the image edge
cv2.rectangle(thresh, (0, 0), (w, h), 0, 2)
# search for contours; [-2] selects the contour list on OpenCV 2.x, 3.x and 4.x
# (3.x returns (image, contours, hierarchy), the others (contours, hierarchy))
contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2]
max_cnt = max(contours, key=cv2.contourArea)  # select biggest one
mask = np.zeros((h, w), dtype=np.uint8)  # create a black mask
cv2.drawContours(mask, [max_cnt], -1, 255, -1)  # draw biggest contour, filled, on the mask
kernel = np.ones((15, 15), dtype=np.uint8)  # make a kernel with appropriate values - in both cases (resized and original) 15 is ok
erosion = cv2.erode(mask, kernel, iterations=1)  # erode the mask with given kernel

reverse = cv2.bitwise_not(img.copy())  # reversed image of the actual image: 0 becomes 255 and 255 becomes 0
img = cv2.bitwise_and(reverse, reverse, mask=erosion)  # per-element bit-wise conjunction of the actual image and eroded mask (erosion)
img = cv2.bitwise_not(img)  # reverse the image again

# save image to file and display
cv2.imwrite("res.png", img)
cv2.imshow("img", img)
cv2.waitKey(0)
cv2.destroyAllWindows()

结果:

enter image description here

,

您可以尝试使用简单的Python脚本:

import cv2
import numpy as np

img = cv2.imread('mmubS.png', cv2.IMREAD_GRAYSCALE)
if img is None:
    # cv2.imread returns None instead of raising when the file cannot be read
    raise FileNotFoundError("could not read image: mmubS.png")
# inverted binary threshold: pixels above 200 become 0, everything else 255
thresh = cv2.threshold(img, 200, 255, cv2.THRESH_BINARY_INV)[1]
im_flood_fill = thresh.copy()
h, w = thresh.shape[:2]
# draw a white, 2 px wide frame along the image border so that every white
# region touching the border is connected to the flood-fill seed at (0, 0)
im_flood_fill = cv2.rectangle(im_flood_fill, (0, 0), (w - 1, h - 1), 255, 2)
# floodFill requires a mask that is 2 px larger than the image in each dimension
mask = np.zeros((h + 2, w + 2), np.uint8)
# fill everything connected to the top-left corner with black (0)
cv2.floodFill(im_flood_fill, mask, (0, 0), 0)
# invert again so the remaining white pixels become black on a white background
im_flood_fill = cv2.bitwise_not(im_flood_fill)
cv2.imwrite('text.png', im_flood_fill)
cv2.imshow('clear text', im_flood_fill)
# imshow only renders once the GUI event loop runs, so wait for a key press
cv2.waitKey(0)
cv2.destroyAllWindows()

结果: enter image description here