问题描述
我正在从事一个小型个人项目,以自动从不同的测量设备(没有数字输出)采集数据;万用表或照度计。 Example of the input image. 我已经阅读了一些关于该主题的教程和论坛帖子。即:this post from stackoverflow 和 this tutorial。
有两个主要问题:
-
寻找显示投资回报率
-
OCR
我暂时跳过了第一点,因为简单的canny>>contours>>aproxpoly在所有情况下都不起作用。在某些情况下,最大的元素不是显示器,而是发光计本身或图像中的其他东西。
我主要处理的是从显示器中分割数字及其识别。 分割是通过使用自适应阈值对图像本身进行预处理并从图像中去除一些小的连通分量来解决的。分割数字后,我尝试应用上述教程中使用的相同方法。我添加了一些额外的单个数字预处理,以防它们第一次无法识别(旋转 +-5 度)。
此时,为预处理和分割方法调整单个参数变得相当复杂,这些方法可以同时在多个设备上运行。
我在寻求建议,是仅使用计算机视觉技术将这个项目推到最后,还是应该制作某种混合机器学习/CV 解决方案(例如训练 CNN 以识别单个数字或用于在图像中查找显示的 YOLO 等)?
PS:我已经尝试过 tesseract,但是在它无法识别单个数字并阅读它的文档之后 - 这解释说,这个网络更适合识别文本或单词块,而不是图像中某处的单个数字- 我放弃了。 请注意,相机和测量设备的相对位置或单个设备的显示不保证,我会添加一些额外的照片。
还有我的代码:
import numpy as np
import cv2
import matplotlib.pyplot as plt
import os
## MAKE THE PROCESS MORE 'VERBOSE'
# Global flag: when True, intermediate results are displayed with matplotlib.
show_images=True
# checking active 7-segment (+DP) segments in digit_roi
def search4segments(digit_roi):
    """Detect which of the 7 segments are lit in a thresholded digit ROI.

    Parameters
    ----------
    digit_roi : 2-D array (uint8/bool), non-zero pixels = lit pixels.

    Returns
    -------
    list[int] of length 7 -- 1 where a segment is "on", in the order
    (top, top-left, top-right, center, bottom-left, bottom-right, bottom).
    """
    dig_H, dig_W = digit_roi.shape
    # approximate stroke thickness of one segment
    seg_short_side = dig_W // 4
    # ((rowA, colA), (rowB, colB)) bounding boxes of the 7 segments
    # (reconstructed -- the original table was truncated by the page scrape)
    segments = [
        ((0, 0), (seg_short_side, dig_W)),                           # top
        ((0, 0), (dig_H // 2, seg_short_side)),                      # top-left
        ((0, dig_W - seg_short_side), (dig_H // 2, dig_W)),          # top-right
        ((dig_H // 2 - seg_short_side // 2, 0),
         (dig_H // 2 + seg_short_side // 2, dig_W)),                 # center
        ((dig_H // 2, 0), (dig_H, seg_short_side)),                  # bottom-left
        ((dig_H // 2, dig_W - seg_short_side), (dig_H, dig_W)),      # bottom-right
        ((dig_H - seg_short_side, 0), (dig_H, dig_W)),               # bottom
    ]
    # set of activated segments
    on = [0] * len(segments)
    # loop over the individual segments
    for (i, ((xA, yA), (xB, yB))) in enumerate(segments):
        # BUGFIX: the original indexed the module-level `digit` variable
        # instead of the `digit_roi` parameter
        segROI = digit_roi[xA:xB, yA:yB]
        # count the thresholded pixels and compute the segment's area
        total = int(np.count_nonzero(segROI))
        area = (xB - xA) * (yB - yA)
        # mark the segment "on" when more than half of its area is lit
        # (the epsilon guards against a zero-area segment on tiny ROIs)
        if total / float(area + 1e-1) > 0.5:
            on[i] = 1
    return on
# rotate by arbitrary angles (not only multiples of 90 degrees)
def rotate_image(image, angle):
    """Rotate `image` by `angle` degrees (CCW) about its center.

    The output keeps the input width/height; corners rotated out of the
    frame are clipped and uncovered areas are zero-filled.
    """
    h, w = image.shape[:2]
    center = (w / 2, h / 2)
    transform = cv2.getRotationMatrix2D(center, angle, 1.0)
    return cv2.warpAffine(image, transform, (w, h), flags=cv2.INTER_LINEAR)
# trimmean function definition
def trimmean(arr, percent):
    """Trimmed mean: discard `percent`% of the extreme values (half from
    each tail) and average the remainder.

    BUGFIX vs original: the slice was ``arr[k+1:n-k]``, which dropped one
    extra element from the front and biased the estimate; the trim is now
    symmetric. The values are sorted first so the function is correct for
    unsorted input as well (a trimmed mean is defined on ordered data).
    """
    data = np.sort(np.asarray(arr, dtype=float))
    n = len(data)
    # number of elements to drop from EACH tail
    k = int(round(n * (float(percent) / 100) / 2))
    return np.mean(data[k:n - k])
# ---------------------------------------------------------------------
# Script: load an image, pre-process it and isolate the display pixels
# ---------------------------------------------------------------------
# read the image paths into a list (from .\RealWorldImages)
current_dir = os.getcwd()
relative_imdir = r'\RealWorldImages'
img_paths = [os.path.join(current_dir + relative_imdir, path)
             for path in os.listdir(current_dir + relative_imdir)]
# load the second image as grayscale
img = cv2.imread(img_paths[1], 0)
# median blur to get rid of high-frequency noise (in place)
cv2.medianBlur(img, 5, img)
# OPTIONAL: resize so the window fits on screen for manual ROI selection
img = cv2.resize(img, (int(img.shape[1] / 1.2), int(img.shape[0] / 1.2)))
# manual selection of the display ROI
roi = cv2.selectROI(img)
cv2.destroyAllWindows()
# crop the ROI; roi is (x, y, w, h)
img_roi = img[int(roi[1]):int(roi[1] + roi[3]), int(roi[0]):int(roi[0] + roi[2])]
# adaptive-threshold window derived from ROI width (expected 4-6 digits);
# 2*k+1 guarantees an odd kernel size
ksize = 2 * (roi[2] // 8) + 1
# adaptive thresholding (inverted: digits become white on black)
img_bw = cv2.adaptiveThreshold(img_roi, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                               cv2.THRESH_BINARY_INV, ksize, 3)
# connected components with stats: (count, labels, stats, centroids)
img_bw_CC = cv2.connectedComponentsWithStats(img_bw)
# keep only the relevant columns: label id, pixel count, bbox (w, h)
CC_stats = np.zeros((len(img_bw_CC[2]), 4), np.uint32)
CC_stats[:, 0] = range(len(img_bw_CC[2]))
CC_stats[:, 1] = img_bw_CC[2][:, -1]
CC_stats[:, 2:] = img_bw_CC[2][:, 2:-1]
# drop every component whose bounding box touches the ROI border --
# probably not a digit.
# BUGFIX: the right/bottom half of this test was syntactically broken
# in the original ("...shape[1],1] + ...")
touches_border = np.logical_or(
    np.logical_or(img_bw_CC[2][:, 0] == 0, img_bw_CC[2][:, 1] == 0),
    np.logical_or(img_bw_CC[2][:, 0] + img_bw_CC[2][:, 2] == img_roi.shape[1],
                  img_bw_CC[2][:, 1] + img_bw_CC[2][:, 3] == img_roi.shape[0]))
relevant_cc_stats = np.delete(CC_stats, touches_border, 0)
# sort the rows by component pixel count (largest -> smallest)
relevant_cc_stats = relevant_cc_stats[np.argsort(-relevant_cc_stats[:, 1])]
# Todo: FIX THE STATISTIC
# trimmed mean of the component sizes (calculated from the middle 40%)
mean = trimmean(relevant_cc_stats[:, 1], 60)
# standard deviation (currently unused by the filter below)
stddev = np.std(relevant_cc_stats[1:, 1])
# drop components smaller than a quarter of the trimmed mean (NOT QUITE ROBUST)
relevant_cc_stats = relevant_cc_stats[relevant_cc_stats[:, 1] > (mean / 4)]
# paint the surviving components into a fresh binary mask
masks = np.zeros((img_bw.shape), dtype=np.uint8)
for i in range(relevant_cc_stats.shape[0]):
    cv2.bitwise_or(masks,
                   np.array(img_bw_CC[1] == relevant_cc_stats[i, 0], dtype=np.uint8),
                   masks)
roi_thinned = cv2.ximgproc.thinning(masks)
# BUGFIX: cv2.houghlines does not exist -- the API name is cv2.HoughLines
lines = cv2.HoughLines(roi_thinned, 1, np.pi / 180, 30)
if show_images:
    # display the pre-processing result
    _, axs = plt.subplots(1, 2)
    axs[0].imshow(img_bw, cmap='binary')
    axs[1].imshow(masks, cmap='binary')
    plt.show()
# project the mask onto the horizontal axis (column sums)
ver_proj = np.sum(masks, axis=0)
# binarise the projection: columns below 1/5 of the mean are background.
# (BUGFIX: the original applied two different thresholds and left the
# values in between untouched; after astype(bool) the effective cut-off
# was mean/5, which is what is done explicitly here. np.bool is also
# deprecated, so a plain boolean comparison is used.)
ver_proj = ver_proj >= np.mean(ver_proj) / 5
########################################################################
########################################################################
##################### DIGIT SEGMENTATION ###############################
########################################################################
########################################################################
# find the borders: indices where the thresholded projection flips on/off
borders = []
for i in range(1, len(ver_proj)):
    if ver_proj[i - 1] ^ ver_proj[i]:
        borders.append(i)
# account for the outermost borders when digits touch the image sides
lengths = list(np.diff(borders))
if len(borders) > 2:
    if borders[0] > 2:
        lengths.insert(0, borders[0])
    if sum(lengths) < masks.shape[1]:
        lengths.append(masks.shape[1] - borders[-1])
else:
    raise RuntimeError('digit segmentation failed: too few projection borders found')
# cut the mask into vertical strips between consecutive borders
img_segments = []
for i in range(1, len(borders)):
    img_segments.append(masks[:, borders[i - 1]:borders[i]])
if show_images:
    # BUGFIX: the subplot call was truncated in the original source
    _, axs = plt.subplots(1, len(img_segments))
    for i in range(len(img_segments)):
        axs[i].imshow(img_segments[i], cmap='binary')
    plt.show()
# keep only strips that actually contain a digit (>1/5 of pixels lit);
# the rest are the supposedly empty in-between-digit regions
digits = []
for i in range(len(img_segments)):
    if cv2.countNonZero(img_segments[i]) > img_segments[i].size / 5:
        digits.append(img_segments[i])
if show_images:
    _, axs = plt.subplots(1, len(digits))
    for i in range(len(digits)):
        axs[i].imshow(digits[i], cmap='binary')
    plt.show()
# project each digit's rows onto the vertical axis (row sums)
digits_hor_proj = [np.sum(digit, axis=1) for digit in digits]
# shrink each digit vertically to its tightest bounding box
for i in range(len(digits)):
    tmp = np.where(digits_hor_proj[i] > np.mean(digits_hor_proj[i] / 2))
    digits[i] = digits[i][int(tmp[0][0]):int(tmp[0][-1]) + 1, :]
if show_images:
    _, axs = plt.subplots(1, len(digits))
    for i in range(len(digits)):
        axs[i].imshow(digits[i], cmap='binary')
    plt.show()
# bridge small gaps between strokes with an elliptical dilation
digits_closed = []
for i in range(len(digits)):
    kernel = cv2.getStructuringElement(
        cv2.MORPH_ELLIPSE,
        (int(digits[i].shape[1] / 5), int(digits[i].shape[1] / 5)))
    digits_closed.append(cv2.morphologyEx(digits[i], cv2.MORPH_DILATE, kernel))
##########################################################################
##########################################################################
####################### DIGIT RECOGNITION ################################
##########################################################################
##########################################################################
# Lookup table mapping a 7-segment on/off pattern in the order
# (top, top-left, top-right, center, bottom-left, bottom-right, bottom)
# to the digit it represents. The digit "1" is detected separately from
# the aspect ratio of its bounding box, so its pattern stays commented out.
# (Reconstructed -- the original dict was garbled by the page scrape.)
DIGITS_LOOKUP = {
    (1, 1, 1, 0, 1, 1, 1): 0,
    # (0, 0, 1, 0, 0, 1, 0): 1,  # calculated differently
    (1, 0, 1, 1, 1, 0, 1): 2,
    (1, 0, 1, 1, 0, 1, 1): 3,
    (0, 1, 1, 1, 0, 1, 0): 4,
    (1, 1, 0, 1, 0, 1, 1): 5,
    (1, 1, 0, 1, 1, 1, 1): 6,
    (1, 0, 1, 0, 0, 1, 0): 7,
    (1, 1, 1, 1, 1, 1, 1): 8,
    (1, 1, 1, 1, 0, 1, 1): 9,
}
# output list for the recognised digits (None = detected but unrecognised)
digit_rec = []
for digit in digits:
    dig_H, dig_W = digit.shape
    seg_short_side = dig_W // 3
    # slight rotations in case the upright digit is not recognised
    trials = 0
    rot_pos = [-5, -3, 3, 5]
    # special case for recognising a "1": much taller than wide and dense.
    # Todo: expand the image with the one, as it is on its original
    # position in the digit space (left side)
    if dig_H > 3 * dig_W and cv2.countNonZero(digit) > 0.4 * dig_W * dig_H:
        digit_rec.append(1)
    else:
        # if the digit fills at least 30% of its space, use it as-is;
        # otherwise dilate it first
        if cv2.countNonZero(digit) > 0.3 * dig_W * dig_H:
            pass
        else:
            # BUGFIX: the original indexed digits[i] with a stale loop
            # index and had a truncated morphologyEx call
            kernel = cv2.getStructuringElement(
                cv2.MORPH_ELLIPSE,
                (int(digit.shape[1] / 5), int(digit.shape[1] / 5)))
            digit = cv2.morphologyEx(digit, cv2.MORPH_DILATE, kernel)
        # cycle over the rotational positions
        while trials < len(rot_pos):
            if trials == 0:
                on = search4segments(digit)
            # look up the digit; on failure rotate and retry. After the
            # last attempt append None (to record that something was there
            # even though it was not recognised).
            try:
                digit_rec.append(DIGITS_LOOKUP[tuple(on)])
                break
            except KeyError:
                rot_digit = rotate_image(digit, rot_pos[trials])
                on = search4segments(rot_digit)
                print(trials)
                trials += 1
        if trials == 4:
            digit_rec.append(None)
if show_images:
    # BUGFIX: the plotting calls were truncated in the original source
    _, axs = plt.subplots(1, len(digits))
    for i in range(len(digits)):
        axs[i].imshow(digits[i], cmap='binary')
        axs[i].title.set_text(digit_rec[i])
    plt.show()
# print the result vector
print(digit_rec)
解决方法
暂无找到可以解决该程序问题的有效方法,小编努力寻找整理中!
如果你已经找到好的解决方法,欢迎将解决方案带上本链接一起发送给小编。
小编邮箱:dio#foxmail.com (将#修改为@)