如何将 filter_detection(tf.image.non_max_suppression) 的输出转换为 numpy nd 数组

问题描述

我在自定义图像数据集上使用 EfficientDet 模型进行对象检测。我正在尝试在 Jetson 上进行部署，但目前 Jetson 上的 cuda 库不支持 NMS 功能。因此我不得不在后处理管道中实现 nms 功能。 nms 函数的输出是张量。我曾尝试使用 eval()、numpy() 和 session 转换它们，但它们不起作用。这是下面的代码。

def filter_detections(
        Boxes,classification,alphas=None,ratios=None,class_specific_filter=True,nms=True,score_threshold=0.01,max_detections=100,nms_threshold=0.5,detect_quadrangle=False,):
    """
    Filter detections using the Boxes and classification values.
    Args
        Boxes: Tensor of shape (num_Boxes,4) containing the Boxes in (x1,y1,x2,y2) format.
        classification: Tensor of shape (num_Boxes,num_classes) containing the classification scores.
        other: List of tensors of shape (num_Boxes,...) to filter along with the Boxes and classification scores.
        class_specific_filter: Whether to perform filtering per class,or take the best scoring class and filter those.
        nms: Flag to enable/disable non maximum suppression.
        score_threshold: Threshold used to prefilter the Boxes with.
        max_detections: Maximum number of detections to keep.
        nms_threshold: Threshold for the IoU value to determine when a Box should be suppressed.
    Returns
        A list of [Boxes,scores,labels,other[0],other[1],...].
        Boxes is shaped (max_detections,4) and contains the (x1,y2) of the non-suppressed Boxes.
        scores is shaped (max_detections,) and contains the scores of the predicted class.
        labels is shaped (max_detections,) and contains the predicted label.
        other[i] is shaped (max_detections,...) and contains the filtered other[i] data.
        In case there are less than max_detections detections,the tensors are padded with -1's.
    """

    def _filter_detections(scores_,labels_):
        # threshold based on score
        # (num_score_keeps,1)
        indices_ = tf.where(keras.backend.greater(scores_,score_threshold))

        if nms:
            # (num_score_keeps,4)
            filtered_Boxes = tf.gather_nd(Boxes,indices_)
            # In [4]: scores = np.array([0.1,0.5,0.4,0.2,0.7,0.2])
            # In [5]: tf.greater(scores,0.4)
            # Out[5]: <tf.Tensor: id=2,shape=(6,),dtype=bool,numpy=array([False,True,False,False])>
            # In [6]: tf.where(tf.greater(scores,0.4))
            # Out[6]:
            # <tf.Tensor: id=7,shape=(2,1),dtype=int64,numpy=
            # array([[1],#        [4]])>
            #
            # In [7]: tf.gather(scores,tf.where(tf.greater(scores,0.4)))
            # Out[7]:
            # <tf.Tensor: id=15,dtype=float64,numpy=
            # array([[0.5],#        [0.7]])>
            filtered_scores = keras.backend.gather(scores_,indices_)[:,0]

            # perform NMS
            # filtered_Boxes = tf.concat([filtered_Boxes[...,1:2],filtered_Boxes[...,0:1],#                             filtered_Boxes[...,3:4],2:3]],axis=-1)
            nms_indices = tf.image.non_max_suppression(filtered_Boxes,filtered_scores,max_output_size=max_detections,IoU_threshold=nms_threshold)

            # filter indices based on NMS
            # (num_score_nms_keeps,1)
            indices_ = keras.backend.gather(indices_,nms_indices)

        # add indices to list of all indices
        # (num_score_nms_keeps,)
        labels_ = tf.gather_nd(labels_,indices_)
        # (num_score_nms_keeps,2)
        indices_ = keras.backend.stack([indices_[:,0],labels_],axis=1)
        return indices_

    if class_specific_filter:
        all_indices = []
        # perform per class filtering
        for c in range(int(classification.shape[1])):
            scores = classification[:,c]
            labels = c * tf.ones((keras.backend.shape(scores)[0],dtype='int64')
            all_indices.append(_filter_detections(scores,labels))

        # concatenate indices to single tensor
        # (concatenated_num_score_nms_keeps,2)
        indices = keras.backend.concatenate(all_indices,axis=0)
    else:
        scores = keras.backend.max(classification,axis=1)
        labels = keras.backend.argmax(classification,axis=1)
        indices = _filter_detections(scores,labels)

    # select top k
    scores = tf.gather_nd(classification,indices)
    labels = indices[:,1]
    scores,top_indices = tf.nn.top_k(scores,k=keras.backend.minimum(max_detections,keras.backend.shape(scores)[0]))

    # filter input using the final set of indices
    indices = keras.backend.gather(indices[:,top_indices)
    Boxes = keras.backend.gather(Boxes,indices)
    labels = keras.backend.gather(labels,top_indices)

    # zero pad the outputs
    pad_size = keras.backend.maximum(0,max_detections - keras.backend.shape(scores)[0])
    Boxes = tf.pad(Boxes,[[0,pad_size],[0,0]],constant_values=-1)
    scores = tf.pad(scores,pad_size]],constant_values=-1)
    labels = tf.pad(labels,constant_values=-1)
    labels = keras.backend.cast(labels,'int32')

    # set shapes,since we kNow what they are
    Boxes.set_shape([max_detections,4])
    scores.set_shape([max_detections])
    labels.set_shape([max_detections])

    if detect_quadrangle:
        alphas = keras.backend.gather(alphas,indices)
        ratios = keras.backend.gather(ratios,indices)
        alphas = tf.pad(alphas,constant_values=-1)
        ratios = tf.pad(ratios,constant_values=-1)
        alphas.set_shape([max_detections,4])
        ratios.set_shape([max_detections])
        return [Boxes,alphas,ratios,labels]
    else:
        return [Boxes,labels]


class FilterDetections(keras.layers.Layer):
    """
    Keras layer for filtering detections using score threshold and NMS.
    """

    def __init__(
            self,parallel_iterations=32,**kwargs
    ):
        """
        Filters detections using score threshold,NMS and selecting the top-k detections.
        Args
            nms: Flag to enable/disable NMS.
            class_specific_filter: Whether to perform filtering per class,or take the best scoring class and filter those.
            nms_threshold: Threshold for the IoU value to determine when a Box should be suppressed.
            score_threshold: Threshold used to prefilter the Boxes with.
            max_detections: Maximum number of detections to keep.
            parallel_iterations: Number of batch items to process in parallel.
        """
        self.nms = nms
        self.class_specific_filter = class_specific_filter
        self.nms_threshold = nms_threshold
        self.score_threshold = score_threshold
        self.max_detections = max_detections
        self.parallel_iterations = parallel_iterations
        self.detect_quadrangle = detect_quadrangle
        super(FilterDetections,self).__init__(**kwargs)

    def call(self,inputs,**kwargs):
        """
        Constructs the NMS graph.
        Args
            inputs : List of [Boxes,...] tensors.
        """
        Boxes = inputs[0]
        classification = inputs[1]
        if self.detect_quadrangle:
            alphas = inputs[2]
            ratios = inputs[3]

        # wrap nms with our parameters
        def _filter_detections(args):
            Boxes_ = args[0]
            classification_ = args[1]
            alphas_ = args[2] if self.detect_quadrangle else None
            ratios_ = args[3] if self.detect_quadrangle else None

            return filter_detections(
                Boxes_,classification_,alphas_,ratios_,nms=self.nms,class_specific_filter=self.class_specific_filter,score_threshold=self.score_threshold,max_detections=self.max_detections,nms_threshold=self.nms_threshold,detect_quadrangle=self.detect_quadrangle,)

        # call filter_detections on each batch item
        if self.detect_quadrangle:
            outputs = tf.map_fn(
                _filter_detections,elems=[Boxes,ratios],dtype=['float32','float32','int32'],parallel_iterations=self.parallel_iterations
            )
        else:
            outputs = tf.map_fn(
                _filter_detections,classification],parallel_iterations=self.parallel_iterations
            )
        return outputs

    def compute_output_shape(self,input_shape):
        """
        Computes the output shapes given the input shapes.
        Args
            input_shape : List of input shapes [Boxes,classification].
        Returns
            List of tuples representing the output shapes:
            [filtered_Boxes.shape,filtered_scores.shape,filtered_labels.shape,filtered_other[0].shape,filtered_other[1].shape,...]
        """
        if self.detect_quadrangle:
            return [
                (input_shape[0][0],self.max_detections,4),(input_shape[1][0],self.max_detections),]
        else:
            return [
                (input_shape[0][0],]

    def compute_mask(self,mask=None):
        """
        This is required in Keras when there is more than 1 output.
        """
        return (len(inputs) + 1) * [None]

    def get_config(self):
        """
        Gets the configuration of this layer.
        Returns
            Dictionary containing the parameters of this layer.
        """
        config = super(FilterDetections,self).get_config()
        config.update({
            'nms': self.nms,'class_specific_filter': self.class_specific_filter,'nms_threshold': self.nms_threshold,'score_threshold': self.score_threshold,'max_detections': self.max_detections,'parallel_iterations': self.parallel_iterations,})

        return config

推理文件如下：

import tensorflow as tf
import numpy as np
import cv2
import os
from contextlib import redirect_stdout
import glob
import nms_2 as nm
from tensorflow import keras
from nms import FilterDetections
import time
#from utils import preprocess_image
from tensorflow.python.platform import gfile

def postprocess_Boxes(Boxes,scale,height,width):
    scale_x = 768/width
    scale_y = 768/height
    Boxes[:,0] = Boxes[:,0]/scale_x
    Boxes[:,1] = Boxes[:,1]/scale_y
    Boxes[:,2] = Boxes[:,2]/scale_x
    Boxes[:,3] = Boxes[:,3]/scale_y
    return Boxes

def preprocess_image(image,image_size):
    image = cv2.resize(image,(768,768))
    image = image.astype(np.float32)
    image /= 255.
    return image,0.2

def get_frozen_graph(graph_file):
    with tf.gfile.FastGFile(graph_file,"rb") as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    with tf.Graph().as_default() as graph:
        tf.import_graph_def(graph_def,name='')
    return graph

def main():
    phi = 2
    model_path = '/EfficientDet/checkpoints/model.pb'
    image_sizes = (512,640,768,896,1024,1280,1408)
    image_size = image_sizes[phi]
    classes = ['case',]
    num_classes = len(classes)
    score_threshold = 0.5
    colors = [np.random.randint(0,256,3).tolist() for i in range(num_classes)]

    output_names = {
            'output_Boxes': 'clipped_Boxes/stack:0',#'output_scores': 'filtered_detections/map/TensorArrayStack_1/TensorArrayGatherV3:0','output_class': 'classification/concat:0'
    }

    common_config = tf.ConfigProto()
    common_config.gpu_options.allow_growth = True
    # common_config.gpu_options.per_process_gpu_memory_fraction = 0.4
    graph = get_frozen_graph(model_path)
    sess = tf.Session(config=common_config,graph=graph)

    output_Boxes = sess.graph.get_tensor_by_name(output_names["output_Boxes"])
    #output_scores = sess.graph.get_tensor_by_name(output_names['output_scores'])
    output_class = sess.graph.get_tensor_by_name(output_names['output_class'])
    
    frames_path = '/images/*.jpg'
    #cap = cv2.VideoCapture(video_path)
    for img in glob.glob(frames_path):
        st = time.time()
        image = cv2.imread(img)
        #if not ret:
         #   break
        image = image[:,:,::-1]
        h,w = image.shape[:2]
        
        image,scale = preprocess_image(image,image_size=image_size)
        image_batch = np.expand_dims(image,axis=0)
        
       # print("Image time : ",(time.time() - st))
        start = time.time()
        Feed_dict = {"input_1:0": image_batch}
        Boxes,classes = sess.run([output_Boxes,output_class],Feed_dict)


       # Boxes = np.squeeze(Boxes)
       # classes = classes)
       # print(classes)
       # print(Boxes)
        tensor1,tensor2,tensor3 = FilterDetections(score_threshold=score_threshold)([Boxes,classes])
        
       #**This part is not working as I am unable to convert the tesnors**
       # a,b,c = sess.run([tensor1,tensor3],Feed_dict)
       # print(tensor1,tensor3)
       # print(tensor1)
       # print(tf.make_ndarray(tensor2.op.get_attr('value')))
       # print("Model time : ",(time.time() - start))
        Boxes = postprocess_Boxes(Boxes=Boxes,scale=0.2,height=h,width=w)

       # print(Boxes)

        # select indices which have a score above the threshold
        #indices = np.where(scores[:] > score_threshold)[0]

        # select those detections
        #Boxes = Boxes[indices]
        #labels = labels[indices]
       # print("Total Time : ",(time.time() - st))

if __name__ == '__main__':
    main()

我试图将盒子作为 nd 数组，但我得到了张量，我无法转换它们，因为模型图没有 nms 层。我需要转换方面的帮助。

解决方法

暂无找到可以解决该程序问题的有效方法，小编努力寻找整理中！

如果你已经找到好的解决方法，欢迎将解决方案带上本链接一起发送给小编。

小编邮箱:dio#foxmail.com (将#修改为@）

non-maximum-suppression python tensorflow1.15