How to get RTSP video input into OpenCV

Problem description

I am setting up a PPE detection module with OpenVINO on Ubuntu 18.04. The video input works fine with my webcam at /dev/video0, but I want to change it to an RTSP input. Whenever I put the RTSP URL into config.json, it doesn't work and shows "Either wrong input path or empty line is found. Please check the conf.json file".
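For reference, a minimal standalone check of whether OpenCV can open the stream at all (assuming an OpenCV build with FFmpeg support; the credentials below are placeholders):

import cv2

# Placeholder URL; use the same string that goes into config.json
url = "rtsp://user:password@192.168.0.144:554/cam/realmonitor?channel=1&subtype=1"

cap = cv2.VideoCapture(url)
print("Stream opened:", cap.isOpened())
ret, frame = cap.read()
print("First frame read:", ret, "shape:", frame.shape if ret else None)
cap.release()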

Here is main.py

#!/usr/bin/env python3


from __future__ import print_function
import sys
import os
import cv2
import numpy as np
from argparse import ArgumentParser
import datetime
import json
from inference import Network

# Global vars
cpu_extension = ''
conf_modelLayers = ''
conf_modelWeights = ''
conf_safety_modelLayers = ''
conf_safety_modelWeights = ''
targetDevice = "cpu"
conf_batchSize = 1
conf_modelPersonLabel = 1
conf_inferConfidenceThreshold = 0.7
conf_inFrameViolationsThreshold = 19
conf_inFramePeopleThreshold = 5
use_safety_model = False
padding = 30
viol_wk = 0
acceptedDevices = ['CPU', 'GPU', 'MYRIAD', 'HETERO:FPGA,CPU', 'HDDL']
videos = []
name_of_videos = []
CONFIG_FILE = '../resources/config.json'
is_async_mode = True


class Video:
    def __init__(self, idx, path):
        if path.isnumeric():
            self.video = cv2.VideoCapture(int(path))
            self.name = "Cam " + str(idx)
        else:
            if os.path.exists(path):
                self.video = cv2.VideoCapture("rtsp://edwin:Passw0rd@192.168.0.144:554/cam/realmonitor?channel=1&subtype=1")
                self.name = "Video " + str(idx)
            else:
                print("Either wrong input path or empty line is found. Please check the conf.json file")
                exit(21)
        if not self.video.isOpened():
            print("Couldn't open video: " + path)
            sys.exit(20)
        self.height = int(self.video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        self.width = int(self.video.get(cv2.CAP_PROP_FRAME_WIDTH))

        self.currentViolationCount = 0
        self.currentViolationCountConfidence = 0
        self.prevViolationCount = 0
        self.totalViolations = 0
        self.totalPeopleCount = 0
        self.currentPeopleCount = 0
        self.currentPeopleCountConfidence = 0
        self.prevPeopleCount = 0
        self.currentTotalPeopleCount = 0

        cv2.namedWindow(self.name, cv2.WINDOW_NORMAL)
        self.frame_start_time = datetime.datetime.now()


def get_args():
    """
    Parses the argument.
    :return: None
    """
    global is_async_mode
    parser = ArgumentParser()
    parser.add_argument("-d","--device",help="Specify the target device to infer on; cpu,GPU,"
                             "FPGA,MYRIAD or HDDL is acceptable. Application will"
                             "look for a suitable plugin for device specified"
                             " (cpu by default)",type=str,required=False)
    parser.add_argument("-m","--model",help="Path to an .xml file with a trained model's"
                             " weights.",required=True,type=str)
    parser.add_argument("-sm","--safety_model",required=False,default=None)
    parser.add_argument("-e","--cpu_extension",help="MKLDNN (cpu)-targeted custom layers. Absolute "
                             "path to a shared library with the kernels impl",default=None)
    parser.add_argument("-f","--flag",help="sync or async",default="async",type=str)

    args = parser.parse_args()

    global conf_modelLayers, conf_modelWeights, conf_safety_modelLayers, conf_safety_modelWeights, \
        targetDevice, cpu_extension, videos, use_safety_model
    if args.model:
        conf_modelLayers = args.model
        conf_modelWeights = os.path.splitext(conf_modelLayers)[0] + ".bin"
    if args.safety_model:
        conf_safety_modelLayers = args.safety_model
        conf_safety_modelWeights = os.path.splitext(conf_safety_modelLayers)[0] + ".bin"
        use_safety_model = True
    if args.device:
        targetDevice = args.device
        if "MULTI:" not in targetDevice:
            if targetDevice not in acceptedDevices:
                print("Selected device,%s not supported." % (targetDevice))
                sys.exit(12)
    if args.cpu_extension:
        cpu_extension = args.cpu_extension
    if args.flag == "async":
        is_async_mode = True
        print('Application running in Async mode')
    else:
        is_async_mode = False
        print('Application running in Sync mode')
    assert os.path.isfile(CONFIG_FILE), "{} file doesn't exist".format(CONFIG_FILE)
    config = json.loads(open(CONFIG_FILE).read())
    for idx, item in enumerate(config['inputs']):
        vid = Video(idx, item['video'])
        name_of_videos.append([idx, item['video']])
        videos.append([idx, vid])


def detect_safety_hat(img):
    """
    Detection of the hat of the person.
    :param img: Current frame
    :return: Boolean value of the detected hat
    """
    lowH = 15
    lowS = 65
    lowV = 75

    highH = 30
    highS = 255
    highV = 255

    crop = 0
    height = 15
    perc = 8

    hsv = np.zeros(1)

    try:
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    except cv2.error as e:
        print("%d %d %d" % (img.shape))
        print("%d %d %d" % (img.shape))
        print(e)

    threshold_img = cv2.inRange(hsv, (lowH, lowS, lowV), (highH, highS, highV))

    x = 0
    y = int(threshold_img.shape[0] * crop / 100)
    w = int(threshold_img.shape[1])
    h = int(threshold_img.shape[0] * height / 100)
    img_cropped = threshold_img[y: y + h, x: x + w]

    if cv2.countNonZero(threshold_img) < img_cropped.size * perc / 100:
        return False
    return True


def detect_safety_jacket(img):
    """
    Detection of the safety jacket of the person.
    :param img: Current frame
    :return: Boolean value of the detected jacket
    """
    lowH = 0
    lowS = 150
    lowV = 42

    highH = 11
    highS = 255
    highV = 255

    crop = 15
    height = 40
    perc = 23

    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

    threshold_img = cv2.inRange(hsv, (lowH, lowS, lowV), (highH, highS, highV))

    x = 0
    y = int(threshold_img.shape[0] * crop / 100)
    w = int(threshold_img.shape[1])
    h = int(threshold_img.shape[0] * height / 100)
    img_cropped = threshold_img[y: y + h, x: x + w]

    if cv2.countNonZero(threshold_img) < img_cropped.size * perc / 100:
        return False
    return True


def detect_workers(workers, frame):
    """
    Detection of the person with the safety guards.
    :param workers: Total number of the person in the current frame
    :param frame: Current frame
    :return: Total violation count of the person
    """
    violations = 0
    global viol_wk
    for worker in workers:
        xmin, ymin, xmax, ymax = worker
        crop = frame[ymin:ymax, xmin:xmax]
        if 0 not in crop.shape:
            if detect_safety_hat(crop):
                if detect_safety_jacket(crop):
                    cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
                else:
                    cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 255, 255), 2)
                    violations += 1
                    viol_wk += 1

            else:
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 0, 255), 2)
                violations += 1
                viol_wk += 1
    return violations


def main():
    """
    Load the network and parse the output.
    :return: None
    """
    get_args()
    global is_async_mode
    nextReq = 1
    currReq = 0
    nextReq_s = 1
    currReq_s = 0
    prevVideo = None
    vid_finished = [False] * len(videos)
    min_FPS = min([videos[i][1].video.get(cv2.CAP_PROP_FPS) for i in range(len(videos))])

    # Initialise the class
    infer_network = Network()
    infer_network_safety = Network()
    # Load the network to IE plugin to get shape of input layer
    plugin, (batch_size, channels, model_height, model_width) = \
        infer_network.load_model(conf_modelLayers, targetDevice, 1, 1, 2, cpu_extension)
    if use_safety_model:
        batch_size_sm, channels_sm, model_height_sm, model_width_sm = \
            infer_network_safety.load_model(conf_safety_modelLayers, targetDevice, 1, 1, 2, cpu_extension, plugin)[1]

    while True:
        for index, currVideo in videos:
            # Read image from video/cam
            vfps = int(round(currVideo.video.get(cv2.CAP_PROP_FPS)))
            for i in range(0, int(round(vfps / min_FPS))):
                ret, current_img = currVideo.video.read()
                if not ret:
                    vid_finished[index] = True
                    break
            if vid_finished[index]:
                stream_end_frame = np.zeros((int(currVideo.height), int(currVideo.width), 1), dtype='uint8')
                cv2.putText(stream_end_frame, "Input file {} has ended".format(
                    name_of_videos[index][1].split('/')[-1]),
                            (10, int(currVideo.height / 2)),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 255, 255), 2)
                cv2.imshow(currVideo.name, stream_end_frame)
                continue
            # Transform image to person detection model input
            rsImg = cv2.resize(current_img, (model_width, model_height))
            rsImg = rsImg.transpose((2, 0, 1))
            rsImg = rsImg.reshape((batch_size, channels, model_height, model_width))

            infer_start_time = datetime.datetime.now()
            # Infer current image
            if is_async_mode:
                infer_network.exec_net(nextReq, rsImg)
            else:
                infer_network.exec_net(currReq, rsImg)
                prevVideo = currVideo
                previous_img = current_img

            # Wait for previous request to end
            if infer_network.wait(currReq) == 0:
                infer_end_time = (datetime.datetime.now() - infer_start_time) * 1000

                in_frame_workers = []

                people = 0
                violations = 0
                hard_hat_detection = False
                vest_detection = False
                result = infer_network.get_output(currReq)
                # Filter output
                for obj in result[0][0]:
                    if obj[2] > conf_inferConfidenceThreshold:
                        xmin = int(obj[3] * prevVideo.width)
                        ymin = int(obj[4] * prevVideo.height)
                        xmax = int(obj[5] * prevVideo.width)
                        ymax = int(obj[6] * prevVideo.height)
                        xmin = int(xmin - padding) if (xmin - padding) > 0 else 0
                        ymin = int(ymin - padding) if (ymin - padding) > 0 else 0
                        xmax = int(xmax + padding) if (xmax + padding) < prevVideo.width else prevVideo.width
                        ymax = int(ymax + padding) if (ymax + padding) < prevVideo.height else prevVideo.height
                        cv2.rectangle(previous_img, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
                        people += 1
                        in_frame_workers.append((xmin, ymin, xmax, ymax))
                        new_frame = previous_img[ymin:ymax, xmin:xmax]
                        if use_safety_model:

                            # Transform image to safety model input
                            in_frame_sm = cv2.resize(new_frame, (model_width_sm, model_height_sm))
                            in_frame_sm = in_frame_sm.transpose((2, 0, 1))
                            in_frame_sm = in_frame_sm.reshape(
                                (batch_size_sm, channels_sm, model_height_sm, model_width_sm))

                            infer_start_time_sm = datetime.datetime.now()
                            if is_async_mode:
                                infer_network_safety.exec_net(nextReq_s, in_frame_sm)
                            else:
                                infer_network_safety.exec_net(currReq_s, in_frame_sm)
                            # Wait for the result
                            infer_network_safety.wait(currReq_s)
                            infer_end_time_sm = (datetime.datetime.now() - infer_start_time_sm) * 1000

                            result_sm = infer_network_safety.get_output(currReq_s)
                            # Filter output
                            hard_hat_detection = False
                            vest_detection = False
                            detection_list = []
                            for obj_sm in result_sm[0][0]:

                                if (obj_sm[2] > 0.4):
                                    # Detect safety vest
                                    if (int(obj_sm[1])) == 2:
                                        xmin_sm = int(obj_sm[3] * (xmax - xmin))
                                        ymin_sm = int(obj_sm[4] * (ymax - ymin))
                                        xmax_sm = int(obj_sm[5] * (xmax - xmin))
                                        ymax_sm = int(obj_sm[6] * (ymax - ymin))
                                        if vest_detection == False:
                                            detection_list.append(
                                                [xmin_sm + xmin, ymin_sm + ymin, xmax_sm + xmin, ymax_sm + ymin])
                                            vest_detection = True

                                    # Detect hard-hat
                                    if int(obj_sm[1]) == 4:
                                        xmin_sm_v = int(obj_sm[3] * (xmax - xmin))
                                        ymin_sm_v = int(obj_sm[4] * (ymax - ymin))
                                        xmax_sm_v = int(obj_sm[5] * (xmax - xmin))
                                        ymax_sm_v = int(obj_sm[6] * (ymax - ymin))
                                        if hard_hat_detection == False:
                                            detection_list.append([xmin_sm_v + xmin, ymin_sm_v + ymin, xmax_sm_v + xmin, ymax_sm_v + ymin])
                                            hard_hat_detection = True

                            if hard_hat_detection is False or vest_detection is False:
                                violations += 1
                            for _rect in detection_list:
                                cv2.rectangle(current_img, (_rect[0], _rect[1]), (_rect[2], _rect[3]), (0, 255, 0), 2)
                            if is_async_mode:
                                currReq_s, nextReq_s = nextReq_s, currReq_s

                        # Use OpenCV if worker-safety-model is not provided
                        else:
                            violations = detect_workers(in_frame_workers, previous_img)

                # Check if detected violations equals previous frames
                if violations == prevVideo.currentViolationCount:
                    prevVideo.currentViolationCountConfidence += 1

                    # If frame threshold is reached, change validated count
                    if prevVideo.currentViolationCountConfidence == conf_inFrameViolationsThreshold:

                        # If another violation occurred, save image
                        if prevVideo.currentViolationCount > prevVideo.prevViolationCount:
                            prevVideo.totalViolations += (
                                        prevVideo.currentViolationCount - prevVideo.prevViolationCount)
                        prevVideo.prevViolationCount = prevVideo.currentViolationCount
                else:
                    prevVideo.currentViolationCountConfidence = 0
                    prevVideo.currentViolationCount = violations

                # Check if detected people count equals previous frames
                if people == prevVideo.currentPeopleCount:
                    prevVideo.currentPeopleCountConfidence += 1

                    # If frame threshold is reached, change validated count
                    if prevVideo.currentPeopleCountConfidence == conf_inFrameViolationsThreshold:
                        prevVideo.currentTotalPeopleCount += (
                                prevVideo.currentPeopleCount - prevVideo.prevPeopleCount)
                        if prevVideo.currentTotalPeopleCount > prevVideo.prevPeopleCount:
                            prevVideo.totalPeopleCount += prevVideo.currentTotalPeopleCount - prevVideo.prevPeopleCount
                        prevVideo.prevPeopleCount = prevVideo.currentPeopleCount
                else:
                    prevVideo.currentPeopleCountConfidence = 0
                    prevVideo.currentPeopleCount = people

                frame_end_time = datetime.datetime.now()
                cv2.putText(previous_img, 'Total people count: ' + str(
                    prevVideo.totalPeopleCount), (10, prevVideo.height - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)
                cv2.putText(previous_img, 'Current people count: ' + str(
                    prevVideo.currentTotalPeopleCount), (10, prevVideo.height - 40),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)
                cv2.putText(previous_img, 'Total violation count: ' + str(
                    prevVideo.totalViolations), (10, prevVideo.height - 70),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)
                cv2.putText(previous_img, 'FPS: %0.2fs' % (1 / (
                        frame_end_time - prevVideo.frame_start_time).total_seconds()),
                            (10, prevVideo.height - 100),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)
                cv2.putText(previous_img, "Inference time: N\A for async mode" if is_async_mode else
                            "Inference time: {:.3f} ms".format((infer_end_time).total_seconds()),
                            (10, prevVideo.height - 130),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)

                cv2.imshow(prevVideo.name, previous_img)
                prevVideo.frame_start_time = datetime.datetime.now()
            # Swap
            if is_async_mode:
                currReq, nextReq = nextReq, currReq
                previous_img = current_img
                prevVideo = currVideo
            if cv2.waitKey(1) == 27:
                print("Attempting to stop input files")
                infer_network.clean()
                infer_network_safety.clean()
                cv2.destroyAllWindows()
                return

        if False not in vid_finished:
            infer_network.clean()
            infer_network_safety.clean()
            cv2.destroyAllWindows()
            break


if __name__ == '__main__':
    main()

Here is the config file

{
    "inputs": [
        {
            "video": "rtsp://xxx:xxx@192.168.0.144:554/cam/realmonitor?channel=1&subtype=1"
        }
    ]
}

Solution

This is because of the line if os.path.exists(path):. That condition checks whether path points to an existing file. Your RTSP stream is not a file, so the check fails and produces your error.
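You can see the difference directly (a quick sketch; the URL is a placeholder):

import os

print(os.path.exists("/dev/video0"))         # True where the webcam device node exists
print(os.path.exists("rtsp://host/stream"))  # False: a URL is not a filesystem path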

For example, you could modify the condition to:

if os.path.exists(path) or path.startswith("rtsp"):

By the way, you have hardcoded the RTSP stream address in your code, so it will not use the path from your config. You probably want to replace the hardcoded URL with path.
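Putting both fixes together, the relevant part of Video.__init__ could look like this (a sketch, treating any non-numeric path that starts with "rtsp" as a stream URL):

import os
import cv2

class Video:
    def __init__(self, idx, path):
        if path.isnumeric():
            # Local camera index, e.g. "0" for /dev/video0
            self.video = cv2.VideoCapture(int(path))
            self.name = "Cam " + str(idx)
        elif os.path.exists(path) or path.startswith("rtsp"):
            # A video file on disk or an RTSP stream URL: pass the
            # configured path to OpenCV instead of a hardcoded address
            self.video = cv2.VideoCapture(path)
            self.name = "Video " + str(idx)
        else:
            print("Either wrong input path or empty line is found. Please check the conf.json file")
            exit(21)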