问题描述
我运行了这个仓库(repository)中的代码,但检测功能运行得太慢。
实时视频画面看起来不流畅,而且会出现中断。
我猜是计算耗时太长,请问该如何解决?
使用的 GPU 是 GeForce RTX 3090。
预先感谢您的帮助。
---------detect.py------------
import cv2
import numpy as np
import mtcnn
from architecture import *
from train_v2 import normalize,l2_normalizer
from scipy.spatial.distance import cosine
from tensorflow.keras.models import load_model
import pickle
import time
# Minimum detector confidence: detect() skips any detection scoring below this.
confidence_t=0.99
# Cosine-distance cutoff: detect() only accepts a stored encoding as a match
# when its distance to the current face is strictly below this value.
recognition_t=0.5
# Target size handed to cv2.resize for every face crop before it is encoded.
required_size = (160,160)
def get_face(img,Box):
    """Crop a face region out of an image array.

    Args:
        img: Image array indexed as ``img[row, column]``.
        Box: ``(x, y, width, height)`` of the region to crop.

    Returns:
        A tuple ``(face, (x1, y1), (x2, y2))``: the cropped array followed by
        the top-left and bottom-right corners that were used for the crop.
    """
    x1, y1, width, height = Box
    # A negative origin is mapped to its absolute value (not clamped to 0).
    # train_v2.py crops its reference faces the same way, so both sides of
    # the comparison stay consistent.
    x1, y1 = abs(x1), abs(y1)
    x2, y2 = x1 + width, y1 + height
    face = img[y1:y2, x1:x2]
    return face, (x1, y1), (x2, y2)
def get_encode(face_encoder,face,size):
    """Compute the encoder output for a single face crop.

    Args:
        face_encoder: Object exposing ``predict(batch)``.
        face: Face crop array.
        size: Target size passed to ``cv2.resize``.

    Returns:
        The first row of ``face_encoder.predict`` for a one-item batch.
    """
    # Standardize first, then resize: same order as the training script.
    face = normalize(face)
    face = cv2.resize(face, size)
    # The encoder is fed a batch of one, hence the added leading axis.
    # NOTE(review): if face_encoder is a Keras model, the Keras docs describe
    # calling the model directly as faster than predict() for one small batch;
    # worth measuring if per-frame latency matters - confirm before changing.
    batch = np.expand_dims(face, axis=0)
    encode = face_encoder.predict(batch)[0]
    return encode
def load_pickle(path):
    """Load and return the object pickled in the file at ``path``.

    Args:
        path: Path of the pickle file to read.

    Returns:
        The unpickled object (the caller uses it as the encoding dictionary).
    """
    # SECURITY: pickle.load can execute arbitrary code while loading; only
    # point this at files produced by a trusted source.
    with open(path, 'rb') as f:
        encoding_dict = pickle.load(f)
    return encoding_dict
def detect(img,detector,encoder,encoding_dict):
    """Detect faces in a BGR frame, label them, and draw the result on it.

    Args:
        img: BGR frame; rectangles and labels are drawn onto it in place.
        detector: Object exposing ``detect_faces(rgb_image)`` that returns a
            list of dicts with ``'confidence'`` and ``'box'`` entries.
        encoder: Face encoder passed through to ``get_encode``.
        encoding_dict: Mapping of name -> reference encoding.

    Returns:
        The same ``img`` object, annotated.
    """
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    results = detector.detect_faces(img_rgb)
    for res in results:
        if res['confidence'] < confidence_t:
            continue
        face, pt_1, pt_2 = get_face(img_rgb, res['box'])
        if face.size == 0:
            # The box lies entirely outside the frame; there is nothing to
            # resize or encode.
            continue
        encode = get_encode(encoder, face, required_size)
        encode = l2_normalizer.transform(encode.reshape(1, -1))[0]

        # Keep the closest stored encoding that is under the cutoff.
        name = 'unkNown'
        distance = float("inf")
        for db_name, db_encode in encoding_dict.items():
            dist = cosine(db_encode, encode)
            if dist < recognition_t and dist < distance:
                name = db_name
                distance = dist

        if name == 'unkNown':
            # No match: red box and label (colors are BGR).
            cv2.rectangle(img, pt_1, pt_2, (0, 0, 255), 2)
            cv2.putText(img, name, pt_1, cv2.FONT_HERSHEY_SIMPLEX, 1,
                        (0, 0, 255), 1)
        else:
            # Match: green box, label carries the distance to the match.
            cv2.rectangle(img, pt_1, pt_2, (0, 255, 0), 2)
            cv2.putText(img, name + f'__{distance:.2f}',
                        (pt_1[0], pt_1[1] - 5), cv2.FONT_HERSHEY_SIMPLEX, 1,
                        (0, 200, 200), 2)
    return img
if __name__ == "__main__":
    # Script entry point: load the encoder weights and the stored encodings,
    # then run detection on frames read from the network stream until the
    # stream ends or 'q' is pressed.
    # NOTE(review): InceptionresnetV2 must match the factory name exported by
    # architecture.py (not visible here) - confirm the exact spelling.
    face_encoder = InceptionresnetV2()
    path_m = "facenet_keras_weights.h5"
    face_encoder.load_weights(path_m)
    encodings_path = 'encodings/encodings.pkl'
    face_detector = mtcnn.MTCNN()
    encoding_dict = load_pickle(encodings_path)

    cap = cv2.VideoCapture("http://192.168.0.2:8081/?action=stream")
    prev_time = 0
    FPS = 10
    try:
        while cap.isOpened():
            # A frame is read on every iteration, but detection and display
            # only run once at least 1/FPS seconds have passed since the
            # previous processed frame; frames read in between are dropped.
            ret, frame = cap.read()
            if not ret:
                print("CAM NOT OPEND")
                break
            elapsed = time.time() - prev_time
            if elapsed > 1. / FPS:
                prev_time = time.time()
                frame = detect(frame, face_detector, face_encoder, encoding_dict)
                print("detect")
                cv2.imshow('camera', frame)
                print("show")
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
    finally:
        # Release the stream and close the preview window on every exit path,
        # including an exception raised inside detect().
        cap.release()
        cv2.destroyAllWindows()
-----------train_v2.py-----------
import os
import pickle

import cv2
import mtcnn
import numpy as np
from sklearn.preprocessing import Normalizer
from tensorflow.keras.models import load_model

from architecture import *
###### paths and variables #########
# Directory scanned for one sub-directory of images per person.
face_data = 'face/'
# Size passed to cv2.resize for every face crop before encoding.
required_shape = (160,160)
# NOTE(review): InceptionresnetV2 must match the factory name exported by
# architecture.py (not visible here) - confirm the exact spelling.
face_encoder = InceptionresnetV2()
path = "facenet_keras_weights.h5"
face_encoder.load_weights(path)
face_detector = mtcnn.MTCNN()
encodes = []
encoding_dict = dict()
# scikit-learn's transformer class is `Normalizer`; there is no lowercase
# `normalizer` in sklearn.preprocessing.
l2_normalizer = Normalizer('l2')
###############################
def normalize(img):
    """Standardize an array to zero mean and unit standard deviation.

    Args:
        img: Array exposing ``mean()`` and ``std()`` (e.g. a NumPy array).

    Returns:
        ``(img - mean) / std``. When the standard deviation is exactly zero
        (every element equal), the centered array is returned instead so the
        result is all zeros rather than NaN/inf.
    """
    mean, std = img.mean(), img.std()
    if std == 0:
        # Constant input: dividing by zero would fill the output with NaN.
        return img - mean
    return (img - mean) / std
if __name__ == "__main__":
    # Guarded so that importing this module (detect.py imports `normalize`
    # and `l2_normalizer` from it) does not re-run the whole encoding pass
    # and rewrite the pickle file; running the file as a script still does.
    for face_names in os.listdir(face_data):
        person_dir = os.path.join(face_data, face_names)
        if not os.path.isdir(person_dir):
            # Stray files next to the per-person folders are ignored.
            continue

        # Start from an empty list for each person; otherwise every person's
        # encoding would also include the images of the people before them.
        encodes = []
        for image_name in os.listdir(person_dir):
            image_path = os.path.join(person_dir, image_name)

            img_BGR = cv2.imread(image_path)
            if img_BGR is None:
                # cv2.imread returns None for files it cannot decode.
                print(f"skipping unreadable image: {image_path}")
                continue
            img_RGB = cv2.cvtColor(img_BGR, cv2.COLOR_BGR2RGB)

            x = face_detector.detect_faces(img_RGB)
            if not x:
                print(f"skipping image with no detected face: {image_path}")
                continue
            # Only the first detection of each image is used.
            x1, y1, width, height = x[0]['box']
            x1, y1 = abs(x1), abs(y1)
            x2, y2 = x1 + width, y1 + height
            face = img_RGB[y1:y2, x1:x2]

            face = normalize(face)
            face = cv2.resize(face, required_shape)
            face_d = np.expand_dims(face, axis=0)
            encode = face_encoder.predict(face_d)[0]
            encodes.append(encode)

        if encodes:
            # Sum the per-image encodings, then L2-normalize the sum.
            encode = np.sum(encodes, axis=0)
            encode = l2_normalizer.transform(np.expand_dims(encode, axis=0))[0]
            encoding_dict[face_names] = encode

    path = 'encodings/encodings.pkl'
    # Create the output directory on first run instead of failing in open().
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, 'wb') as file:
        pickle.dump(encoding_dict, file)
解决方法
暂未找到可以解决该程序问题的有效方法,小编正在努力寻找和整理中!
如果你已经找到好的解决方法,欢迎将解决方案带上本链接一起发送给小编。
小编邮箱:dio#foxmail.com (将#修改为@)