使用 pybind11 和 PyTorch 在 C++ 中运行 Python 代码时出现“无效指针”错误

问题描述

在 C++ 中通过 pybind11(PyTorch 1.6.0)运行以下 Python 代码时,出现“无效指针”(invalid pointer)错误。而直接在 Python 中运行同一段代码则一切正常,没有任何错误。这是什么原因?我该如何解决这个问题?

import torch
import torch.nn.functional as F
import numpy as np
import cv2
import torchvision
import eval_widerface
import torchvision_model

def resize(image, size):
    """Resample ``image`` to ``size`` with nearest-neighbour interpolation.

    A leading dimension is added before calling ``F.interpolate`` and
    removed again afterwards, so the result has the same number of
    dimensions as the input.
    """
    batched = image.unsqueeze(0)
    scaled = F.interpolate(batched, size=size, mode="nearest")
    return scaled.squeeze(0)

# define constants
model_path = '/path/to/model.pt'
image_path = '/path/to/image_pad.jpg'
scale = 1.0  # Image resize scale (2 for half size)
# OpenCV attribute names are case-sensitive: the constant is
# FONT_HERSHEY_SIMPLEX (a mis-cased name raises AttributeError).
font = cv2.FONT_HERSHEY_SIMPLEX

MIN_score = 0.9
image_bgr = cv2.imread(image_path)
# cv2.imread signals failure by returning None rather than raising, so fail
# here with a clear message instead of an obscure error inside cvtColor.
if image_bgr is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")
# OpenCV loads images in BGR channel order; convert to RGB
# (alternative loader: skimage.io.imread(args.image_path)).
image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
cv2.imshow("input image", image_bgr)
cv2.waitKey()
cv2.destroyAllWindows()

# load pre-trained model
return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
RetinaFace = torchvision_model.create_retinaface(return_layers)

print('RetinaFace.state_dict().')
retina_dict = RetinaFace.state_dict()

以下函数会产生错误:

def create_retinaface(return_layers, backbone_name='resnet50', anchors_num=3, pretrained=True):
    """Build a RetinaFace model on top of a backbone taken from ``resnet``.

    Args:
        return_layers: Passed through unchanged to ``RetinaFace``.
        backbone_name: Name of the backbone constructor looked up in
            ``resnet.__dict__``.
        anchors_num: Forwarded to ``RetinaFace`` as ``anchor_nums``.
        pretrained: Forwarded to the backbone constructor.

    Returns:
        The constructed ``RetinaFace`` instance.

    Raises:
        KeyError: If ``backbone_name`` is not defined in ``resnet``.
    """
    print('In create_retinaface.')
    print(resnet.__dict__)
    # Look the constructor up separately so that a wrong name produces a
    # descriptive error instead of a bare KeyError on the combined expression.
    try:
        backbone_factory = resnet.__dict__[backbone_name]
    except KeyError:
        raise KeyError(
            f"unknown backbone {backbone_name!r}: not defined in the resnet module"
        ) from None
    backbone = backbone_factory(pretrained=pretrained)
    print('backbone.')
    # Freeze only the first convolution; every other parameter stays trainable.
    for name, parameter in backbone.named_parameters():
        print('freeze layer 1.')
        if name == 'conv1.weight':
            parameter.requires_grad_(False)

    # Honour the caller-supplied anchor count instead of a hard-coded 3
    # (the default is still 3, so existing callers see no change).
    model = RetinaFace(backbone, return_layers, anchor_nums=anchors_num)

    return model

语句 backbone = resnet.__dict__[backbone_name](pretrained=pretrained) 会产生如下错误:

*** Error in `./p': munmap_chunk(): invalid pointer: 0x00007f4461866db0 ***
======= Backtrace: =========
/usr/lib64/libc.so.6(+0x7f3e4)[0x7f44736b43e4]
/usr/local/lib64/libopencv_gapi.so.4.1(_ZNSt10_HashtableISsSsSaISsENSt8__detail9_IdentityESt8equal_toISsESt4hashISsENS1_18_Mod_range_hashingENS1_20_Default_ranged_hashENS1_20_Prime_rehash_policyENS1_17_Hashtable_traitsILb1ELb1ELb1EEEE21_M_insert_unique_nodeemmPNS1_10_Hash_nodeISsLb1EEE+0xc9)[0x7f4483dee1a9]
/home/20face/.virtualenvs/torch/lib64/python3.6/site-packages/torch/lib/libtorch_python.so(+0x4403b5)[0x7f4460bb73b5]
/home/20face/.virtualenvs/torch/lib64/python3.6/site-packages/torch/lib/libtorch_python.so(+0x44570a)[0x7f4460bbc70a]
/home/20face/.virtualenvs/torch/lib64/python3.6/site-packages/torch/lib/libtorch_python.so(+0x275b20)[0x7f44609ecb20]
/usr/lib64/libpython3.6m.so.1.0(_PyCFunction_FastCallDict+0x147)[0x7f4474307167]
/usr/lib64/libpython3.6m.so.1.0(+0x1507df)[0x7f44743727df]
/usr/lib64/libpython3.6m.so.1.0(_PyEval_EvalFrameDefault+0x3a7)[0x7f44743670f7]
/usr/lib64/libpython3.6m.so.1.0(+0x1505ca)[0x7f44743725ca]
/usr/lib64/libpython3.6m.so.1.0(+0x150903)[0x7f4474372903]
/usr/lib64/libpython3.6m.so.1.0(_PyEval_EvalFrameDefault+0x3a7)[0x7f44743670f7]
/usr/lib64/libpython3.6m.so.1.0(+0x14fb69)[0x7f4474371b69]
/usr/lib64/libpython3.6m.so.1.0(_PyFunction_FastCallDict+0x24f)[0x7f44743739ff]
/usr/lib64/libpython3.6m.so.1.0(_PyObject_FastCallDict+0x10e)[0x7f44742ca1de]
/usr/lib64/libpython3.6m.so.1.0(_PyObject_Call_Prepend+0x61)[0x7f44742ca2f1]
/usr/lib64/libpython3.6m.so.1.0(PyObject_Call+0x43)[0x7f44742c9f63]
/usr/lib64/libpython3.6m.so.1.0(+0xfa7e5)[0x7f447431c7e5]
/usr/lib64/libpython3.6m.so.1.0(+0xf71e2)[0x7f44743191e2]
/usr/lib64/libpython3.6m.so.1.0(PyObject_Call+0x43)[0x7f44742c9f63]
/usr/lib64/libpython3.6m.so.1.0(_PyEval_EvalFrameDefault+0x2067)[0x7f4474368db7]
/usr/lib64/libpython3.6m.so.1.0(PyEval_EvalCodeEx+0x24f)[0x7f4474372c9f]

解决方法

此行导致错误,因为它假设 resnet.__dict__ 中存在键为 backbone_name 的元素:

backbone = resnet.__dict__[backbone_name](pretrained=pretrained)

如果不是这种情况,则基本上会尝试访问无效的内存。请先用 if 语句检查 __dict__ 中是否包含 backbone_name,或者在使用前确保它确实具有该元素。