为什么我的代码改用动态分配(堆分配)的版本会失败?

问题描述

我正在编写一些Linux驱动程序代码。我对C还很陌生,实在无法理解我所看到的某些行为。我甚至不确定这是标准的C行为,还是Linux内核特有的怪癖。下面是我目前能正常工作的代码的MRE(最小可复现示例):

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/property.h>

/* Indices into sensor->swnodes[] for each software node of the graph. */
#define LOCAL_DEVICE    0
#define LOCAL_PORT      1
#define LOCAL_ENDPOINT  2
#define REMOTE_PORT     3
#define REMOTE_ENDPOINT 4

/*
 * Everything needed to describe one sensor's endpoint graph: its node name,
 * the software nodes themselves, and the property arrays two of them use.
 */
struct sensor {
    /* Name given to the LOCAL_DEVICE node; sized for "OVTI2680" plus NUL. */
    char name[9];
    /*
     * Indexed by LOCAL_* / REMOTE_*. The sixth element is never assigned
     * (presumably the zeroed terminator expected at registration; confirm).
     */
    struct software_node swnodes[6];
    /* Properties of the LOCAL_ENDPOINT node; only [0] is ever assigned. */
    struct property_entry ep_properties[2];
    /* Properties of the REMOTE_ENDPOINT node; only [0] is ever assigned. */
    struct property_entry cio2_properties[2];
};

/* Container for the sensors handled by this module (exactly one here). */
struct bridge {
    /* Not read or written anywhere in this example. */
    int n_sensors;
    struct sensor sensors[1];
};

static struct software_node remote_device = { "INT343E" };

static struct bridge bridge;

static const struct property_entry remote_endpoints[] = {
    PROPERTY_ENTRY_REF("remote-endpoint",&bridge.sensors[0].swnodes[REMOTE_ENDPOINT]),PROPERTY_ENTRY_REF("remote-endpoint",&bridge.sensors[0].swnodes[LOCAL_ENDPOINT]),};

/*
 * Copy the statically initialised reference properties into @sensor:
 * remote_endpoints[0] (-> REMOTE_ENDPOINT) becomes the first ep property,
 * remote_endpoints[1] (-> LOCAL_ENDPOINT) becomes the first cio2 property.
 */
static void create_fwnode_properties(struct sensor *sensor)
{
    sensor->ep_properties[0] = remote_endpoints[0];
    sensor->cio2_properties[0] = remote_endpoints[1];
}

static void create_connection_swnodes(struct sensor *sensor)
{
    sensor->swnodes[LOCAL_DEVICE] = (const struct software_node) { sensor->name };
    sensor->swnodes[LOCAL_PORT] = (const struct software_node) { "port0",&sensor->swnodes[0] };
    sensor->swnodes[LOCAL_ENDPOINT] = (const struct software_node) { "endpoint0",&sensor->swnodes[1],sensor->ep_properties };
    sensor->swnodes[REMOTE_PORT] = (const struct software_node) { "port0",&remote_device };
    sensor->swnodes[REMOTE_ENDPOINT] = (const struct software_node) { "endpoint0",&sensor->swnodes[3],sensor->cio2_properties };
}

/*
 * Name the global bridge's only sensor, build its properties and software
 * nodes, and register the nodes.
 *
 * Returns whatever software_node_register_nodes() returns.
 */
static int connect_supported_devices(void)
{
    struct sensor *sensor = &bridge.sensors[0];

    snprintf(sensor->name, sizeof(sensor->name), "%s", "OVTI2680");

    create_fwnode_properties(sensor);
    create_connection_swnodes(sensor);

    return software_node_register_nodes(sensor->swnodes);
}

int bridge_init(void)
{   
    struct fwnode_handle *local,*remote;
    int ret;

    ret = software_node_register(&remote_device);
    if (ret < 0) {
        pr_err("Failed to register the CIO2 HID node\n");
        return -EINVAL;
    }

        ret = connect_supported_devices();
        if (ret)
                goto err_unregister_remote;

    local = software_node_fwnode(&bridge.sensors[0].swnodes[LOCAL_ENDPOINT]);
    if (IS_ERR_OR_NULL(local)) {
        pr_err("Failed to get local ep\n");
        ret = PTR_ERR(local);
        goto err_unregister_remote;
    }

    remote = fwnode_graph_get_remote_endpoint(local);
    if (IS_ERR_OR_NULL(remote)) {
        pr_err("Failed to get remote ep\n");
        ret = PTR_ERR(remote);
        goto err_put_local;
    } else {
        pr_info("Successfully fetched remote ep\n");
    }

    fwnode_handle_put(remote);
    fwnode_handle_put(local);
    software_node_unregister_nodes(bridge.sensors[0].swnodes);
    software_node_unregister(&remote_device);

    return 0;

err_put_local:
    fwnode_handle_put(local);
err_unregister_remote:
    software_node_unregister(&remote_device);

    return ret;
}

/* Nothing to undo on unload: bridge_init() unregisters its nodes before returning. */
void bridge_exit(void)
{
}

module_init(bridge_init);
module_exit(bridge_exit);
MODULE_LICENSE("GPL v2");

问题在于全局变量bridge和数组remote_endpoints。我想把bridge放到堆上(这样一来remote_endpoints就不能再是全局的了,因为它的初始化器引用了bridge内部成员的地址)。要做到这一点改动并不大,只需把struct property_entry的实例化从remote_endpoints数组直接移到create_fwnode_properties()中:

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/property.h>
#include <linux/slab.h>

/* Indices into sensor->swnodes[] for each software node of the graph. */
#define LOCAL_DEVICE    0
#define LOCAL_PORT      1
#define LOCAL_ENDPOINT  2
#define REMOTE_PORT     3
#define REMOTE_ENDPOINT 4

/*
 * Everything needed to describe one sensor's endpoint graph: its node name,
 * the software nodes themselves, and the property arrays attached to them.
 */
struct sensor {
    /* Name given to the LOCAL_DEVICE node. */
    char name[9];
    /*
     * Indexed by LOCAL_* / REMOTE_*. The sixth element is never assigned
     * (presumably the zeroed terminator expected at registration; confirm).
     */
    struct software_node swnodes[6];
    /* [0] receives the reference to REMOTE_ENDPOINT; [1] is never assigned. */
    struct property_entry ep_properties[2];
    /* [0] receives the reference to LOCAL_ENDPOINT; [1] is never assigned. */
    struct property_entry cio2_properties[2];
};

/* Container for the sensors handled by this module (exactly one here). */
struct bridge {
    /* Not read or written anywhere in this example. */
    int n_sensors;
    struct sensor sensors[1];
};

static struct software_node remote_device = { "INT343E" };

/*
 * Build the two "remote-endpoint" reference properties directly in @sensor:
 * ep_properties[0] refers to REMOTE_ENDPOINT and cio2_properties[0] refers
 * back to LOCAL_ENDPOINT.
 *
 * NOTE: this is the defect the question is about. PROPERTY_ENTRY_REF()
 * expands to a compound literal whose .pointer member holds the address of
 * a second, nested compound literal. Expanded inside a function, that nested
 * object has automatic storage, so the pointer copied into @sensor dangles
 * as soon as this function returns.
 */
static void create_fwnode_properties(struct sensor *sensor)
{
    struct property_entry *cio2_properties = sensor->cio2_properties;
    struct property_entry *ep_properties = sensor->ep_properties;

    ep_properties[0] = PROPERTY_ENTRY_REF("remote-endpoint",&sensor->swnodes[REMOTE_ENDPOINT]);
    cio2_properties[0] = PROPERTY_ENTRY_REF("remote-endpoint",&sensor->swnodes[LOCAL_ENDPOINT]);
}

static void create_connection_swnodes(struct sensor *sensor)
{
    sensor->swnodes[LOCAL_DEVICE] = (const struct software_node) { sensor->name };
    sensor->swnodes[LOCAL_PORT] = (const struct software_node) { "port0",sensor->cio2_properties };
}

static int connect_supported_devices(struct bridge *bridge)
{
    struct sensor *sensor;
    int ret = 0;

    sensor = &bridge->sensors[0];

    snprintf(sensor->name,*remote;
        struct bridge *bridge;
    int ret;

        bridge = kzalloc(sizeof(*bridge),GFP_KERNEL);
        if (!bridge)
                return -ENOMEM;

    ret = software_node_register(&remote_device);
    if (ret < 0) {
        pr_err("Failed to register the CIO2 HID node\n");
        goto err_free_bridge;
    }

        ret = connect_supported_devices(bridge);
        if (ret)
                goto err_unregister_remote;

    local = software_node_fwnode(&bridge->sensors[0].swnodes[LOCAL_ENDPOINT]);
    if (IS_ERR_OR_NULL(local)) {
        pr_err("Failed to get local ep\n");
        ret = PTR_ERR(local);
        goto err_unregister_remote;
    }

    remote = fwnode_graph_get_remote_endpoint(local);
    if (IS_ERR_OR_NULL(remote)) {
        pr_err("Failed to get remote ep\n");
        ret = PTR_ERR(remote);
        goto err_put_local;
    } else {
        pr_info("Successfully fetched remote ep\n");
    }

    fwnode_handle_put(remote);
    fwnode_handle_put(local);
    software_node_unregister_nodes(bridge->sensors[0].swnodes);
    software_node_unregister(&remote_device);
        kfree(bridge);

    return 0;

err_put_local:
    fwnode_handle_put(local);
err_unregister_remote:
    software_node_unregister(&remote_device);
err_free_bridge:
        kfree(bridge);

    return ret;
}

/* Nothing to undo on unload: bridge_init() releases what it set up before returning. */
void bridge_exit(void)
{
}

module_init(bridge_init);
module_exit(bridge_exit);
MODULE_LICENSE("GPL v2");

第二个版本在插入模块时导致内核崩溃:

[  592.722091] BUG: kernel NULL pointer dereference,address: 0000000000000044
[  592.722093] #PF: supervisor read access in kernel mode
[  592.722094] #PF: error_code(0x0000) - not-present page
[  592.722095] PGD 0 P4D 0 
[  592.722097] Oops: 0000 [#1] SMP nopTI
[  592.722098] cpu: 1 PID: 31991 Comm: insmod Tainted: G           OE     5.9.0-rc7-backup #67
[  592.722099] Hardware name: Micro-Star International Co.,Ltd. MS-7B79/X470 GAMING PLUS MAX (MS-7B79),BIOS H.40 11/06/2019
[  592.722103] RIP: 0010:kobject_get+0xe/0x70
[  592.722104] Code: 63 bd e8 15 29 ff ff 4c 89 e7 e8 2d 8b d3 ff 41 5c 5d c3 66 0f 1f 84 00 00 00 00 00 55 48 89 e5 41 54 49 89 fc 48 85 ff 74 22 <f6> 47 3c 01 74 23 49 8d 7c 24 38 b8 01 00 00 00 f0 41 0f c1 44 24
[  592.722105] RSP: 0018:ffffb217c3953bd8 EFLAGS: 00010202
[  592.722106] RAX: 0000000000000048 RBX: 0000000000000000 RCX: ffff9b706c192c60
[  592.722107] RDX: ffff9b706c192c58 RSI: ffff9b6ef47b9ea8 RDI: 0000000000000008
[  592.722108] RBP: ffffb217c3953be0 R08: 0000000000000000 R09: ffffb217c3953a10
[  592.722108] R10: ffff9b6f747babff R11: 0000000000000000 R12: 0000000000000008
[  592.722109] R13: 0000000000000000 R14: ffff9b6ef47b9e18 R15: ffff9b6f57d35908
[  592.722110] FS:  00007fcf8761f540(0000) GS:ffff9b706e840000(0000) knlGS:0000000000000000
[  592.722111] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  592.722112] CR2: 0000000000000044 CR3: 00000003adf1c000 CR4: 0000000000350ee0
[  592.722113] Call Trace:
[  592.722116]  software_node_get+0x30/0x50
[  592.722117]  software_node_graph_get_remote_endpoint+0x8a/0xa0
[  592.722119]  fwnode_graph_get_remote_endpoint+0x25/0x30
[  592.722121]  init_module+0x158/0x180 [demo2]
[  592.722123]  ? create_fwnode_properties+0xb0/0xb0 [demo2]
[  592.722126]  do_one_initcall+0x4a/0x1fa
[  592.722128]  ? do_init_module+0x28/0x240
[  592.722130]  ? kmem_cache_alloc_trace+0x17e/0x2f0
[  592.722131]  do_init_module+0x62/0x240
[  592.722133]  load_module+0x280c/0x2b40
[  592.722136]  __do_sys_finit_module+0xbe/0x120
[  592.722137]  ? __do_sys_finit_module+0xbe/0x120
[  592.722139]  __x64_sys_finit_module+0x1a/0x20
[  592.722141]  do_syscall_64+0x38/0x90
[  592.722144]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[  592.722145] RIP: 0033:0x7fcf8776489d
[  592.722146] Code: 00 c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d c3 f5 0c 00 f7 d8 64 89 01 48
[  592.722147] RSP: 002b:00007ffe2f446cf8 EFLAGS: 00000246 ORIG_RAX: 0000000000000139
[  592.722148] RAX: ffffffffffffffda RBX: 00005619943d8780 RCX: 00007fcf8776489d
[  592.722149] RDX: 0000000000000000 RSI: 0000561992936358 RDI: 0000000000000003
[  592.722149] RBP: 0000000000000000 R08: 0000000000000000 R09: 00007fcf87838260
[  592.722151] R10: 0000000000000003 R11: 0000000000000246 R12: 0000561992936358
[  592.722151] R13: 0000000000000000 R14: 00005619943daff0 R15: 0000000000000000
[  592.722153] Modules linked in: demo2(OE+) binfmt_misc xt_CHECKSUM xt_MASQUERADE xt_conntrack ipt_REJECT nf_reject_ipv4 xt_tcpudp ip6table_mangle ip6table_nat iptable_mangle iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 libcrc32c nf_tables nfnetlink ip6table_filter ip6_tables iptable_filter bridge stp llc snd_hda_codec_realtek snd_hda_codec_generic ledtrig_audio snd_hda_codec_hdmi snd_hda_intel nls_iso8859_1 snd_intel_dspcfg snd_hda_codec uvcvideo snd_usb_audio snd_hda_core snd_usbmidi_lib videobuf2_vmalloc edac_mce_amd snd_hwdep videobuf2_memops videobuf2_v4l2 kvm_amd snd_seq_midi videobuf2_common snd_seq_midi_event kvm snd_rawmidi snd_seq videodev irqbypass snd_pcm snd_seq_device mc joydev input_leds rapl snd_timer wmi_bmof efi_pstore snd k10temp soundcore ccp mac_hid sch_fq_codel nct6775 hwmon_vid vhost_net vhost tap vhost_iotlb parport_pc ppdev lp parport ip_tables x_tables autofs4 pci_stub uas usb_storage hid_generic usbhid hid amdgpu gpu_sched i2c_algo_bit ttm
[  592.722172]  crct10dif_pclmul crc32_pclmul ghash_clmulni_intel drm_kms_helper aesni_intel syscopyarea sysfillrect crypto_simd sysimgblt cryptd fb_sys_fops glue_helper drm i2c_piix4 r8169 ahci realtek libahci wmi gpio_amdpt gpio_generic [last unloaded: demo]
[  592.722179] CR2: 0000000000000044
[  592.722180] ---[ end trace 8ad8cd8d558d27ca ]---
[  592.829848] RIP: 0010:kobject_get+0xe/0x70
[  592.829849] Code: 63 bd e8 15 29 ff ff 4c 89 e7 e8 2d 8b d3 ff 41 5c 5d c3 66 0f 1f 84 00 00 00 00 00 55 48 89 e5 41 54 49 89 fc 48 85 ff 74 22 <f6> 47 3c 01 74 23 49 8d 7c 24 38 b8 01 00 00 00 f0 41 0f c1 44 24
[  592.829850] RSP: 0018:ffffb217c3953bd8 EFLAGS: 00010202
[  592.829851] RAX: 0000000000000048 RBX: 0000000000000000 RCX: ffff9b706c192c60
[  592.829852] RDX: ffff9b706c192c58 RSI: ffff9b6ef47b9ea8 RDI: 0000000000000008
[  592.829852] RBP: ffffb217c3953be0 R08: 0000000000000000 R09: ffffb217c3953a10
[  592.829853] R10: ffff9b6f747babff R11: 0000000000000000 R12: 0000000000000008
[  592.829853] R13: 0000000000000000 R14: ffff9b6ef47b9e18 R15: ffff9b6f57d35908
[  592.829854] FS:  00007fcf8761f540(0000) GS:ffff9b706e840000(0000) knlGS:0000000000000000
[  592.829855] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  592.829855] CR2: 0000000000000044 CR3: 00000003adf1c000 CR4: 0000000000350ee0

总而言之,“本地”端点的software_node应该具有一个“远程端点”(remote-endpoint)属性,该属性包含指向“远程”端点software_node的指针……但显然它现在是一个空指针。

据我所知,只有两个真正的区别;

  1. 指针是指向堆上某个位置的指针,而不是全局变量空间中的某个指针。我不知道为什么这很重要,但也许确实如此。
  2. 我不再声明端点const,因为我无法在分配内存的同时创建端点。据我所知,限定词主要是对编译器的承诺,而不是会影响实际行为的东西……但也许我对此有误。

我想问的问题是:“为什么第二版代码不起作用?”

编辑:附加第3版代码。它在功能上与第2版相同,却可以正常工作;区别只是把create_fwnode_properties()和create_connection_swnodes()中的所有内容都移到了bridge_init()中:

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/property.h>
#include <linux/slab.h>

/* Indices into sensor->swnodes[] for each software node of the graph. */
#define LOCAL_DEVICE    0
#define LOCAL_PORT      1
#define LOCAL_ENDPOINT  2
#define REMOTE_PORT     3
#define REMOTE_ENDPOINT 4

/*
 * Everything needed to describe one sensor's endpoint graph: its node name,
 * the software nodes themselves, and the property arrays attached to them.
 */
struct sensor {
    /* Name given to the LOCAL_DEVICE node; sized for "OVTI2680" plus NUL. */
    char name[9];
    /*
     * Indexed by LOCAL_* / REMOTE_*. The sixth element is never assigned
     * (presumably the zeroed terminator expected at registration; confirm).
     */
    struct software_node swnodes[6];
    /* [0] receives a "remote-endpoint" reference in bridge_init(). */
    struct property_entry ep_properties[2];
    /* Attached to one of the software nodes in bridge_init(). */
    struct property_entry cio2_properties[2];
};

/* Container for the sensors handled by this module (exactly one here). */
struct bridge {
    /* Not read or written anywhere in this example. */
    int n_sensors;
    struct sensor sensors[1];
};

static struct software_node remote_device = { "INT343E" };

int bridge_init(void)
{   
    struct fwnode_handle *local,*remote;
        struct bridge *bridge;
        struct sensor *sensor;
    int ret;

        bridge = kzalloc(sizeof(*bridge),GFP_KERNEL);
        if (!bridge)
                return -ENOMEM;

    ret = software_node_register(&remote_device);
    if (ret < 0) {
        pr_err("Failed to register the CIO2 HID node\n");
        goto err_free_bridge;
    }

    sensor = &bridge->sensors[0];
    snprintf(sensor->name,"OVTI2680");

    struct property_entry *cio2_properties = sensor->cio2_properties;
    struct property_entry *ep_properties = sensor->ep_properties;
    ep_properties[0] = PROPERTY_ENTRY_REF("remote-endpoint",&sensor->swnodes[LOCAL_ENDPOINT]);

    sensor->swnodes[LOCAL_DEVICE] = (const struct software_node) { sensor->name };
    sensor->swnodes[LOCAL_PORT] = (const struct software_node) { "port0",sensor->cio2_properties };

        ret = software_node_register_nodes(sensor->swnodes);
        if (ret)
                goto err_unregister_remote;

    local = software_node_fwnode(&bridge->sensors[0].swnodes[LOCAL_ENDPOINT]);
    if (IS_ERR_OR_NULL(local)) {
        pr_err("Failed to get local ep\n");
        ret = PTR_ERR(local);
        goto err_unregister_remote;
    }

    remote = fwnode_graph_get_remote_endpoint(local);
    if (IS_ERR_OR_NULL(remote)) {
        pr_err("Failed to get remote ep\n");
        ret = PTR_ERR(remote);
        goto err_put_local;
    } else {
        pr_info("Successfully fetched remote ep\n");
    }

    fwnode_handle_put(remote);
    fwnode_handle_put(local);
    software_node_unregister_nodes(bridge->sensors[0].swnodes);
    software_node_unregister(&remote_device);
        kfree(bridge);

    return 0;

err_put_local:
    fwnode_handle_put(local);
err_unregister_remote:
    software_node_unregister(&remote_device);
err_free_bridge:
        kfree(bridge);

    return ret;
}

/* Nothing to undo on unload: bridge_init() releases what it set up before returning. */
void bridge_exit(void)
{
}

module_init(bridge_init);
module_exit(bridge_exit);
MODULE_LICENSE("GPL v2");

这表明问题是由变量作用域引起的,但我认为自己并没有使用栈变量、也没有指望它在所在的块之外仍然存活。

解决方法

问题确实出在变量作用域上,因为PROPERTY_ENTRY_REF()实际上定义了一个新的变量:

/*
 * Builds a struct property_entry of type DEV_PROP_REF referring to _ref_.
 * Both the entry and the software_node_ref_args object that .pointer is
 * aimed at are compound literals, so they take the storage duration of the
 * place where the macro is expanded: static at file scope, automatic inside
 * a function body.
 */
#define PROPERTY_ENTRY_REF(_name_,_ref_,...)              \
(struct property_entry) {                       \
    .name = _name_,\
    .length = sizeof(struct software_node_ref_args),\
    .type = DEV_PROP_REF,\
    { .pointer = &(const struct software_node_ref_args) {       \
        .node = _ref_,\
        .nargs = ARRAY_SIZE(((u64[]){ 0,##__VA_ARGS__ })) - 1,\
        .args = { __VA_ARGS__ },\
    } },\
}

其中的.pointer成员被初始化为一个新的(const struct software_node_ref_args)复合字面量的地址。在函数内部,它是一个栈变量,其生命周期仅限于所在的函数块。

编辑:解决方法是显式声明一个software_node_ref_args数组,然后将其传递给PROPERTY_ENTRY_REF_ARRAY:

/* Sensor layout for the fix: the reference arguments are stored in the sensor itself. */
struct sensor {
    struct property_entry ep_properties[1];
    /* Backing storage for the reference that ep_properties[0] points at. */
    struct software_node_ref_args local_ref[1];
    struct software_node swnodes[6];
};

/*
 * Copy the reference arguments into the sensor so they live as long as it does.
 * NOTE(review): SWNODE_CIO2_ENDPOINT is not defined in the listings shown
 * here; presumably it plays the role of REMOTE_ENDPOINT. Confirm.
 */
sensor->local_ref[0] = (struct software_node_ref_args){
        .node = &sensor->swnodes[SWNODE_CIO2_ENDPOINT]
        };

/* Point the property at that stored array rather than at a temporary compound literal. */
sensor->ep_properties[0] = PROPERTY_ENTRY_REF_ARRAY("remote-endpoint",sensor->local_ref);