如何使用 libxml2 获取标签块中的文本？

问题描述

我有htmlDocPtr htmlfile = htmlParseFile(localfileurl,NULL)。

本地html 文件

 <!DOCTYPE html>
<html>
<head>
<Meta></Meta>
<title>Page Title</title>
</head>
<body>

<h1>This is a heading</h1>
<p>This is a paragraph.</p>

</body>
</html>

我最终希望将页面标题存储在一个 char 变量中。

我尝试过

htmlNodePtr node = xmlDocGetRootElement(htmlfile);
// step by hand from the root element toward the <title> element;
// the number of hops is specific to the sample file shown above
node = node->children->next->children->next->next->next;

现在我该如何获取 title 的值？

解决方法

遍历文档，查找名称为“title”的元素，并获取其内容：

/*
 * Walk the sibling list that starts at a_node, descending into every
 * node's children, and print the name and text content of each element
 * named "title" to stdout.
 *
 * doc    - document the nodes belong to; forwarded to xmlNodeListGetString
 * a_node - first node of the sibling list to scan; NULL makes this a no-op
 */
static void printTitle(xmlDoc *doc,xmlNode * a_node)
{
    xmlNode *cur_node = NULL;

    for (cur_node = a_node; cur_node != NULL; cur_node = cur_node->next) {
        if (cur_node->type == XML_ELEMENT_NODE &&
            xmlStrcmp(cur_node->name, (const xmlChar *)"title") == 0) {
            xmlChar *content = xmlNodeListGetString(doc, cur_node->xmlChildrenNode, 1);

            /*
             * xmlNodeListGetString may return NULL (e.g. the element has no
             * children); passing NULL to %s is undefined behaviour, so print
             * an empty string instead. xmlChar is unsigned char, hence the
             * casts for %s.
             */
            printf("node type: Element,name: %s,content: %s\n",
                   (const char *)cur_node->name,
                   content != NULL ? (const char *)content : "");
            xmlFree(content);
        }

        /* Recurse into the subtree below this node (NULL children is fine). */
        printTitle(doc, cur_node->children);
    }
}


/*
 * Parse the file named by argv[1] and print the content of every
 * "title" element found in it.
 *
 * Returns 0 on success, 1 when the argument count is wrong or the file
 * cannot be parsed.
 */
int main(int argc,char **argv)
{
    xmlDoc *doc = NULL;
    xmlNode *root_element = NULL;

    if (argc != 2) {
        fprintf(stderr, "usage: %s <file>\n", argc > 0 ? argv[0] : "printtitle");
        return(1);
    }

    /* Checks that the headers compiled against match the loaded library. */
    LIBXML_TEST_VERSION

    doc = xmlReadFile(argv[1],NULL,0);

    if (doc == NULL) {
        /* Report the failure on stderr and exit non-zero instead of
         * continuing with no document and returning success. */
        fprintf(stderr, "error: could not parse file %s\n", argv[1]);
        xmlCleanupParser();
        return(1);
    }

    root_element = xmlDocGetRootElement(doc);

    /* printTitle accepts a NULL start node, so an empty document is fine. */
    printTitle(doc,root_element);

    xmlFreeDoc(doc);

    xmlCleanupParser();

    return 0;
}

（请参见Retrieving Element Content）

c html libxml2 libxml2