为 Bing 搜索引擎编写搜索结果解析器

问题描述

我为 Google 编写了一个网页解析器,运行良好:它获取前 5 条搜索结果,并在程序中显示给用户。代码如下。

控制器

using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using HtmlAgilityPack;
using Microsoft.AspNetCore.Mvc;
using Microsoft.Extensions.Logging;

namespace SearchDemos.Controllers
{
    /// <summary>
    /// Plain data carrier for one scraped search hit, returned to the client by the search actions.
    /// </summary>
    public class SearchInfo
    {
        /// <summary>Trimmed inner text of the result's anchor, as set by the Google action.</summary>
        public string Title { get; set; }
        /// <summary>Value of the result anchor's <c>href</c> attribute, as set by the Google action.</summary>
        public string Link { get; set; }

        // NOTE(review): the Google action in this controller never assigns the two properties below;
        // presumably they are reserved for results coming from the Bing action - confirm.
        /// <summary>Title of a Bing result.</summary>
        public string BTitle { get; set; }
        /// <summary>Link of a Bing result.</summary>
        public string BLink { get; set; }
    }

    [ApiController]
    [Route("[controller]")]
    public class SearchController : ControllerBase
    {
        /// <summary>
        /// Downloads the Google result page for <paramref name="q"/> and extracts up to five hits
        /// from the elements whose class is exactly <c>r</c>.
        /// </summary>
        /// <param name="q">Search text taken from the query string; <c>null</c> is treated as empty.</param>
        /// <returns>
        /// The scraped hits with <see cref="SearchInfo.Title"/> and <see cref="SearchInfo.Link"/> filled.
        /// The list is empty when the page contains no matching elements.
        /// </returns>
        [HttpGet("Google")]
        public IList<SearchInfo> DoGoogleSearch(string q)
        {
            // Escape the user's text so characters such as '&', '#' or '+' stay inside the q parameter
            // instead of being interpreted as URL syntax.
            var url = "https://www.google.com/search?q=" + Uri.EscapeDataString(q ?? string.Empty);

            HtmlWeb web = new HtmlWeb();
            //  accept-language : RU or EN
            var htmlDoc = web.Load(url);

            var result = new List<SearchInfo>();

            // SelectNodes returns null (not an empty collection) when the XPath matches nothing,
            // and Enumerable.Take throws ArgumentNullException on a null source - so guard first.
            var rows = htmlDoc.DocumentNode.SelectNodes("//*[@class='r']");
            if (rows == null)
            {
                return result;
            }

            foreach (HtmlNode row in rows.Take(5))
            {
                // Only a direct child anchor is considered; rows without one are skipped
                // rather than dereferencing a null node.
                var nodeRef = row.SelectSingleNode("./a");
                if (nodeRef == null)
                {
                    continue;
                }

                result.Add(new SearchInfo
                {
                    Link = nodeRef.GetAttributeValue("href", string.Empty),
                    Title = nodeRef.InnerText.Trim()
                });
            }

            return result;
        }

我的HTML页面

@page

<!DOCTYPE html>
<html>
<head>
    <Meta charset="utf-8" />
    <title>Search page</title>
    <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/css/bootstrap.min.css"
          integrity="sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T" crossorigin="anonymous">
    <link href="~/css/Style.css" rel="stylesheet" />
    <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
</head>
<body>
    <section class="main">
        <!-- Another variation with a button -->
        <div class="input-group">
            <input type="text" class="form-control" placeholder="Search Google & Bing" id="txtQuery">
            <div class="input-group-append">
                <button class="btn btn-secondary" type="button" id="btnSearch">
                    <i class="fa fa-search">Search</i>
                </button>
            </div>
        </div>

        <div id="searchResults">

        </div>

    </section>
        <script>
            // Runs once the DOM is ready: wires the search button to the Google endpoint.
            $(function () {

                // Success callback: logs every returned item's title and appends it as a <p>.
                function showTitles(data) {
                    for (var key in data) {
                        var title = data[key].title;
                        console.log(title);
                        $("#searchResults").append($("<p></p>").text(title));
                    }
                    console.log("data received");
                }

                // Error callback: the request failed, tell the user.
                function reportFailure() {
                    alert("Server error!");
                }

                $("#btnSearch").on("click", function () {
                    // Drop whatever the previous search rendered.
                    $("#searchResults").html("");

                    $.ajax({
                        type: "GET",
                        url: '@Url.Action("Google","Search")',
                        data: { q: $("#txtQuery").val() },
                        success: showTitles,
                        error: reportFailure
                    });
                });
            });
        </script>

</body>
</html>

这部分工作正常。之后,我尝试为 Bing 搜索引擎编写同样的解析器,但它不起作用,尽管我是照着上面的示例写的。问题出在哪里?

下面这一行抛出了异常(System.ArgumentNullException:“值不能为空。(参数 'source')”)。但我确实从 UI 传入了数据,q != null。

var rows = htmlDoc.DocumentNode.SelectNodes("//*[@class='b_algo']").Take(5);
      

我的控制器

using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using HtmlAgilityPack;
using Microsoft.AspNetCore.Mvc;
using Microsoft.Extensions.Logging;

namespace SearchDemos.Controllers
{
    /// <summary>
    /// Plain data carrier for one scraped search hit, returned to the client by the search action.
    /// </summary>
    public class SearchInfo
    {
        // NOTE(review): the Bing action in this controller never assigns Title/Link;
        // presumably they belong to the Google variant of the controller - confirm.
        /// <summary>Title of a Google result.</summary>
        public string Title { get; set; }
        /// <summary>Link of a Google result.</summary>
        public string Link { get; set; }

        /// <summary>Title of a Bing result; the Bing action takes it from the result anchor's inner text.</summary>
        public string BTitle { get; set; }
        /// <summary>Link of a Bing result; the Bing action takes it from the result anchor's <c>href</c>.</summary>
        public string BLink { get; set; }
    }

    /// <summary>
    /// API controller exposing the Bing scraping endpoint (question version: this is the code
    /// that fails at run time with the reported <see cref="ArgumentNullException"/>).
    /// </summary>
    [ApiController]
    [Route("[controller]")]
    public class SearchController : ControllerBase
    {

        /// <summary>
        /// Downloads the Bing result page for <paramref name="q"/> and tries to extract up to five
        /// hits from the elements whose class is exactly <c>b_algo</c>.
        /// </summary>
        /// <param name="q">Search text taken from the query string; appended to the URL as-is.</param>
        /// <returns>The scraped hits with <see cref="SearchInfo.BTitle"/> and <see cref="SearchInfo.BLink"/> filled.</returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown (parameter <c>source</c>) when the downloaded page contains no <c>b_algo</c> element.
        /// </exception>
        [HttpGet("Bing")]
        public IList<SearchInfo> dobingSearch(string q)
        {
            var html = @"https://www.bing.com/search?q=" + q;

            HtmlWeb web = new HtmlWeb();

            var htmlDoc = web.Load(html);

            // NOTE: this is the failing statement. SelectNodes returns null - not an empty
            // collection - when the XPath matches nothing, and Take() rejects a null source.
            // It is left unguarded here on purpose, because it is the behaviour being asked about.
            var rows = htmlDoc.DocumentNode.SelectNodes("//*[@class='b_algo']").Take(5);

            var result = new List<SearchInfo>();
            foreach (HtmlNode row in rows)
            {
                var nodeRef = row.SelectSingleNode("./a");                //here Could be  var nodeRef = row.SelectSingleNode("//a");

                var si = new SearchInfo
                {
                    // Restored initializer: the href default value and the closing parenthesis were missing.
                    BLink = nodeRef.GetAttributeValue("href", string.Empty),
                    BTitle = nodeRef.InnerText.Trim()
                };

                result.Add(si);
            }

            return result;
        }
    }
}

和HTML

@page

<!DOCTYPE html>
<html>
<head>
    <Meta charset="utf-8" />
    <title>Search page</title>
    <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/css/bootstrap.min.css"
          integrity="sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T" crossorigin="anonymous">
    <link href="~/css/Style.css" rel="stylesheet" />
    <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
</head>
<body>
    <section class="main">
        <!-- Another variation with a button -->
        <div class="input-group">
            <input type="text" class="form-control" placeholder="Search Google & Bing" id="txtQuery">
            <div class="input-group-append">
                <button class="btn btn-secondary" type="button" id="btnSearch">
                    <i class="fa fa-search">Search</i>
                </button>
            </div>
        </div>

        <div id="searchResults">

        </div>

    </section>

    <script>
        // Runs once the DOM is ready: wires the search button to the Bing endpoint.
        $(function () {

            $("#btnSearch").click(function () {

                // Drop whatever the previous search rendered.
                $("#searchResults").html("");

                $.ajax({
                    type: "GET",
                    // Restored: the controller argument and the closing quote were missing.
                    url: '@Url.Action("Bing","Search")',
                    data: {
                        q: $("#txtQuery").val()
                    },
                    success: function (data) {
                        for (var i in data) {
                            var item = data[i];
                            // The Bing action fills BTitle/BLink, not Title/Link.
                            // Assumes the same camelCase JSON naming that makes item.title work
                            // on the Google page, i.e. BTitle is serialized as bTitle.
                            console.log(item.bTitle);
                            var p = $("<p></p>").text(item.bTitle);
                            $("#searchResults").append(p);
                        }
                        console.log("data received");
                    },
                    error: function () {
                        alert("Server error!");
                    }
                });

            });

        });
    </script>

</body>
</html>

解决方法

好的,我最后通过简单的 Google 搜索找到了解决方案:https://blogs.msmvps.com/bsonnino/2019/06/16/parsing-html-data-with-c/

代码中仍然有问题:for 循环里查找节点的方式不正确,但至少已经能返回结果了。

要在 API 控制器中使用,您还需要对下面的代码做相应修改。

请不要在生产环境或任何线上环境中使用这段代码:您的站点/服务器最终会被 Bing 或 Google 屏蔽,代码也就无法使用了。请改用它们提供的官方搜索 API。另外,一旦 Google 或 Microsoft 修改了页面的 HTML/CSS 等,这段代码也会失效。

/// <summary>
/// MVC controller that scrapes the Bing result page and renders the hits through a view.
/// </summary>
public class SearchController : Controller
{
    /// <summary>Plain data carrier for one scraped search hit.</summary>
    public class SearchInfo
    {
        public string Title { get; set; }
        public string Link { get; set; }

        public string BTitle { get; set; }
        public string BLink { get; set; }
    }

    /// <summary>
    /// Downloads the Bing result page for <paramref name="q"/> and extracts up to five hits
    /// from the elements whose class is exactly <c>b_algo</c>.
    /// </summary>
    /// <param name="q">Search text; <c>null</c> is treated as empty.</param>
    /// <returns>
    /// A view whose model is the list of hits (<see cref="SearchInfo.BTitle"/> and
    /// <see cref="SearchInfo.BLink"/> filled). The list is empty when nothing matches.
    /// </returns>
    public async System.Threading.Tasks.Task<ActionResult> DoBingSearchAsync(string q)
    {
        // Escape the user's text so characters such as '&', '#' or '+' stay inside the q parameter.
        var html = @"https://www.bing.com/search?q=" + System.Uri.EscapeDataString(q ?? string.Empty);

        HtmlWeb web = new HtmlWeb();
        // Browser-like User-Agent. Together with the async load this is the only request-level
        // difference from the failing version, so it is presumably what makes Bing return
        // the regular result markup - TODO confirm.
        web.UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML,like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134";

        var htmlDoc = await web.LoadFromWebAsync(html);

        var result = new List<SearchInfo>();

        // SelectNodes returns null (not an empty collection) when the XPath matches nothing;
        // calling Take() on that null is what raises ArgumentNullException (parameter 'source').
        var rows = htmlDoc.DocumentNode.SelectNodes("//*[@class='b_algo']");
        if (rows != null)
        {
            foreach (HtmlNode row in rows.Take(5))
            {
                // "./a" only matches an anchor that is a direct child of the row. A leading "//"
                // would search from the document root and give the same anchor for every row;
                // ".//a" is the relative form for "any anchor inside this row".
                var nodeRef = row.SelectSingleNode("./a");
                if (nodeRef == null)
                {
                    // No direct anchor in this row: skip it instead of dereferencing null.
                    continue;
                }

                result.Add(new SearchInfo
                {
                    BLink = nodeRef.GetAttributeValue("href", string.Empty),
                    BTitle = nodeRef.InnerText.Trim()
                });
            }
        }

        return View(result);
    }
}