问题描述
我想抓取网站的数据,所以我尝试使用 cheerio npm 包。
选择器在 chrome 开发工具中运行良好,但在 cheerio 中却得不到同样的结果:
// Collect the combined text of every element matched by the selector and
// break it into an array on newline characters, then print it.
const commoditySelector = "#tdm_base_scroll > div > div.dt_ta_09 > div.dt_ta_10";
let commodity_array = $(commoditySelector).text().split("\n");
console.log(commodity_array);
我从中抓取数据的网站网址是:https://www.commodityonline.com/mandiprices/
const request = require("request-promise");
const cheerio = require("cheerio");
const fs = require("fs");
const json2csv = require("json2csv").Parser;

const url = "https://www.commodityonline.com/mandiprices/";

// Fetch the page with explicit accept / accept-encoding / accept-language
// headers, load the body into cheerio, and print the text of the matched
// elements split on newlines.
(async () => {
  const mandiData = [];

  const requestHeaders = {
    accept:
      "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
    "accept-encoding": "gzip,deflate,br",
    "accept-language": "en-IN,en-GB;q=0.9,en-US;q=0.8,en;q=0.7,la;q=0.6",
  };

  const response = await request({
    uri: url,
    headers: requestHeaders,
    gzip: true,
  });

  const $ = cheerio.load(response);
  const commoditySelector = "#tdm_base_scroll > div > div.dt_ta_09 > div.dt_ta_10";
  const commodityLines = $(commoditySelector).text().split("\n");
  console.log(commodityLines);
})();
我是从 youtube 频道的这个视频(this video)了解到这种抓取方法的。
请求头有没有问题?
我是网络抓取的新手,所以我不明白我做错了什么。
解决方法
在 HTTP 请求头中,您指定了 "accept-encoding": "gzip,deflate,br",这意味着您告诉服务器可以返回压缩后的响应(例如 gzip)。Cheerio 需要的是文本,因此无法解析这样的响应数据。
只需删除该请求头即可使其工作:
const request = require("request-promise");
const cheerio = require("cheerio");

const url = "https://www.commodityonline.com/mandiprices/";

// Fetch the page without an accept-encoding header, load the body into
// cheerio, and print the text of the matched elements split on newlines.
(async () => {
  const requestHeaders = {
    accept:
      "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
    "accept-language": "en-IN,en-GB;q=0.9,en-US;q=0.8,en;q=0.7,la;q=0.6",
  };

  const response = await request({ uri: url, headers: requestHeaders });

  const $ = cheerio.load(response);
  const commoditySelector = "#tdm_base_scroll > div > div.dt_ta_09 > div.dt_ta_10";
  const commodityLines = $(commoditySelector).text().split("\n");
  console.log(commodityLines);
})();
请注意,request 包已被弃用。一个不错的替代方案是 axios:
const axios = require("axios");
const cheerio = require("cheerio");

const url = "https://www.commodityonline.com/mandiprices/";

/**
 * Download the mandi prices page and print one record per row element
 * (`div.dt_ta_09`). Each record holds the trimmed text of the row's cells:
 * commodity, marketCenter, variety, arrivals, modalPrice and minMaxPrice.
 *
 * On any failure (network error, non-2xx status, ...) the error is logged
 * and the process exit code is set to 1 instead of leaving an unhandled
 * promise rejection.
 */
(async () => {
  const response = await axios.get(url);
  const $ = cheerio.load(response.data);

  const data = $("#tdm_base_scroll > div > div.dt_ta_09")
    .toArray()
    .map((row) => {
      // Passing `row` as the context restricts each lookup to this row;
      // without it the selector would match cells from every row at once.
      const cellText = (selector) => $(selector, row).text().trim();

      // A row carries two `div.dt_ta_14` cells: the first is read as the
      // modal price and the second as the min/max price.
      const price = $("div.dt_ta_14", row);

      return {
        commodity: cellText("div.dt_ta_10"),
        marketCenter: cellText("div.dt_ta_11"),
        variety: cellText("div.dt_ta_12"),
        arrivals: cellText("div.dt_ta_13"),
        modalPrice: price.eq(0).text().trim(),
        minMaxPrice: price.eq(1).text().trim(),
      };
    });

  console.log(data);
})().catch((err) => {
  console.error(`Failed to scrape ${url}:`, err);
  process.exitCode = 1;
});