问题描述
我有一个函数,用于抓取页面上的一些数据,并将其存储在变量 `result` 中。由于结果分布在多个页面上,我在一个 `for` 循环中反复调用该函数。
/**
 * Loads a search-results page and extracts one record per listing card.
 *
 * @param {object} page - Puppeteer-style page object; only `goto` and
 *   `evaluate` are used.
 * @param {string} query - URL of the results page to load.
 * @returns {Promise<Array<{type: ?string, price: ?number, rooms: ?number,
 *   surface: ?number, postcode: ?string, url: ?string}>>} One object per
 *   `.card--result__body` element. A field is `null` when its element is
 *   missing from the card, and `NaN` when the element exists but its text
 *   does not start with a number.
 */
const PageScraping = async (page, query) => {
  await page.goto(query, { waitUntil: "networkidle2" });
  const result = await page.evaluate(() => {
    // Everything used here must be declared inside this callback: it is
    // executed through page.evaluate, not in the calling scope.

    // NOTE(review): the selectors containing "information" were garbled in
    // the source text (an obfuscation filter replaced part of each class
    // name). They are reconstructed here on the assumption that the missing
    // fragment is "informat" -- confirm against the live page markup.
    const PROPERTY_SELECTOR =
      '.card__information.card--result__information.card__information--property';
    const ROOMS_SELECTOR =
      `.card__informations.card--result__informations > ${PROPERTY_SELECTOR} > .abbreviation > .sr-only`;
    const LOCALITY_SELECTOR =
      '.card__information.card--result__information.card__information--locality';

    // Looks the element up once; yields the falsy lookup result (null) when
    // the element is absent, otherwise whatever `extract` returns.
    const readField = (card, selector, extract) => {
      const node = card.querySelector(selector);
      return node && extract(node);
    };

    // Explicit radix so the text is always parsed as decimal.
    const toInt = (text) => parseInt(text, 10);

    return [...document.querySelectorAll('.card--result__body')].map((card) => ({
      type: readField(card, '.card__title-link', (node) => node.innerText),
      price: readField(card, '.card--result__price > span > .sr-only', (node) =>
        toInt(node.innerText.replace('€', ''))),
      rooms: readField(card, ROOMS_SELECTOR, (node) =>
        toInt(node.innerText.replace(' chambres', ''))),
      // The surface is taken as the fifth space-separated token of the
      // property line.
      surface: readField(card, PROPERTY_SELECTOR, (node) =>
        toInt(node.innerText.split(' ')[4])),
      // The postcode is taken as the first space-separated token of the
      // locality line.
      postcode: readField(card, LOCALITY_SELECTOR, (node) =>
        node.innerText.split(' ')[0]),
      url: readField(card, '.card__title.card--result__title > a', (node) => node.href),
    }));
  });
  return result;
};
之后,我需要在循环中把每一页的结果存入另一个用于汇总所有数据的变量。我在那里使用了 `push`,但出现了以下错误:
Cannot read property 'push' of undefined(无法读取 undefined 的属性 'push')
// Failing version from the question: opens the first results URL, reads the
// page count from the pagination control, then scrapes every page with
// PageScraping and tries to collect the per-page results in `all_results`.
// NOTE: `all_results` is declared without an initializer, so its value is
// undefined. The `all_results.push(result)` call below therefore throws a
// TypeError on every iteration; the inner catch logs it under the
// "Could not reach landing page" message.
let all_results;
try{
let query = 'myurl'
await page.goto(query,{ waitUntil: "networkidle2" });
// The page count is parsed from the label of the second-to-last pagination item.
let max_pages = parseInt(await page.$eval('.pagination > li:nth-last-child(2) > a > .button__label',o => o.innerText));
for(let i = 1; i < max_pages+1; i++){
console.log('page ',i,' of ',max_pages);
try{
const result = await PageScraping(page,query);
// The URL for the next iteration is only assigned after a successful scrape.
query = `urlpage${i+1}`;
all_results.push(result);
console.log(all_results);
} catch(e){
// Any error thrown inside the inner try (not only navigation errors) ends up here.
console.log('ERROR: Could not reach landing page',e.message);
}
}
} catch(error) {
console.log('ERROR: Could not reach landing page',error.message);
} finally {
// The browser is closed whether or not scraping succeeded; a failure to
// close is logged rather than rethrown.
try{
await browser.close();
}catch(browser_error){
console.log('ERROR cant close browser??',browser_error.message)
}
}
}
解决方法
以下代码可以正常运行。按照建议,我将 `all_results` 初始化为一个空数组。
// Collects the rows scraped from every results page. It is initialised to an
// empty array so that push() is callable from the first iteration on.
// Each element is the array returned by PageScraping for one page.
let all_results = [];
try {
  const firstPageUrl = 'myurl';
  await page.goto(firstPageUrl, { waitUntil: "networkidle2" });

  // The page count is parsed from the label of the second-to-last
  // pagination item. The radix is explicit so the label is always read as
  // decimal.
  const max_pages = parseInt(
    await page.$eval(
      '.pagination > li:nth-last-child(2) > a > .button__label',
      (o) => o.innerText,
    ),
    10,
  );
  if (Number.isNaN(max_pages)) {
    // Without this guard the loop below would silently run zero times.
    throw new Error('could not parse the page count from the pagination control');
  }

  for (let i = 1; i < max_pages + 1; i++) {
    console.log('page ', i, ' of ', max_pages);
    // The URL is derived from the page number rather than carried over from
    // the previous iteration, so a failed page cannot make later iterations
    // fetch the wrong page or skip the last one.
    const query = i === 1 ? firstPageUrl : `urlpage${i}`;
    try {
      const result = await PageScraping(page, query);
      all_results.push(result);
      console.log(all_results);
    } catch (e) {
      // A single failing page is logged and skipped; the remaining pages
      // are still attempted.
      console.log('ERROR: could not reach landing page', e.message);
    }
  }
} catch (error) {
  console.log('ERROR: could not reach landing page', error.message);
} finally {
  // The browser is closed whether or not scraping succeeded; a failure to
  // close is logged rather than rethrown.
  try {
    await browser.close();
  } catch (browser_error) {
    console.log('ERROR cant close browser??', browser_error.message);
  }
}
}