Reddit feed解析器与Google App脚本

问题描述

这是我的Google脚本Code.gs:

 /* Reddit Scraper written by Amit Agarwal */



var REDDIT = "HomeImprovement";

function run() {

   deleteTriggers_();

  /* Fetch Reddit posts every 5 minutes to avoid hitting
     the reddit and Google Script quotas */
  ScriptApp.newTrigger("scrapReddit")
           .timeBased().everyMinutes(5).create();
}


function scrapReddit() {

  // Process 20 Reddit posts in a batch
  var url = "http://www.reddit.com/r/"
            + REDDIT + "/new.xml?limit=100" + getLastID_();

  // Reddit API returns the results in XML format
  var response = UrlFetchApp.fetch(url).getContentText();
  var doc = XmlService.parse(response);
  var root = doc.getRootElement();

  var entries = root.getChildren("feed")[0].getChildren("entry");


  var data = new Array();

  for (var i=0; i<entries.length; i++) {

    /* Extract post date,title,description and link from Reddit */

    var date = entries[i].getChild('updated').getText();
    var title = entries[i].getChild('title').getText();
    var desc = entries[i].getChild('content').getText();
    var link = entries[i].getChild('link').getText();

    data[i] = new Array(date,desc,link);
  }

  if (data.length == 0) {
    /* There's no data so stop the background trigger */
    deleteTriggers_();
  } else {
    writeData_(data);
  }
}


/* Write the scrapped data in a batch to the
   Google Spreadsheet since this is more efficient */
function writeData_(data) {

  if (data.length === 0) {
    return;
  }

  var ss = SpreadsheetApp.getActiveSpreadsheet();
  var sheet = ss.getSheets()[0];
  var row = sheet.getLastRow();
  var col = sheet.getLastColumn();

  var range = sheet.getRange(row+1,1,data.length,4);
  try {
    range.setValues(data);
  } catch (e) {
    Logger.log(e.toString());
  }
}

/* Use the ID of the last processed post from Reddit as token */
function getLastID_() {

  var ss = SpreadsheetApp.getActiveSpreadsheet();
  var sheet = ss.getSheets()[0];
  var row = sheet.getLastRow();
  var col = sheet.getLastColumn();

  var url = sheet.getRange(row,col).getValue().toString();
  var pattern = /.*comments\/([^\/]*).*/;
  var id = url.match(pattern);

  return id ? "&after=t3_" + id[1] : "";

}

/* Posts Extracted,Delete the Triggers */
function deleteTriggers_() {
  var triggers = ScriptApp.getProjectTriggers();
  for (var i=0; i<triggers.length; i++) {
    ScriptApp.deleteTrigger(triggers[i]);
  }
}

这是我运行脚本时生成的错误:

TypeError:无法读取未定义的属性“ getChildren”(第29行,文件“代码”)

此处是网址Feed:https://www.reddit.com/r/HomeImprovement/new.xml?limit=100

有帮助吗?

谢谢

更多文字更多文字更多文字更多文字更多文字

解决方法

var entries = root.getChildren("feed")[0].getChildren("entry");

引发错误,因为<feed> 是文档的根元素,并且内部不包含另一个<feed>元素,因此root.getChildren("feed")[0]表达式返回{{ 1}},然后您就可以运行

undefined

此外,您需要包括namespace才能获得所有var entries = undefined.getChildren("entry"); 元素。

<entry>

相关问答

依赖报错 idea导入项目后依赖报错,解决方案:https://blog....
错误1:代码生成器依赖和mybatis依赖冲突 启动项目时报错如下...
错误1:gradle项目控制台输出为乱码 # 解决方案:https://bl...
错误还原:在查询的过程中,传入的workType为0时,该条件不起...
报错如下,gcc版本太低 ^ server.c:5346:31: 错误:‘struct...