问题描述
我正在尝试从RSS提要中检索数据(这部分已经成功),然后利用其中的描述,标题和图片说明(caption),通过YQL Placemaker开放数据表获取地理信息(纬度,经度,woeId)。
所有这些都需要作为JSON输出。
这里是在YQL控制台中对其进行测试的固定链接。
我的XML代码有什么问题?或者我还应该尝试什么?欢迎提出任何建议。
<meta>
<author>Yahoo! Inc.</author>
<documentationURL>http://i-magine.mobi/</documentationURL>
<sampleQuery>select * from {table} where section=\"topstories\" and description matches \".*jpg.*\"</sampleQuery>
<description>Searches Yahoo.news RSS</description>
</meta>
<bindings>
<select itemPath=\"\" produces=\"XML\">
<urls>
<url>http://rss.news.yahoo.com/rss/topstories/{section}
</url>
</urls>
<inputs>
<key id=\'section\' type=\"xs:string\" paramType=\"path\" required=\"true\" />
</inputs>
<execute><![CDATA[
// Script body of the table's <execute> element. It queries the RSS feed for
// the requested section, derives text fields from each item, runs one
// geo.placemaker query per item, and assigns an E4X <stories> element to
// response.object.
// NOTE(review): quote characters throughout this listing appear
// backslash-escaped, which looks like an extraction artifact - confirm
// against the original table file.

// Sets the E4X default XML namespace for the rest of the script.
// NOTE(review): unqualified lookups such as data1.item below are presumably
// resolved in this namespace - confirm the RSS results actually use it.
default xml namespace = \"http://where.yahooapis.com/v1/schema.rng\";
// http://www.json.org/json2.js
// Loads a JSON helper library; nothing in the visible script calls it.
y.include(\'http://www.i-magine.mobi/json2.js\');
// Per-story accumulators, assigned without var. They are filled inside the
// loop below but are not read again anywhere in the visible script.
rssStorySection = [];
rssStoryNumber = [];
rssStoryTitle = [];
rssStorySummary = [];
rssImageCaption = [];
rssStoryUrl = [];
rssImageUrl = [];
rssGeoText = [];
// var content = \'They followed him to deepest Africa after Brussels and Tokyo and found him there in Timbuktu\';
// The YQL statement is built by concatenating the section input into the
// feed URL; it keeps items whose description matches .*jpg.*, limited to 30.
var rssQuery = \'select * from rss where url = \' + \"\'\" + \'http://rss.news.yahoo.com/rss/\' + section + \"\'\" + \' and description matches \".*jpg.*\" limit 30\';
res1 = y.query(rssQuery);
data1 = res1.results;
// var geoQuery = \'SELECT * FROM geo.placemaker WHERE documentContent =\' + \'\"\' + content + \'\"\' + \'AND documentType=\' + \'\"\' + \'text/plain\' + \'\"\';
// res2 = y.query(geoQuery);
// data2 = res2.results;
// NOTE(review): if data1 is an E4X XML value, item.length is presumably a
// child-element lookup rather than a count (E4X exposes length() as a
// method) - confirm, since the loop bound depends on it.
for (var c=0;c<data1.item.length;c++)
{
var story = data1.item[c];
// title and link are coerced to strings by appending an empty string.
var storyTitleText0 = story.title + \"\";
var storyUrl = story.link + \"\";
// NOTE(review): description is not string-coerced here, unlike title and
// link above, yet match() and replace() are called on it below - confirm
// those calls behave as string methods on this value.
var description = story.description;
// Title with newline patterns removed (as far as the escaped regex in this
// listing can be read).
var storyTitleText = storyTitleText0.replace(/\\n/ig,\"\") + \"\";
// First http://d...jpg match in the description, coerced to a string.
// Assuming String match semantics, a failed match yields null, which the
// concatenation turns into the text null - TODO confirm.
var imageUrl = description.match(/http:\\/\\/d.*?jpg/i) + \"\";
// Caption: take the alt=...border span, then trim the leading alt= part and
// everything from the closing quote before border onwards.
var imageCaptionText0 = description.match(/alt=\"([^ ]).*border/) + \"\";
var imageCaptionText1 = imageCaptionText0.replace(/alt=\"/ig,\"\") + \"\";
var imageCaptionText = imageCaptionText1.replace(/\" border.*/ig,\"\") + \"\";
// Summary: the description with adjacent tag pairs (optionally separated by
// whitespace) removed.
var storySummaryText = description.replace(/<[^>]*>([\\s]?)*<[^>]*>/g,\"\") + \"\";
// Section: take the http...* span, drop the trailing slash-star, then drop
// the us.rd.yahoo.com/dailynews/rss/ prefix.
var storySection0 = description.match(/http[^ ].*\\*/i) + \"\";
var storySection1 = storySection0.replace(/\\/\\*/ig,\"\") + \"\";
var storySection = storySection1.replace(/http:\\/\\/us.rd.yahoo.com\\/dailynews\\/rss\\//ig,\"\") + \"\";
// Text handed to Placemaker: caption, title and summary joined by spaces.
var geoString = (imageCaptionText + \" \" + storyTitleText + \" \" + storySummaryText);
rssStorySection.push(storySection);
rssStoryTitle.push(storyTitleText);
rssStorySummary.push(storySummaryText);
rssImageCaption.push(imageCaptionText);
rssStoryUrl.push(storyUrl);
rssImageUrl.push(imageUrl);
rssGeoText.push(geoString);
rssStoryNumber.push(c);
var content = geoString;
// NOTE(review): content is placed between double quotes without escaping,
// and the AND literal has no leading space, so the statement reads
// ...content-quote immediately followed by AND - confirm YQL accepts this.
var geoQuery = \'SELECT * FROM geo.placemaker WHERE documentContent =\' + \'\"\' + content + \'\"\' + \'AND documentType=\' + \'\"\' + \'text/plain\' + \'\"\';
// res2 and data2 are overwritten on every pass, so only the values from the
// final iteration are still available after the loop.
var res2 = y.query(geoQuery);
var data2 = res2.results;
}
// The response carries the section input, the whole RSS result set, and the
// Placemaker result left over from the last loop iteration.
var d = data1;
var e = data2;
response.object = <stories>
<c>{section}</c>
<d>{d}</d>
<e>{e}</e>
</stories>;
]]></execute>
</select>
</bindings>
</table>
解决方法
整理后的表格(同时也是“可用的”,即能够返回RSS + Placemaker结果)如下所示:
<?xml version=\"1.0\" encoding=\"UTF-8\" ?>
<table xmlns=\"http://query.yahooapis.com/v1/schema/table.xsd\">
<meta>
<author>Peter Cowburn</author>
<documentationURL>http://stackoverflow.com/questions/6168564/creating-a-yql-opentable-to-combine-rss-data-with-placemaker-map-info</documentationURL>
<sampleQuery>select * from {table} where section=\'topstories\'</sampleQuery>
<description>Searches Yahoo! News RSS and injects Placemaker Places</description>
</meta>
<bindings>
<select itemPath=\"stories.story\" produces=\"XML\">
<urls>
<url>
http://rss.news.yahoo.com/rss/{section}
</url>
</urls>
<inputs>
<key id=\"section\" type=\"xs:string\" paramType=\"path\" required=\"true\" />
</inputs>
<execute><![CDATA[
// Script body of the table's <execute> element: fetches the feed items,
// creates one geo.placemaker query per item, then returns a <stories>
// element holding one <story> per item (feed item plus its places).
// NOTE(review): quote characters throughout this listing appear
// backslash-escaped, which looks like an extraction artifact - confirm
// against the original table file.

// Fetch top 30 feed items with jpg images
// The feed URL is supplied through the @url query parameter (request.url)
// rather than being concatenated into the statement.
var feed = y.query(
\'select * from rss where url=@url and description matches \".*jpg.*\" limit 30\',{url: request.url}
).results;
// Build geo queries
// @text is bound per item when each query is created in the loop below.
var placeQuery = \'select * from geo.placemaker where documentContent=@text and documentType=\"text/plain\"\';
var placeQueries = [];
var title,description,caption,summary,content;
// NOTE(review): result is not in the var list above, so the loop assigns it
// without a declaration.
for each (result in feed.item) {
title = result.title.text().toString();
// Pass the description markup through y.tidy and take the p element
// under body.
description = y.tidy(result.description.toString()).body.p;
// alt attribute of the image nested inside the link.
caption = description.a.img.@alt.toString();
// Text nodes gathered from the descendants of that paragraph.
summary = description..*.text().toString();
content = caption + \" \" + title + \" \" + summary;
// Keep the query object next to its feed item; results is filled in by the
// following loop.
placeQueries.push({
query: y.query(placeQuery,{text: content}),item: result,results: null
});
}
// Execute all queries
// Presumably reading .results is what waits for each query created above -
// confirm against the YQL execute documentation.
// Namespace used to qualify the place elements selected below.
var where = new Namespace(\'http://wherein.yahooapis.com/v1/schema\');
// NOTE(review): match and places are declared but not used in the visible
// script.
var matches,match,places = [];
// for-in visits the array's keys, so q is an index into placeQueries here.
for (var q in placeQueries) {
matches = placeQueries[q].query.results.matches.match;
// All descendant place elements, in the where namespace, of the match nodes.
placeQueries[q].results = matches..where::place;
}
// Build response object
var stories = <stories/>;
// q is reused here, now holding each stored entry itself rather than its
// index. Each story wraps the original feed item followed by its places.
for each (q in placeQueries) {
stories.node += <story>
{q.item}
{q.results}
</story>;
}
response.object = stories;
]]></execute>
</select>
</bindings>
</table>
您可以在查询中直接引用这个在线表格(它可能不会永远存在!)来使用它:
use \'https://raw.github.com/salathe/yql-tables/so-6168564/yahoo/newswithplaces.xml\'
as rssplaces;
select * from rssplaces where section=\'topstories\';
(在YQL控制台中尝试)
该表使用了<execute>块中可用的一些功能,例如E4X,查询参数和并行查询。这些功能都能让事情变得更简单,但乍一看可能有些陌生。
附言:上面的内容按原样提供,我不会为此费尽心力去回答相关的“支持”问题。它主要是为了帮您起步,介绍一种可能对您有用的方法。