统计MongoDB字段中每个单词的出现次数，适合用MapReduce来实现吗？

问题描述

我有一个集合（collection），其中的文档包含大量正文文本。例如：

posts = { { id: 0,body: "foo bar baz",otherstuff: {...} },
          { id: 1,body: "baz bar oof",otherstuff: {...} },
          { id: 2,body: "baz foo oof",otherstuff: {...} }
        };

我想弄清楚如何遍历集合中的每个文档，并统计每个单词在所有帖子正文中出现的次数，得到类似下面的结果：

post_word_frequency = { { foo: 2 },{ bar: 2 },{ baz: 3 },{ oof: 2 },};

我从未使用过MapReduce，对mongo也还很陌生，不过我正在参考 http://cookbook.mongodb.org/patterns/unique_items_map_reduce/ 上的文档，目前写出了下面的代码：

/**
 * Map step of the word-count job: emits one (word, {count: 1}) pair for
 * every word found in the document's `body` field.
 *
 * Reads the document through `this` and calls `emit(key, value)`, which is
 * expected to be supplied by the environment running the map function.
 */
var map = function() {
    // Documents without a string body contribute no words.
    if (typeof this.body !== 'string') {
        return;
    }
    // Split on any run of whitespace so repeated spaces, tabs and newlines
    // do not produce empty "words".
    var words = this.body.split(/\s+/);
    // Index loop with declared locals: for...in over an array also visits
    // inherited enumerable properties, and undeclared `words`/`i` leak globals.
    for (var i = 0; i < words.length; i++) {
        // Leading/trailing whitespace still yields an empty token; skip it.
        if (words[i] !== '') {
            // The key is the word itself; `{ words[i] }` is not valid JavaScript.
            emit(words[i], {count: 1});
        }
    }
};

/**
 * Reduce step of the word-count job: sums the partial counts collected for
 * a single word.
 *
 * @param key    The word being reduced (not used in the computation).
 * @param values Array of {count: n} objects for that word.
 * @returns {count: total}. The result has the same shape as each input
 *          value, so a reduced result can itself be passed back in as one
 *          of the values.
 */
var reduce = function(key, values) {
    var count = 0;
    values.forEach(function(v) {
        count += v.count;
    });
    return {count: count};
};

// Run the word-count job over the posts collection. `out` names the
// collection that receives the results, so it has to be a string: the bare
// identifier post_word_frequency is an undefined variable and raises
// "ReferenceError: post_word_frequency is not defined".
db.posts.mapReduce(map, reduce, {out: "post_word_frequency"});

更麻烦的是，我是在node.js中进行这些操作的（使用node-mongo-native，但如果有更简单的方法来执行reduce查询，我也愿意换用其他库）。

    // Open the database, look up the posts collection and run the word-count
    // map-reduce job on it. Db and Server are presumably the classes exported
    // by the node-mongo-native driver mentioned above; confirm against the
    // driver version in use.
    var db = new Db('mydb', new Server('localhost', 27017, {}), {native_parser: false});
    db.open(function(err, db) {
        // Surface connection failures instead of dereferencing an undefined db.
        if (err) {
            throw err;
        }
        db.collection('posts', function(err, col) {
            if (err) {
                throw err;
            }
            // Call mapReduce on the collection handed to this callback (`db.col`
            // does not exist), pass the reduce function as well, and give `out`
            // the target collection name as a string: the bare identifier
            // post_word_frequency is an undefined variable (ReferenceError).
            col.mapReduce(map, reduce, {out: "post_word_frequency"}, function(err) {
                if (err) {
                    throw err;
                }
            });
        });
    });

到目前为止，我卡在了Node报出的 ReferenceError: post_word_frequency is not defined 上（我尝试过在shell中创建它，但仍然无济于事）。那么，有人用node.js做过mapreduce吗？这是对map reduce的错误用法吗？或者有其他办法？（也许直接循环，再upsert到另一个集合里？）感谢任何反馈和建议！ :) 编辑：下面Ryanos的回答是正确的（谢谢！）。在基于MongoDB的解决方案中还缺少一步：先查询集合并将结果转换为数组。

 db.open(function(err,db){
    db.collection(\'posts\',col) {
            col.find({}).toArray(function(err,posts){    // this line creates the \'posts\' array as needed by the MAPreduce functions.
                    var words= _.flatten(_.map(posts,function(val) {

解决方法

{out: post_word_frequency}这里有一个错误，您想要的也许是{out: "post_word_frequency"}；不过即使不使用这个out选项也应该可以工作。使用underscore可以很轻松地完成这件事。

/*
  Flat list with one entry per word occurrence across all posts:
  [{"word": "foo","count": 1},...]
*/
var words = _.flatten(_.map(posts, function(post) {
    // Each post yields an array of entries; _.flatten merges those per-post
    // arrays into a single list.
    return _.map(post.body.split(" "), function(word) {
        return {"word": word, "count": 1};
    });
}));

/*
  Object mapping each word to the number of entries seen for it:
  {
    "foo": n,...
  }
*/
var count = _.reduce(words, function(memo, val) {
    var word = val.word;
    // First sighting of a word starts its tally at 1; later sightings add one.
    if (Object.prototype.hasOwnProperty.call(memo, word)) {
        memo[word] += 1;
    } else {
        memo[word] = 1;
    }
    return memo;
}, {});

在线示例。用到的underscore函数：_.reduce、_.map、_.isNaN、_.flatten

mongodb 单词单词字段字段尝试尝试尝试是否是否每个获取获取获取计数计数