测试目的:综测前面的聚合函数。
测试数据:
> db.tianyc_test3.find()
{ "_id" : ObjectId("51258e47ff13bc6383473137"), "name" : "xtt", "dic" : 1 }{ "_id" : ObjectId("51258e49ff13bc6383473138"), "name" : "xtt", "dic" : 2 }{ "_id" : ObjectId("51258e4bff13bc6383473139"), "name" : "xtt", "dic" : 3 }{ "_id" : ObjectId("51258e4eff13bc638347313a"), "name" : "xtt", "dic" : 4 }{ "_id" : ObjectId("51258e50ff13bc638347313b"), "name" : "xtt", "dic" : 5 }{ "_id" : ObjectId("51258e5aff13bc638347313c"), "name" : "yct", "dic" : 1 }{ "_id" : ObjectId("51258e5cff13bc638347313d"), "name" : "yct", "dic" : 2 }{ "_id" : ObjectId("51258e62ff13bc638347313e"), "name" : "neu", "dic" : 1 }{ "_id" : ObjectId("51258e64ff13bc638347313f"), "name" : "neu", "dic" : 2 }{ "_id" : ObjectId("51258e65ff13bc6383473140"), "name" : "neu", "dic" : 3 }{ "_id" : ObjectId("51258e68ff13bc6383473141"), "name" : "neu", "dic" : 4 }>实现如下SQL:
select name, count(*), sum(dic), avg(dic), max(dic), min(dic)
from tianyc_test3where dic>1group by namehaving count(*)>2这个SQL中的5个聚合函数都使用了dic列。实际应用中,可以换成不同的列。
1. 编写map
> var m = function(){
... if (this.dic>1){ #where条件对应到这里,也可以对应到执行mapreduce时的query参数中。... key=this.name;... value={dic_count:this.dic, dic_sum:this.dic, dic_avg:this.dic, dic_max:this.dic, dic_min:this.dic}#实际应用中,可以换用不同的列。... emit(key, value);... }... }2. 编写reduce
> var r = function (key,values) {
... var result={dic_count:values.length, dic_sum:0, dic_avg:0, dic_max:values[0].dic_max, dic_min:values[0].dic_min}#返回结果为内嵌集合,设定了初始值。其中count直接使用数组长度,不需再计算。... for (var i=0; i<values.length; i++) { ... result.dic_sum+=values[i].dic_sum; #sum... if (result.dic_max<values[i].dic_max) {result.dic_max=values[i].dic_max;} #max... if (result.dic_min>values[i].dic_min) {result.dic_min=values[i].dic_min;} #min... }... result.dic_avg=result.dic_sum/values.length; #avg... return result;... };3. 编写finalize
> var f = function(key, rvalues){
... if (rvalues.dic_count>2){rvalues.get='true';} #增加列get,用于实现having... else {rvalues.get='false'};... return rvalues;... }4. 执行mapreduce
> var res = db.runCommand({
... mapreduce:'tianyc_test3',... map:m,... reduce:r,... finalize:f, #可以在这里使用query参数,进行更复杂的mongo查询。... out:'tianyc_mr_Result1'... }... )5. 获取结果
> db.tianyc_mr_Result1.find() #查询结果{ "_id" : "neu", "value" : { "dic_count" : 3, "dic_sum" : 9, "dic_avg" : 3, "dic_max" : 4, "dic_min" : 2, "get" : "true" } }{ "_id" : "xtt", "value" : { "dic_count" : 4, "dic_sum" : 14, "dic_avg" : 3.5, "dic_max" : 5, "dic_min" : 2, "get" : "true" } }{ "_id" : "yct", "value" : { "dic_count" : 2, "dic_sum" : 2, "dic_avg" : 2, "dic_max" : 2, "dic_min" : 2, "get" : "false" } }> db.tianyc_mr_Result1.find({'value.get':'true'},{'value.get':0}) # 使用find条件,得到最终结果。
{ "_id" : "neu", "value" : { "dic_count" : 3, "dic_sum" : 9, "dic_avg" : 3, "dic_max" : 4, "dic_min" : 2 } }{ "_id" : "xtt", "value" : { "dic_count" : 4, "dic_sum" : 14, "dic_avg" : 3.5, "dic_max" : 5, "dic_min" : 2 } }>