MongoDB:按 _id 分组聚合 $add 的值

时间:2014-08-29 15:20:24

标签: mongodb aggregation-framework mongodb-php

我试图创建观看次数最多的元素(作者)的聚合。

以下是我的用户集合:

{
  "_id" : ObjectId("54008ac8145a6cc5058b456b"),
  "history" : {
    "authors" : [
      {
        "name" : "michou",
        "count" : {
          "all" : NumberLong(1),
          "2014" : NumberLong(1),
          "201408" : NumberLong(1),
          "2014w35" : NumberLong(1)
        }
      }
    ]
  }
}
{
  "_id" : ObjectId("54008ac8145a6ccb058b4570"),
  "history" : {
    "authors" : [
      {
        "name" : "petitBonhommeEnMousse",
        "count" : {
          "all" : NumberLong(2),
          "2014" : NumberLong(2),
          "201408" : NumberLong(2),
          "2014w35" : NumberLong(2)
        }
      },
      {
        "name" : "lordVador",
        "count" : {
          "all" : NumberLong(1),
          "2014" : NumberLong(1),
          "201408" : NumberLong(1),
          "2014w35" : NumberLong(1)
        }
      }
    ]
  }
}
{
  "_id" : ObjectId("54008ac8145a6ccf058b456c"),
  "history" : {
    "authors" : [
      {
        "name" : "lordVador",
        "count" : {
          "all" : NumberLong(1),
          "2014" : NumberLong(1),
          "201408" : NumberLong(1),
          "2014w35" : NumberLong(1)
        }
      }
    ]
  }
}

我想要得到的是每个用户在过去三周内查看各位作者的次数列表。

为此,每当用户查看某个页面时,我会递增对应键的值,例如 "2014w35"、"2014w36" ……(年份加上一年中的周数)。

这是第一次尝试:

// First attempt: total views per author for weeks 33-35 of 2014.
// Grouping is by author name only, so the totals are not split per user.
db.users.aggregate(
  [
    // Keep only documents whose history_updated equals "20140829".
    { $match: { history_updated: "20140829" } },
    // Produce one document per element of the history.authors array.
    { $unwind: "$history.authors" },
    // Sum each weekly counter per author name, across every matched user.
    { $group :
      {
        "_id" : "$history.authors.name",
        "total2014w35" : {"$sum"  : "$history.authors.count.2014w35"},
        "total2014w34" : {"$sum"  : "$history.authors.count.2014w34"},
        "total2014w33" : {"$sum"  : "$history.authors.count.2014w33"}
      }
    },
    // Collapse the three weekly sums into a single "total" field per author.
    { $project: {
        "_id" : 1,
        "total" : {
          $add : [
            "$total2014w35",
            "$total2014w34",
            "$total2014w33"
          ]
        }
      }
    }
  ]
)

它返回被查看的作者列表及其次数,但没有按用户区分,得到的是所有用户的总数:

{ "_id" : "lordVador", "total" : NumberLong(2) }
{ "_id" : "petitBonhommeEnMousse", "total" : NumberLong(2) }
{ "_id" : "michou", "total" : NumberLong(1) }

我的第二个尝试是按_id分组:

db.users.aggregate(
  [
    { $match: { history_updated: "20140829" } },
    { $unwind: "$history.authors" },
    {
      $group :
      {
        "_id" : "$_id",
        ....

当然,它返回的是每个用户查看所有作者的总次数,但没有各个作者的明细。

{ "_id" : ObjectId("54008ac8145a6ccb058b4570"), "total" : NumberLong(3) }
{ "_id" : ObjectId("54008ac8145a6ccf058b456c"), "total" : NumberLong(1) }
{ "_id" : ObjectId("54008ac8145a6cc5058b456b"), "total" : NumberLong(1) }

我想要的是两者的结合:对每个用户(集合中的每个文档)列出其查看过的作者,以及该用户在过去 3 周内查看每位作者的次数。

类似的东西:

{ "_id" : ObjectId("54008ac8145a6ccb058b4570"), [{ "lordVador" : NumberLong(3) },{ "michou" : NumberLong(1) } ] }
{ "_id" : ObjectId("54008ac8145a6ccf058b456c"), [{ "petitBonhommeEnMousse" : NumberLong(1) } ] }
{ "_id" : ObjectId("54008ac8145a6cc5058b456b"), [{ "lordVador" : NumberLong(1) } ] }

你们知道如何把这两者结合起来吗?

2 个答案:

答案 0 :(得分:1)

使用聚合框架,您无法获取作者姓名及其计数之间的映射。您最多可以获得两个被认为是关联的不同数组:

// Per-user result made of two parallel arrays: authors[i] and
// visits_last_three_mnths[i] refer to the same author.
db.user.aggregate([
// Keep only documents whose history_updated equals "20140829".
// NOTE(review): the sample documents inserted into db.user later in this answer
// show no history_updated field - confirm this stage actually matches them.
{$match: {"history_updated": "20140829" } },
// Produce one document per element of the history.authors array.
{$unwind:"$history.authors"},
// Regroup by user _id, pushing each author's name and a computed visit count.
// NOTE(review): the count is count.201408 + count.2014w35 (a month counter plus
// a week counter), not the sum of three weekly counters - confirm the intent.
{$group:{"_id":"$_id","authors":{$push:"$history.authors.name"},
         "visits_last_three_mnths":{$push:{$add:["$history.authors.count.201408","$history.authors.count.2014w35"]}}}
}
])

示例输出:(已更改了 ID 和数字,并从示例中移除了一个用户)

{ "_id" : 2, "authors" : [ "petitBonhommeEnMousse", "lordVador" ], "visits_last_three_mnths" : [75,150 ] }
{ "_id" : 1, "authors" : [ "michou" ], "visits_last_three_mnths" : [ 300 ] }

注意:"authors" 和 "visits_last_three_mnths" 这两个数组之间的对应关系是通过索引保持的。不过,您期望的结果形式可以通过 Map-Reduce 来实现。

我使用的修改过的数据集,

// Sample data set for the examples in this answer.
// User 1 has a single author entry ("michou").
db.user.insert({
  "_id" : 1,
  "history" : {
    "authors" : [
      {
        "name" : "michou",
        "count" : {
          "all" : 400,
          "2014" : 300,
          "201408" : 200,
          "2014w35" : 100
        }
      }
    ]
  }
});
// User 2 has two author entries ("petitBonhommeEnMousse" and "lordVador").
db.user.insert({
  "_id" : 2,
  "history" : {
    "authors" : [
      {
        "name" : "petitBonhommeEnMousse",
        "count" : {
          "all" : 200,
          "2014" : 100,
          "201408" : 50,
          "2014w35" : 25
        }
      },
      {
        "name" : "lordVador",
        "count" : {
          "all" : 300,
          "2014" : 200,
          "201408" : 100,
          "2014w35" : 50
        }
      }
    ]
  }
});

以下是使用 Map-Reduce 的一种尝试(使用与上面相同的数据集),希望能有所帮助:

// Map step: emit one value per user document, keyed by the user's _id.
//
// The emitted value already has the final {"user_views": [...]} shape.
// MongoDB does not call reduce for a key that has a single value, and it may
// feed the output of reduce back into reduce, so the value emitted here and
// the value returned by reduce must have the same structure.
var map = function(){
    // Documents without history.authors emit an empty list instead of throwing.
    var authors = (this.history && this.history.authors) || [];
    var user_views = [];
    for(var i = 0; i < authors.length; i++)
    {
        var author = authors[i];
        // Uses the all-time counter; to report weekly figures, replace
        // author.count.all with a sum of counters such as author.count["2014w35"].
        user_views.push({"author_name":author.name,"views":author.count.all});
    }
    emit(this._id,{"user_views":user_views});
};

// Reduce step: concatenate the user_views lists of every value for a user.
//
// userid      - the key emitted by map (the user's _id); not used in the body.
// authors_arr - array of {"user_views": [...]} values, coming either from map
//               or from an earlier reduce call for the same key.
// Returns one {"user_views": [...]} object, so the result can safely be
// re-reduced.
var reduce = function(userid,authors_arr){
    var reduced = {"user_views":[]};
    // Index loops instead of for...in: for...in also walks any enumerable
    // properties added to the array or its prototype.
    for(var i = 0; i < authors_arr.length; i++)
    {
        var views = authors_arr[i].user_views || [];
        for(var j = 0; j < views.length; j++)
        {
            reduced.user_views.push(views[j]);
        }
    }
    return reduced;
};

// Run the map and reduce functions over db.user; results go to the "output" collection.
db.user.mapReduce(map,reduce,{ out: "output" }) 

[
  {
    "_id": 1,
    "value": {
      "user_views": [
        {
          "author_name": "michou",
          "views": 400
        }
      ]
    }
  },
  {
    "_id": 2,
    "value": {
      "user_views": [
        {
          "author_name": "petitBonhommeEnMousse",
          "views": 200
        },
        {
          "author_name": "lordVador",
          "views": 300
        }
      ]
    }
  }
]

注意:您需要更改键的命名约定,例如 '201408' 不是有效的 JavaScript 标识符,无法用点号语法访问。更改之后,您就可以修改上面的 Map-Reduce 函数,以求得每周查看次数的总和。

答案 1 :(得分:0)

也许您可以在第二次尝试中使用 "$addToSet",把作者及其查看次数添加到集合中。如果每个用户的作者都不重复,那么用 "$push" 就可以了。 http://docs.mongodb.org/manual/reference/operator/aggregation/push/