MongoDB简单批量插入应该快于1000次插入/秒

时间:2017-07-31 16:32:30

标签: node.js mongodb caching mongodb-query

我需要根据用户输入,对一个包含数千个文档(例如 5000 个)的集合执行数量不定的 $group 操作并显示结果。为此,我的主查询包含 4 个 $lookup。即使外键字段(foreignField)上已建立索引,这仍然需要大约 2 秒钟。

这本身让我感到惊讶。 5000 * 4索引查找,真的需要2秒吗?但这不是最大的问题。

在第一个请求中,需要根据用户输入从实时数据生成集合。渲染和插入这些文档大约需要10秒钟。它很复杂,所以很正常。

这意味着第一个请求需要12秒,每个后续请求需要2秒。

为了加快这些后续请求,一次执行所有$lookup似乎是个好主意,并将其保存为新的缓存集合。从这个准备好的缓存中查询只需要大约0.1秒。

我认为这意味着第一个请求现在需要12秒(我确实需要等待写入({w:1}),因为现在分组选择取决于正在生成的缓存),其余的需要0.1秒

问题在于我严重低估了将文档写入缓存所需的时间。起初,我把聚合得到的数组直接插入新集合;之后我改用批量操作进行优化,又通过使用流再节省了一秒:

// Queue every document produced by the aggregation into a single unordered
// bulk insert on the cache collection, then execute it once the stream ends.
let bulk        = db.cache.initializeUnorderedBulkOp()
let cursor      = db.data.aggregate(pipeline)
let stream      = cursor.stream()
stream.on('data', document => bulk.insert(document))
// Data needs to be there before I can query it in the next step,
// hence the acknowledged write ({w: 1}).
stream.once('end', () => Promise.resolve(bulk.execute({w: 1})))

这一操作使原本 2 秒的查询大约需要 10 秒才能完成。对于约 5000 个文档来说,这正常吗?数据总共只有大约 15 MB。(所以我想使用流是件好事,因为我已经接近 16 MB 的查询限制了。)

现在初始请求需要22秒。其余的0.1。这不是我想到的优化。

有没有办法可以规避或更好地解决这个问题? 有没有更快的方式在确认写入完成后返回,而不必在 writeOpResult 中接收 5000 个已插入文档的 _id? 使用 $out 会明显更快吗?

  • MongoDB 3.2
  • WiredTiger

内联更新

我换用了一台配备 16 GB 内存的快速 8 核 Intel i7 机器。插入 4000 个文档“只”需要 4 秒,这仍然很离谱。我在网上读到的文章中,速度从 20,000 次插入/秒到 100,000 次插入/秒不等,而我这里只有 1000 次插入/秒。

MongoDB日志

正如您在mongo日志中看到的那样,提供给批量插入操作的查询确实需要不到2秒的时间:

  

db.data command: aggregate
      {aggregate:"data",pipeline:"[...]",cursor:{batchSize:1000}}
      cursorid:155377253006 keyUpdates:0 writeConflicts:0 numYields:10 reslen:3953728
      locks:{Global:{acquireCount:{r:8034}},Database:{acquireCount:{r:4017}},Collection:{acquireCount:{r:4017}}}
      protocol:op_query 398ms
   db.data command: getMore
      {getMore:155377253006,collection:"data",batchSize:1000}
      planSummary:PIPELINE_PROXY cursorid:155377253006 keysExamined:0 docsExamined:0 keyUpdates:0 writeConflicts:0 numYields:8 nreturned:1000 reslen:4000968
      locks:{Global:{acquireCount:{r:8036}},Database:{acquireCount:{r:4018}},Collection:{acquireCount:{r:4018}}}
      protocol:op_query 393ms
   db.data command: getMore
      {getMore:155377253006,collection:"data",batchSize:1000}
      planSummary:PIPELINE_PROXY cursorid:155377253006 keysExamined:0 docsExamined:0 keyUpdates:0 writeConflicts:0 numYields:8 nreturned:1000 reslen:3906423
      locks:{Global:{acquireCount:{r:8028}},Database:{acquireCount:{r:4014}},Collection:{acquireCount:{r:4014}}}
      protocol:op_query 501ms
  db.data command: getMore
      {getMore:155377253006,collection:"data",batchSize:1000}
      planSummary:PIPELINE_PROXY cursorid:155377253006 keysExamined:0 docsExamined:0 cursorExhausted:1 keyUpdates:0 writeConflicts:0 numYields:8 nreturned:972 reslen:3938471
      locks:{Global:{acquireCount:{r:7796}},Database:{acquireCount:{r:3898}},Collection:{acquireCount:{r:3898}}}
      protocol:op_query 388ms

总计:1680毫秒

初始$lookup管道

此管道为缓存执行初始查找。应评论中的要求附在此处。

// Aggregation pipeline used to build the cache: filter on meta.string, then
// for each of the four lookup collections run $lookup -> $unwind -> $project,
// carrying forward the base fields plus the joined document's `attributes`.
[
    {
        "$match": {
            "meta.string": "some string"
        }
    },
    {
        "$project": {
            "_id": 0,
            "meta": 1,
            "attributes": 1,
            "relationships": 1,
            "type": 1,
            "id": 1
        }
    },
    {
        "$lookup": {
            "as": "lookup01",
            "foreignField": "id",
            "from": "data.lookup01",
            "localField": "relationships.lookup01.id"
        }
    },
    {
        "$unwind": "$lookup01"
    },
    {
        // This specific id is often but not always unique; meta.string is,
        // so joined documents with a different meta.string are pruned.
        "$redact": {
            "$cond": [
                {
                    "$eq": [
                        "$lookup01.meta.string",
                        "some string"
                    ]
                },
                "$$KEEP",
                "$$PRUNE"
            ]
        }
    },
    {
        "$project": {
            "_id": 0,
            "meta": 1,
            "attributes": 1,
            "relationships": 1,
            "type": 1,
            "id": 1,
            "lookup01": "$lookup01.attributes"
        }
    },
    {
        "$lookup": {
            "as": "lookup02",
            "foreignField": "id",
            "from": "data.lookup02",
            "localField": "relationships.lookup02.id"
        }
    },
    {
        "$unwind": "$lookup02"
    },
    {
        "$project": {
            "_id": 0,
            "meta": 1,
            "attributes": 1,
            "relationships": 1,
            "type": 1,
            "id": 1,
            "lookup01": 1,
            "lookup02": "$lookup02.attributes"
        }
    },
    {
        "$lookup": {
            "as": "lookup03",
            "foreignField": "id",
            "from": "data.lookup03",
            "localField": "relationships.lookup03.id"
        }
    },
    {
        "$unwind": "$lookup03"
    },
    {
        "$project": {
            "_id": 0,
            "meta": 1,
            "attributes": 1,
            "relationships": 1,
            "type": 1,
            "id": 1,
            "lookup01": 1,
            "lookup02": 1,
            "lookup03": "$lookup03.attributes"
        }
    },
    {
        "$lookup": {
            "as": "lookup04",
            "foreignField": "id",
            "from": "data.lookup04",
            "localField": "relationships.lookup04.id"
        }
    },
    {
        "$unwind": "$lookup04"
    },
    {
        "$project": {
            "_id": 0,
            "meta": 1,
            "attributes": 1,
            "relationships": 1,
            "type": 1,
            "id": 1,
            "lookup01": 1,
            "lookup02": 1,
            "lookup03": 1,
            "lookup04": "$lookup04.attributes"
        }
    }
]

生成的示例缓存文档

不幸的是,这些文档同时包含公开数据和专有数据,我不能在此发布实际数据。下面的示例可以让您大致了解插入缓存的文档是什么样子。基本上,它是(曾经是)JSON-API 结构的数据,并包含关系(relationships)。

    "_id" : ObjectId(),
    "type" : "data",
    "id" : 123,
    "meta" : {
        "date" : ISODate(),
        "date2" : ISODate(),
        "string" : "",
        "number" : 123
    },
    "attributes" : {
        "partnerData" : {
            "boolean" : true,
            "string01" : "",
            "string02" : "",
            "string03" : "",
            "string04" : "",
            "string05" : "",
            "number01" : 123,
            "number02" : 123,
            "number03" : 123,
            "number04" : 123,
            "subdocument" : {
                "array01" : [ 
                    {
                        "number" : 123,
                        "string" : ""
                    }
                ],
                "array02" : [ 
                    {
                        "number" : 123,
                        "string" : ""
                    },
                    {
                        "number" : 123,
                        "string" : ""
                    }
                ],
                "array03" : [ 
                    {
                        "number" : 123,
                        "string" : ""
                    }
                ]
            }
        },
        "number01" : 123,
        "number02" : 123,
        "number03" : 123,
        "number04" : 123,
        "number05" : 123,
        "number06" : 123,
        "number07" : 123,
        "boolean01" : false,
        "boolean02" : false,
        "boolean03" : false,
        "boolean04" : false,
        "boolean05" : false
        "boolean06" : true,
        "string01" : "",
        "string02" : "",
        "object01" : null,
        "object02" : null,
    },
    "relationships" : {
        "lookup01" : {
            "id" : 123,
            "type" : ""
        },
        "lookup02" : {
            "id" : 123,
            "type" : ""
        },
        "lookup03" : {
            "id" : 123,
            "type" : ""
        },
        "lookup04" : {
            "id" : 123,
            "type" : ""
        },
        "array01" : [ 
            {
                "id" : 123,
                "type" : ""
            },
            {
                "id" : 123,
                "type" : ""
            }
        ],
        "array02" : [ 
            {
                "id" : 123,
                "type" : ""
            },
            {
                "id" : 123,
                "type" : ""
            }
        ]
    },
    "lookup01" : {
        "number01" : 123,
        "number02" : 123,
        "boolean05" : false
        "boolean06" : true,
        "string01" : "",
        "string02" : "",
        "string03" : "",
        "string04" : "",
        "string05" : "",
        "string06" : "",
        "string07" : "",
        "object01" : null,
        "object02" : null,
    },
    "lookup02" : {
        "number01" : 123,
        "number02" : 123,
        "boolean05" : false
        "boolean06" : true,
        "string01" : "",
        "string02" : "",
        "string03" : "",
        "string04" : "",
        "string05" : "",
        "string06" : "",
        "string07" : "",
        "object01" : {
            "number" : 123,
            "string" : ""
        },
        "object02" : {
            "number" : 123,
            "string" : ""
        },
        "object03" : {
            "number" : 123,
            "string" : ""
        },
        "object04" : {
            "number" : 123,
            "string" : ""
        }
    },
    "lookup03" : {
        "string01" : "",
        "string02" : "",
        "object01" : {
            "number" : 123,
            "string" : ""
        }
    },
    "lookup04" : {
        "number01" : 123,
        "number02" : 123,
        "number03" : 123,
        "number04" : 123,
        "number05" : 123,
        "number06" : 123,
        "number07" : 123,
        "boolean01" : false,
        "boolean02" : false,
        "boolean03" : false,
        "boolean04" : false,
        "boolean05" : false
        "boolean06" : true,
        "string01" : "",
        "string02" : "",
        "string03" : "",
        "string04" : "",
        "string05" : "",
        "string06" : "",
        "string07" : "",
        "object01" : {
            "number" : 123,
            "string" : ""
        }
    },
    "date03" : ISODate()
}

索引

每个集合都在 meta.string 上建有索引;被查找的集合 data.lookup0x 还在 id 字段上建有索引(即 data.lookup0x.id)。

  • _id_(默认)
  • id_1
  • meta.string_1

0 个答案:

没有答案