难以优化Mongo distinct查询以使用索引

时间:2017-01-16 00:16:40

标签: mongodb query-optimization

我很难让 Mongo 在不从集合中获取大量文档的情况下运行一个 distinct 查询——这个查询看起来本应完全被索引覆盖。

我的文档的一般形式如下:

{
  _tenantId: 'someString',
  _productCategory: 'some string from a smallish set'
  ...
}

我在 (_tenantId, _productCategory) 这两个字段上建有一个复合索引。

我想知道给定租户的不同产品类别是什么,所以查询是:

db.products.distinct( '_productCategory', { _tenantId: '463171c3-d15f-4699-893d-3046327f8e1f'})

这个查询运行得相当慢(在本地的 Mongo 3.2.9 数据库上,对一个约 50 万个产品的集合需要几秒钟)。在我们预生产环境中基于 SaaS 的 Mongo 上(它的内存限制可能比我的本地实例更严格,而本地实例可以随意使用我整台机器的资源),同样的数据需要几十秒。

对该查询执行 explain('executionStats') 得到的输出如下:

{
"queryPlanner" : {
    "plannerVersion" : 1,
    "namespace" : "engage-prod.products",
    "indexFilterSet" : false,
    "parsedQuery" : {
        "_tenantId" : {
            "$eq" : "463171c3-d15f-4699-893d-3046327f8e1f"
        }
    },
    "winningPlan" : {
        "stage" : "FETCH",
        "inputStage" : {
            "stage" : "IXSCAN",
            "keyPattern" : {
                "_tenantId" : 1,
                "_productCategory" : 1
            },
            "indexName" : "_tenantId_1__productCategory_1",
            "isMultiKey" : false,
            "isUnique" : false,
            "isSparse" : false,
            "isPartial" : false,
            "indexVersion" : 1,
            "direction" : "forward",
            "indexBounds" : {
                "_tenantId" : [
                    "[\"463171c3-d15f-4699-893d-3046327f8e1f\", \"463171c3-d15f-4699-893d-3046327f8e1f\"]"
                ],
                "_productCategory" : [
                    "[MinKey, MaxKey]"
                ]
            }
        }
    },
    "rejectedPlans" : [ ]
},
"executionStats" : {
    "executionSuccess" : true,
    "nReturned" : 406871,
    "executionTimeMillis" : 358,
    "totalKeysExamined" : 406871,
    "totalDocsExamined" : 406871,
    "executionStages" : {
        "stage" : "FETCH",
        "nReturned" : 406871,
        "executionTimeMillisEstimate" : 80,
        "works" : 406872,
        "advanced" : 406871,
        "needTime" : 0,
        "needYield" : 0,
        "saveState" : 3178,
        "restoreState" : 3178,
        "isEOF" : 1,
        "invalidates" : 0,
        "docsExamined" : 406871,
        "alreadyHasObj" : 0,
        "inputStage" : {
            "stage" : "IXSCAN",
            "nReturned" : 406871,
            "executionTimeMillisEstimate" : 40,
            "works" : 406872,
            "advanced" : 406871,
            "needTime" : 0,
            "needYield" : 0,
            "saveState" : 3178,
            "restoreState" : 3178,
            "isEOF" : 1,
            "invalidates" : 0,
            "keyPattern" : {
                "_tenantId" : 1,
                "_productCategory" : 1
            },
            "indexName" : "_tenantId_1__productCategory_1",
            "isMultiKey" : false,
            "isUnique" : false,
            "isSparse" : false,
            "isPartial" : false,
            "indexVersion" : 1,
            "direction" : "forward",
            "indexBounds" : {
                "_tenantId" : [
                    "[\"463171c3-d15f-4699-893d-3046327f8e1f\", \"463171c3-d15f-4699-893d-3046327f8e1f\"]"
                ],
                "_productCategory" : [
                    "[MinKey, MaxKey]"
                ]
            },
            "keysExamined" : 406871,
            "dupsTested" : 0,
            "dupsDropped" : 0,
            "seenInvalidated" : 0
        }
    }
},
"serverInfo" : {
    "host" : "Stevens-MacBook-Pro.local",
    "port" : 27017,
    "version" : "3.2.9",
    "gitVersion" : "22ec9e93b40c85fc7cae7d56e7d6a02fd811088c"
},
"ok" : 1
}

请注意,即使它执行的是 IXSCAN,仍然检查并返回了超过 40 万个文档(见 nReturned 和 totalDocsExamined)。

如果我创建一个复合字段(即下面查询中的 _productTenantAndCategory),其值为上述两个字段按字典序拼接的字符串(以 ':' 作为分隔符),并对该字段建立索引,使其成为一个单字段索引,那么查询:

db.products.explain('executionStats').distinct( '_productTenantAndCategory', { _productTenantAndCategory: {$gte: '463171c3-d15f-4699-893d-3046327f8e1f',$lt: '463171c3-d15f-4699-893d-3046327f8e1g'}})

完全在索引中工作并产生:

{
"queryPlanner" : {
    "plannerVersion" : 1,
    "namespace" : "engage-prod.products",
    "indexFilterSet" : false,
    "parsedQuery" : {
        "$and" : [
            {
                "_productTenantAndCategory" : {
                    "$lt" : "463171c3-d15f-4699-893d-3046327f8e1g"
                }
            },
            {
                "_productTenantAndCategory" : {
                    "$gte" : "463171c3-d15f-4699-893d-3046327f8e1f"
                }
            }
        ]
    },
    "winningPlan" : {
        "stage" : "PROJECTION",
        "transformBy" : {
            "_id" : 0,
            "_productTenantAndCategory" : 1
        },
        "inputStage" : {
            "stage" : "DISTINCT_SCAN",
            "keyPattern" : {
                "_productTenantAndCategory" : 1
            },
            "indexName" : "_productTenantAndCategory_1",
            "isMultiKey" : false,
            "isUnique" : false,
            "isSparse" : false,
            "isPartial" : false,
            "indexVersion" : 1,
            "direction" : "forward",
            "indexBounds" : {
                "_productTenantAndCategory" : [
                    "[\"463171c3-d15f-4699-893d-3046327f8e1f\", \"463171c3-d15f-4699-893d-3046327f8e1g\")"
                ]
            }
        }
    },
    "rejectedPlans" : [ ]
},
"executionStats" : {
    "executionSuccess" : true,
    "nReturned" : 62,
    "executionTimeMillis" : 0,
    "totalKeysExamined" : 63,
    "totalDocsExamined" : 0,
    "executionStages" : {
        "stage" : "PROJECTION",
        "nReturned" : 62,
        "executionTimeMillisEstimate" : 0,
        "works" : 63,
        "advanced" : 62,
        "needTime" : 0,
        "needYield" : 0,
        "saveState" : 0,
        "restoreState" : 0,
        "isEOF" : 1,
        "invalidates" : 0,
        "transformBy" : {
            "_id" : 0,
            "_productTenantAndCategory" : 1
        },
        "inputStage" : {
            "stage" : "DISTINCT_SCAN",
            "nReturned" : 62,
            "executionTimeMillisEstimate" : 0,
            "works" : 63,
            "advanced" : 62,
            "needTime" : 0,
            "needYield" : 0,
            "saveState" : 0,
            "restoreState" : 0,
            "isEOF" : 1,
            "invalidates" : 0,
            "keyPattern" : {
                "_productTenantAndCategory" : 1
            },
            "indexName" : "_productTenantAndCategory_1",
            "isMultiKey" : false,
            "isUnique" : false,
            "isSparse" : false,
            "isPartial" : false,
            "indexVersion" : 1,
            "direction" : "forward",
            "indexBounds" : {
                "_productTenantAndCategory" : [
                    "[\"463171c3-d15f-4699-893d-3046327f8e1f\", \"463171c3-d15f-4699-893d-3046327f8e1g\")"
                ]
            },
            "keysExamined" : 63
        }
    }
},
"serverInfo" : {
    "host" : "Stevens-MacBook-Pro.local",
    "port" : 27017,
    "version" : "3.2.9",
    "gitVersion" : "22ec9e93b40c85fc7cae7d56e7d6a02fd811088c"
},
"ok" : 1
}

为我需要的所有聚合查询都构建带有手动拼接键的单字段索引,并不是一条很理想的路。既然所有信息都已经存在于我最初使用的复合索引中,为什么 Mongo 不能在该索引覆盖的情况下执行原始的 distinct 查询?在查询优化方面,我有什么办法可以解决这个问题吗?

注意:这实际上是一个稍复杂问题的子问题,那个问题涉及用聚合管道来实际统计每个类别的出现次数。但我目前把问题限定在更简单的 distinct 查询上,因为它似乎已经抓住了问题的本质——未能使用本应覆盖查询的索引(我在聚合管道的情形中也看到了同样的现象)——同时整体上是一个更简单的查询。

0 个答案:

没有答案
相关问题