MongoDB全文搜索运行缓慢

时间:2016-08-09 09:45:30

标签: mongodb indexing full-text-search

我在名为meta_db(大小> 7.5 GB)的数据库中有一个名为meta_all的集合(包含> 940万条记录)。我在pymongo中使用以下命令为meta_all中的title字段创建了一个文本索引:

meta_all.create_index([("title","text")])

但是当我使用以下命令执行搜索、并且只取得分最高的一条结果时,搜索耗时极长:

cursor = meta_all.find(

    {'$text':{'$search':'Some query'}},{'score': {'$meta': "textScore"}})

cursor.sort([('score',{'$meta':"textScore"})]).limit(1)

对于小型查询(包含两个或更少的单词),给出结果需要5-7分钟,而对于更大的查询,它会运行几个小时!

我已尝试过AND和默认OR搜索,但没有任何效果。我怀疑文本索引没有用于查询,但我不太确定是什么问题。

以下是db.currentOp()为我的搜索查询'Gone Girl'提供的内容:

> db.currentOp()
{
    "inprog" : [
        {
            "desc" : "conn3",
            "threadId" : "139933091329792",
            "connectionId" : 3,
            "client" : "127.0.0.1:58300",
            "active" : true,
            "opid" : 221,
            "secs_running" : 31,
            "microsecs_running" : NumberLong(31062518),
            "op" : "query",
            "ns" : "meta_db.meta_all",
            "query" : {
                "find" : "meta_all",
                "filter" : {
                    "$text" : {
                        "$search" : "Gone Girl"
                    }
                },
                "sort" : {
                    "score" : {
                        "$meta" : "textScore"
                    }
                },
                "projection" : {
                    "score" : {
                        "$meta" : "textScore"
                    }
                },
                "limit" : 1,
                "singleBatch" : true
            },
            "planSummary" : "IXSCAN { _fts: \"text\", _ftsx: 1 }, IXSCAN { _fts: \"text\", _ftsx: 1 }",
            "numYields" : 1194,
            "locks" : {
                "Global" : "r",
                "Database" : "r",
                "Collection" : "r"
            },
            "waitingForLock" : false,
            "lockStats" : {
                "Global" : {
                    "acquireCount" : {
                        "r" : NumberLong(2392)
                    }
                },
                "Database" : {
                    "acquireCount" : {
                        "r" : NumberLong(1196)
                    }
                },
                "Collection" : {
                    "acquireCount" : {
                        "r" : NumberLong(1196)
                    }
                }
            }
        },
        {
            "desc" : "conn1",
            "threadId" : "139933253670656",
            "connectionId" : 1,
            "client" : "127.0.0.1:58297",
            "active" : true,
            "opid" : 268,
            "secs_running" : 0,
            "microsecs_running" : NumberLong(81),
            "op" : "command",
            "ns" : "admin.$cmd",
            "query" : {
                "currentOp" : 1
            },
            "numYields" : 0,
            "locks" : {

            },
            "waitingForLock" : false,
            "lockStats" : {

            }
        }
    ],
    "ok" : 1
}

以下是db的统计信息:

> db.stats()
{
    "db" : "meta_db",
    "collections" : 1,
    "objects" : 9430088,
    "avgObjSize" : 1307.0730941217091,
    "dataSize" : 12325814300,
    "storageSize" : 7530577920,
    "numExtents" : 0,
    "indexes" : 2,
    "indexSize" : 566513664,
    "ok" : 1
}

db.meta_all.find({'$text':{'$search':'Some query'}},{'score': {'$meta': "textScore"}}).explain()给了我以下内容:

{
    "queryPlanner" : {
        "plannerVersion" : 1,
        "namespace" : "meta_db.meta_all",
        "indexFilterSet" : false,
        "parsedQuery" : {
            "$text" : {
                "$search" : "Some query",
                "$language" : "english",
                "$caseSensitive" : false,
                "$diacriticSensitive" : false
            }
        },
        "winningPlan" : {
            "stage" : "PROJECTION",
            "transformBy" : {
                "score" : {
                    "$meta" : "textScore"
                }
            },
            "inputStage" : {
                "stage" : "TEXT",
                "indexPrefix" : {

                },
                "indexName" : "title_text",
                "parsedTextQuery" : {
                    "terms" : [
                        "queri"
                    ],
                    "negatedTerms" : [ ],
                    "phrases" : [ ],
                    "negatedPhrases" : [ ]
                },
                "textIndexVersion" : 3,
                "inputStage" : {
                    "stage" : "TEXT_MATCH",
                    "inputStage" : {
                        "stage" : "TEXT_OR",
                        "inputStage" : {
                            "stage" : "IXSCAN",
                            "keyPattern" : {
                                "_fts" : "text",
                                "_ftsx" : 1
                            },
                            "indexName" : "title_text",
                            "isMultiKey" : true,
                            "isUnique" : false,
                            "isSparse" : false,
                            "isPartial" : false,
                            "indexVersion" : 1,
                            "direction" : "backward",
                            "indexBounds" : {

                            }
                        }
                    }
                }
            }
        },
        "rejectedPlans" : [ ]
    },
    "serverInfo" : {
        "host" : "jobsism10-Lenovo-G580",
        "port" : 27017,
        "version" : "3.3.8",
        "gitVersion" : "00ed8f3b275971093ddd2ee7d3ab558904e28af0"
    },
    "ok" : 1
}

任何人都可以帮我解决这个问题吗?

1 个答案:

答案 0 :(得分:0)

您确定索引保存在RAM中吗?

您可以使用iotop查看mongo如何使用您的磁盘。也许mongo正在从HDD读取数据?另外,请查看mongo统计信息中的page faults,以检查mongo是否正在从磁盘加载数据。

另一件事是你可以使用像strace这样的工具来查看mongo发出的系统调用。