Elasticsearch 中高亮显示查询词项的问题

时间:2018-10-18 10:02:27

标签: elasticsearch

  
      
1:我们使用以下设置和映射创建了索引。

 PUT http://localhost:9200/essearch
{ 
"mappings": {
        "object": {
            "_all": {
                "enabled": false
            },
            "properties": {
               "content": {
                    "type": "text",
                    "term_vector": "with_positions_offsets",
                    "similarity": "classic",
                    "analyzer": "content_standard"
                },
                "content_phonic": {
                    "type": "text",
                    "term_vector": "with_positions_offsets",
                    "similarity": "classic",
                    "analyzer": "content_phonetic"
                },
                "content_stemming": {
                    "type": "text",
                    "term_vector": "with_positions_offsets",
                    "similarity": "classic",
                    "analyzer": "content_stemming"
                }
            }
        }
    },
 "settings": {
        "index": {
            "number_of_shards": "1",
            "similarity": {
                "default": {
                    "type": "classic"
                }
            },
            "max_result_window": "50000",
            "mapper": {
                "dynamic": "false"
            },
            "analysis": {
                "filter": {
                    "content_phonetic": {
                        "type": "phonetic",
                        "encoder": "doublemetaphone"
                    },
                    "StopWords": {
                        "type": "stop",
                        "stopwords": [
                            "after",
                            "all",
                            "under",
                            "very",
                            "well"]
                    }
                },
                "analyzer": {
                    "content_phonetic": {
                        "filter": [
                            "content_phonetic"
                        ],
                        "char_filter": [
                            "CharFilters"
                        ],
                        "type": "custom",
                        "tokenizer": "standard"
                    },
                    "content_stemming": {
                        "filter": [
                            "lowercase",
                            "porter_stem"
                        ],
                        "char_filter": [
                            "CharFilters"
                        ],
                        "type": "custom",
                        "tokenizer": "standard"
                    },
                    "content_standard": {
                        "filter": [
                            "lowercase",
                            "StopWords"
                        ],
                        "char_filter": [
                            "CharFilters"
                        ],
                        "type": "custom",
                        "tokenizer": "standard"
                    }
                },
                "char_filter": {
                    "CharFilters": {
                        "type": "mapping",
                        "mappings": [
                            ". => ' '",
                            "' => ' '",
                            "_ => ' '",
                            ": => ' '"
                        ]
                    }
                }
            },
            "number_of_replicas": "0"
        }
    }}
  

2:为文档建立索引

 http://localhost:9200/essearch/object/1
{ "content" : "beginning thirty days after the anticipated COD. 
             Buyer shall be responsible for all natural gas and electrical imbalance charges.
             All prices shall be at the Reference Conditions.
             Buyer’s performance of its obligations under the ECSA with a form of guarantee in an amount. Seller shall assign its rights under said requests to Buyer.  Buyer shall have full dispatch rights subject to operational parameters  (including ramp rates. buyer said to me..."   }
  

3:执行了突出显示查询

    http://localhost:9200/essearch/_search
 {
 "highlight": {
"pre_tags": [ "<term0 style='background-color:Lime'>", "<term1 style='background-color:Chocolate'>", "<term2 style='background-color:Pink'>"
],"post_tags": [ "</term0>", "</term1>", "</term2>" ],
"encoder": "html",
"fields": { "content": { "fragment_size": 50, "number_of_fragments": 0, "type": "fvh" } } },
"_source": false,
"query": {
"bool": {
  "must": [
    {
      "query_string": {
        "query": "(\"under said\") OR (said) OR (buyer)",
        "default_field": "content"}} ],
  "filter": [
    {
      "ids": {
        "values": [ "1" ] } } ] } } }
  

4:突出显示查询输出

    {
"took": 0,
"timed_out": false,
"_shards": {
    "total": 1,
    "successful": 1,
    "failed": 0
},
"hits": {
    "total": 1,
    "max_score": 0.30490398,
    "hits": [
        {
            "_index": "essearch",
            "_type": "object",
            "_id": "1",
            "_score": 0.30490398,
            "highlight": {
                "content": [
                    "beginning thirty days after the anticipated COD.
                    <term1 style='background-color:Chocolate'>Buyer</term1> 
                    shall be responsible for all natural gas and electrical imbalance charges.
                    All prices shall be at the Reference Conditions.Buyer’s performance of its obligations under the ECSA with a form of guarantee in an amount. Seller shall assign its rights under <term0 style='background-color:Lime'>said</term0> requests    to <term1 style='background-color:Chocolate'>Buyer</term1>. <term1 style='background-color:Chocolate'>Buyer</term1> shall have full dispatch rights subject to operational parameters (including ramp rates. <term1 style='background-color:Chocolate'>buyer</term1> <term0 style='background-color:Lime'>said</term0> to me..."
                ]    }  } ] } }

如您所见,我们根据查询词的数量提供了 pre/post 标签。这里有 3 个用 OR 运算符连接的查询词,因此总共提供了三组 pre/post 标签。执行高亮查询后,按顺序应当对 "said" 应用 term1 标签,但 ES 却对 "said" 应用了 term0 标签,对 "buyer" 应用了 term1 标签。

2 个答案:

答案 0 :(得分:1)

我不确定这是否是您的映射问题,因为我用相同的文本和查询运行后得到了预期的结果。可能与查询中的其他字段有关。我发现使用 highlight_query 来隔离要高亮显示的内容,有助于理清标签的应用顺序。

(原答案此处的代码片段在转载时丢失,被替换成了一行无关的 CSS;原内容可能是所使用的查询。)

我得到的高亮结果:

"beginning thirty days after the anticipated COD. \n             <term2 style='background-color:Pink'>Buyer</term2> shall be responsible for all natural gas and electrical imbalance charges.\n             All prices shall be at the Reference Conditions.\n             <term2 style='background-color:Pink'>Buyer’s</term2> performance of its obligations under the ECSA with a form of guarantee in an amount. Seller shall assign its rights <term0 style='background-color:Lime'>under said</term0> requests to <term2 style='background-color:Pink'>Buyer</term2>.  <term2 style='background-color:Pink'>Buyer</term2> shall have full dispatch rights subject to operational parameters  (including ramp rates. <term2 style='background-color:Pink'>buyer</term2> <term1 style='background-color:Chocolate'>said</term1> to me..."

我的映射:

{
        "mappings": {
            "properties": {
                "text": {
                    "type": "text",
                    "term_vector": "with_positions_offsets",
                    "analyzer": "english",
                    "analyzer": "english"
                }                
            }
        }
    }

我的文档:

{"text": """beginning thirty days after the anticipated COD. 
             Buyer shall be responsible for all natural gas and electrical imbalance charges.
             All prices shall be at the Reference Conditions.
             Buyer’s performance of its obligations under the ECSA with a form of guarantee in an amount. Seller shall assign its rights under said requests to Buyer.  Buyer shall have full dispatch rights subject to operational parameters  (including ramp rates. buyer said to me..."""}

答案 1 :(得分:0)

请尝试使用Unified Highlighter代替Fast Vector Highlighter或为FVH定义boundary_scanner。

您也可以使用特殊的highlight_query来突出显示。使用突出显示查询,您可以指定其他查询以进行搜索和突出显示。