Elasticsearch同义词返回0结果

时间:2016-08-17 08:08:45

标签: elasticsearch elasticsearch-2.0

我创建了一个名为“synonym_filter”的同义词过滤器

"synonym_filter": {
                 "type": "synonym",
                 "synonyms": [
                    "adidas, xyz, abc",
                    "nike, rofl, lol"
                 ]
              }

以及一个名为“synonyms”的分析器

 "synonyms": {
                 "filter": [
                    "lowercase",
                    "synonym_filter"
                 ],
                 "tokenizer": "standard"
              },

以上均按照 Elasticsearch 文档进行配置。完整的设置现在如下所示:

GET /test6_de_idx1/_settings

   "test6_de_idx1": {
  "settings": {
     "index": {
        "creation_date": "1471372087742",
        "analysis": {
           "filter": {
              "edge_ngram_back": {
                 "min_gram": "2",
                 "side": "back",
                 "type": "edgeNGram",
                 "max_gram": "10"
              },
              "edge_ngram_front": {
                 "min_gram": "2",
                 "side": "front",
                 "type": "edgeNGram",
                 "max_gram": "10"
              },
              "synonym_filter": {
                 "type": "synonym",
                 "synonyms": [
                    "adidas, xyz, abc",
                    "nike, rofl, lol"
                 ]
              },
              "stop": {
                 "type": "stop",
                 "stopwords": "_german_"
              },
              "strip_hyphens": {
                 "pattern": "-",
                 "type": "pattern_replace",
                 "replacement": ""
              },
              "length": {
                 "type": "length",
                 "min": "2"
              },
              "strip_spaces": {
                 "pattern": "\\s",
                 "type": "pattern_replace",
                 "replacement": ""
              },
              "snowball": {
                 "type": "snowball",
                 "language": "German"
              },
              "strip_dots": {
                 "pattern": "\\.",
                 "type": "pattern_replace",
                 "replacement": ""
              }
           },
           "analyzer": {
              "std": {
                 "filter": [
                    "standard",
                    "elision",
                    "asciifolding",
                    "lowercase",
                    "stop",
                    "length"
                 ],
                 "char_filter": "html_strip",
                 "tokenizer": "standard"
              },
              "synonyms": {
                 "filter": [
                    "lowercase",
                    "synonym_filter"
                 ],
                 "tokenizer": "standard"
              },
              "keyword_suffix": {
                 "filter": [
                    "asciifolding",
                    "lowercase",
                    "strip_spaces",
                    "strip_dots",
                    "strip_hyphens",
                    "edge_ngram_back"
                 ],
                 "tokenizer": "keyword"
              },
              "text_suffix": {
                 "filter": [
                    "standard",
                    "elision",
                    "asciifolding",
                    "lowercase",
                    "stop",
                    "edge_ngram_back"
                 ],
                 "char_filter": "html_strip",
                 "tokenizer": "standard"
              },
              "language": {
                 "filter": [
                    "standard",
                    "elision",
                    "asciifolding",
                    "lowercase",
                    "stop",
                    "snowball",
                    "length"
                 ],
                 "char_filter": "html_strip",
                 "type": "custom",
                 "tokenizer": "standard"
              },
              "keyword": {
                 "filter": [
                    "asciifolding",
                    "lowercase",
                    "strip_spaces",
                    "strip_dots",
                    "strip_hyphens"
                 ],
                 "tokenizer": "keyword"
              },
              "keyword_prefix": {
                 "filter": [
                    "asciifolding",
                    "lowercase",
                    "strip_spaces",
                    "strip_dots",
                    "strip_hyphens",
                    "edge_ngram_front"
                 ],
                 "tokenizer": "keyword"
              },
              "text_prefix": {
                 "filter": [
                    "standard",
                    "elision",
                    "asciifolding",
                    "lowercase",
                    "stop",
                    "edge_ngram_front"
                 ],
                 "char_filter": "html_strip",
                 "tokenizer": "standard"
              }
           }
        },
        "number_of_shards": "1",
        "number_of_replicas": "0",
        "uuid": "sAiM27R2QOKLj9wjrwoAqw",
        "version": {
           "created": "2030399"
        }
     }
  }

}

使用以下请求测试分析器:
GET /test6_de_idx1/_analyze
{
  "analyzer" : "synonyms",
  "text" : "xyz is the English queen"
}

结果显示分析器工作正常:

"tokens": [
      {
         "token": "xyz",
         "start_offset": 0,
         "end_offset": 3,
         "type": "<ALPHANUM>",
         "position": 0
      },
      {
         "token": "adidas",
         "start_offset": 0,
         "end_offset": 3,
         "type": "SYNONYM",
         "position": 0
      },
      {
         "token": "abc",
         "start_offset": 0,
         "end_offset": 3,
         "type": "SYNONYM",
         "position": 0
      },
      {
         "token": "is",
         "start_offset": 4,
         "end_offset": 6,
         "type": "<ALPHANUM>",
         "position": 1
      },
      {
         "token": "the",
         "start_offset": 7,
         "end_offset": 10,
         "type": "<ALPHANUM>",
         "position": 2
      },
      {
         "token": "english",
         "start_offset": 11,
         "end_offset": 18,
         "type": "<ALPHANUM>",
         "position": 3
      },
      {
         "token": "queen",
         "start_offset": 19,
         "end_offset": 24,
         "type": "<ALPHANUM>",
         "position": 4
      }
   ]

但是,当我使用同义词(例如 xyz)进行测试搜索时,得到 0 条结果;而使用数据中实际存在的 adidas 进行搜索时,则可以得到结果。

GET /test6_de_idx1/_search?q=xyz&size=5



 "took": 1,
   "timed_out": false,
   "_shards": {
      "total": 1,
      "successful": 1,
      "failed": 0
   },
   "hits": {
      "total": 0,
      "max_score": null,
      "hits": []
   }

问题可能出在哪里?

编辑:

感谢您的快速回复!对于搜索词“adidas”,命中结果(hits)如下:

"hits": [
         {
            "_index": "test6_de_idx1",
            "_type": "product",
            "_id": "59279",
            "_score": 1.0859994,
            "_source": {
               "id": 59279,
               "sku": "0002-10780",
               "type_id": "configurable",
               "brand": "Adidas",
               "color": "Mehrfarbig",
               "manufacturer": "Adidas",
               "material": "Polyester",
               "model": "Damen",
               "producttype": "Tank",
               "status": 1,
               "tax_class_id": 1,
               "visibility": 4,
               "price": 24.99,
               "weight": 0,
               "image": "http://sportokay.dev/skin/frontend/default/default/images/catalog/product/placeholder/image.jpg",
               "name": "Adidas Keyhole Tank Damen Fitnessshirt",
               "description": "Das Adidas Keyhole Tank Damen Fitnessshirt ist ein leichtes, weiches Fitness T-Shirt aus Adidas Climalite Material, welches optimales Feuchtigkeitsmanagement ermöglicht.\r\n- 100 % Polyester\r\n- Adidas Climalite\r\n- Lockere Passform\r\n- Schlitz am Rücken",
               "short_description": "Adidas Keyhole Tank Damen Fitnessshirt",
               "_categories": [
                  "Damen",
                  "Alle",
                  "Fitness",
                  "Fitnessbekleidung",
                  "Shirts"
               ],
               "_prices": {
                  "price": 24.99,
                  "final_price": 24.99,
                  "minimal_price": 24.99,
                  "min_price": 24.99,
                  "max_price": 24.99,
                  "tier_price": null
               },
               "_url": "http://xxx.dev/at_de/adidas-keyhole-tank-damen-fitnessshirt.html"
            }
         }

GET /test6_de_idx1/_search?q=brand:xyz 没有返回任何结果,尽管 xyz 是 adidas 的同义词。

EDIT2:

这是当前使用的映射:

"test6_de_idx1": {
  "mappings": {
     "product": {
        "_all": {
           "analyzer": "std"
        },
        "properties": {
           "_categories": {
              "type": "string",
              "analyzer": "language",
              "include_in_all": true
           },
           "_parent_ids": {
              "type": "integer",
              "index": "no",
              "store": true
           },
           "_prices": {
              "properties": {
                 "final_price": {
                    "type": "double"
                 },
                 "max_price": {
                    "type": "double"
                 },
                 "min_price": {
                    "type": "double"
                 },
                 "minimal_price": {
                    "type": "double"
                 },
                 "price": {
                    "type": "double"
                 }
              }
           },
           "_url": {
              "type": "string",
              "index": "no",
              "store": true
           },
           "adjustment": {
              "type": "string",
              "norms": {
                 "enabled": false
              },
              "index_options": "docs",
              "fields": {
                 "std": {
                    "type": "string",
                    "norms": {
                       "enabled": false
                    },
                    "index_options": "docs",
                    "analyzer": "std"
                 }
              },
              "analyzer": "language",
              "include_in_all": true
           },
           "adjustmentrange": {
              "type": "string",
              "norms": {
                 "enabled": false
              },
              "index_options": "docs",
              "fields": {
                 "std": {
                    "type": "string",
                    "norms": {
                       "enabled": false
                    },
                    "index_options": "docs",
                    "analyzer": "std"
                 }
              },
              "analyzer": "language",
              "include_in_all": true
           },
           "antennas": {
              "type": "string",
              "norms": {
                 "enabled": false
              },
              "index_options": "docs",
              "fields": {
                 "std": {
                    "type": "string",
                    "norms": {
                       "enabled": false
                    },
                    "index_options": "docs",
                    "analyzer": "std"
                 }
              },
              "analyzer": "language",
              "include_in_all": true
           },
           "backlength": {
              "type": "string",
              "norms": {
                 "enabled": false
              },
              "index_options": "docs",
              "fields": {
                 "std": {
                    "type": "string",
                    "norms": {
                       "enabled": false
                    },
                    "index_options": "docs",
                    "analyzer": "std"
                 }
              },
              "analyzer": "language",
              "include_in_all": true
           },
           "bike_colour": {
              "type": "string",
              "fields": {
                 "prefix": {
                    "type": "string",
                    "analyzer": "text_prefix",
                    "search_analyzer": "std"
                 },
                 "std": {
                    "type": "string",
                    "analyzer": "std"
                 },
                 "suffix": {
                    "type": "string",
                    "analyzer": "text_suffix",
                    "search_analyzer": "std"
                 }
              },
              "analyzer": "language",
              "include_in_all": true
           },....
 "brand": {
              "type": "string",
              "norms": {
                 "enabled": false
              },
              "index_options": "docs",
              "fields": {
                 "std": {
                    "type": "string",
                    "norms": {
                       "enabled": false
                    },
                    "index_options": "docs",
                    "analyzer": "std"
                 }
              },
              "analyzer": "language",
              "include_in_all": true
           },

EDIT3:

我将“synonyms”分析器作为子字段(brand.synonyms)添加到了映射中的 brand 属性,但它仍然不起作用。我做得对吗?

"brand": {
              "type": "string",
              "norms": {
                 "enabled": false
              },
              "index_options": "docs",
              "fields": {
                 "std": {
                    "type": "string",
                    "norms": {
                       "enabled": false
                    },
                    "index_options": "docs",
                    "analyzer": "std"
                 },
                 "synonyms": {
                    "type": "string",
                    "analyzer": "synonyms"
                 }
              },
              "analyzer": "language",
              "include_in_all": true
           },

1 个答案:

答案 0 :(得分:1)

尝试 GET /test6_de_idx1/_search?q=some_field:xyz&size=5,也就是在查询中明确指定字段名称(some_field)。否则,查询将使用 _all 字段,而该字段并未使用您的同义词分析器。

q=xyz 会被转换为 query_string 查询,默认情况下它搜索 _all 字段(在上面的映射中,_all 使用的是不含同义词过滤器的 std 分析器)。