Elasticsearch:带拉丁字符的简单查询字符串

时间:2016-03-11 14:39:12

标签: elasticsearch

我在以下文字中使用了一个简单的查询字符串:

  

Jiboia de três metros é capturada em avenida de Governador

注:这是我的 message 字段的内容

我的查询字符串(无结果)

"simple_query_string":{
     "query":"tr\u00eas",
     "fields":["message","author.name","author.id"],
     "default_operator":"AND"
}

我的查询字符串(1个结果)

"simple_query_string":{
     "query":"Jiboia",
     "fields":["message","author.name","author.id"],
     "default_operator":"AND"
}

处理拉丁字符有什么技巧吗?

我的映射:

{"mentions-2016.02.26":{"aliases":{"mentions_ro":{},"mentions_rw":{}},"mappings":{"mention":{"dynamic_templates":[{"analyzer":{"mapping":{"type":"string","index":"not_analyzed","store":"no"},"match":"*","match_mapping_type":"string"}}],"date_detection":false,"properties":{"analytics":{"properties":{"collect_delay":{"type":"long"},"number_of_replies":{"type":"long"},"twitter_reach":{"type":"long"},"youtube_views":{"type":"long"}}},"author":{"properties":{"gender":{"type":"string","index":"not_analyzed"},"id":{"type":"string"},"locale":{"properties":{"area":{"type":"string","index":"not_analyzed"},"country":{"type":"string","index":"not_analyzed"}}},"name":{"type":"string"},"platform_id":{"type":"long"}}},"created_at":{"type":"date","format":"dateOptionalTime"},"elastic_date":{"type":"date","format":"dateOptionalTime"},"id":{"type":"long"},"items_batch_created":{"type":"date","format":"dateOptionalTime"},"message":{"type":"string"},"metadata":{"properties":{"event":{"type":"string","index":"not_analyzed"},"timestamp":{"type":"long"}}},"monitoring":{"properties":{"id":{"type":"long"},"owner":{"properties":{"email":{"type":"string","index":"not_analyzed"},"id":{"type":"long"},"plan":{"properties":{"active":{"type":"string","index":"not_analyzed"},"name":{"type":"string","index":"not_analyzed"},"paid":{"type":"string","index":"not_analyzed"}}}}}}},"parent_id":{"type":"long"},"published_at":{"type":"date","format":"dateOptionalTime"},"raw_content":{"properties":{"actor_link":{"type":"string","index":"not_analyzed"},"aid":{"type":"string","index":"not_analyzed"},"atom_content":{"type":"string","index":"not_analyzed"},"attachment_content":{"type":"string","index":"not_analyzed"},"attachment_image":{"type":"string","index":"not_analyzed"},"attachment_text":{"type":"string","index":"not_analyzed"},"attachment_url":{"type":"string","index":"not_analyzed"},"attribution":{"type":"string","index":"not_analyzed"},"author":{"type":"string","index":"not_analyzed"},"author_name":{"ty
pe":"string","index":"not_analyzed"},"author_uri":{"type":"string","index":"not_analyzed"},"can_comment":{"type":"string","index":"not_analyzed"},"caption":{"type":"string","index":"not_analyzed"},"cast":{"type":"string","index":"not_analyzed"},"category":{"type":"string","index":"not_analyzed"},"channellink":{"type":"string","index":"not_analyzed"},"channeltitle":{"type":"string","index":"not_analyzed"},"comment_id":{"type":"string","index":"not_analyzed"},"comment_info":{"type":"string","index":"not_analyzed"},"comment_real_id":{"type":"string","index":"not_analyzed"},"comments":{"type":"string","index":"not_analyzed"},"content":{"type":"string","index":"not_analyzed"},"created_at":{"type":"string","index":"not_analyzed"},"created_time":{"type":"string","index":"not_analyzed"},"createdat":{"type":"long"},"date_timestamp":{"type":"string","index":"not_analyzed"},"dateuploaded":{"type":"string","index":"not_analyzed"},"description":{"type":"string","index":"not_analyzed"},"displayName":{"type":"string","index":"not_analyzed"},"download":{"type":"string","index":"not_analyzed"},"downloadurl":{"type":"string","index":"not_analyzed"},"duration":{"type":"string","index":"not_analyzed"},"embed":{"type":"string","index":"not_analyzed"},"embed_privacy":{"type":"string","index":"not_analyzed"},"farm":{"type":"long"},"firstname":{"type":"string","index":"not_analyzed"},"flickrid":{"type":"string","index":"not_analyzed"},"fonte_id":{"type":"string","index":"not_analyzed"},"format":{"type":"string","index":"not_analyzed"},"fotoPai":{"type":"string","index":"not_analyzed"},"from_id":{"type":"string","index":"not_analyzed"},"from_name":{"type":"string","index":"not_analyzed"},"from_user":{"type":"string","index":"not_analyzed"},"from_user_id":{"type":"string","index":"not_analyzed"},"from_user_profile_image_url":{"type":"string","index":"not_analyzed"},"gdcomments":{"type":"string","index":"not_analyzed"},"gender":{"type":"string","index":"not_analyzed"},"guid":{"type":"string",
"index":"not_analyzed"},"height":{"type":"string","index":"not_analyzed"},"icon":{"type":"string","index":"not_analyzed"},"idComment":{"type":"string","index":"not_analyzed"},"idVideo":{"type":"string","index":"not_analyzed"},"id_externo":{"type":"string","index":"not_analyzed"},"idexterno":{"type":"string","index":"not_analyzed"},"image":{"type":"string","index":"not_analyzed"},"imagem":{"type":"string","index":"not_analyzed"},"impactoyoutube":{"type":"string","index":"not_analyzed"},"inReplyTo":{"properties":{"id":{"type":"string","index":"not_analyzed"},"url":{"type":"string","index":"not_analyzed"}}},"in_reply_to_screen_name":{"type":"string","index":"not_analyzed"},"in_reply_to_status_id":{"type":"long"},"incontest":{"type":"string","index":"not_analyzed"},"isPicture":{"type":"boolean"},"is_hd":{"type":"string","index":"not_analyzed"},"is_private":{"type":"string","index":"not_analyzed"},"is_transcoding":{"type":"string","index":"not_analyzed"},"iso_language_code":{"type":"string","index":"not_analyzed"},"klout":{"type":"long"},"language":{"type":"string","index":"not_analyzed"},"like_info":{"type":"string","index":"not_analyzed"},"likes":{"type":"string","index":"not_analyzed"},"link":{"type":"string","index":"not_analyzed"},"link_related":{"type":"string","index":"not_analyzed"},"link_self":{"type":"string","index":"not_analyzed"},"location":{"type":"string","index":"not_analyzed"},"mediacategory":{"type":"string","index":"not_analyzed"},"mediacontent":{"type":"string","index":"not_analyzed"},"mediadescription":{"type":"string","index":"not_analyzed"},"mediakeywords":{"type":"string","index":"not_analyzed"},"mediaplayer":{"type":"string","index":"not_analyzed"},"mediathumbnail":{"type":"string","index":"not_analyzed"},"mediatitle":{"type":"string","index":"not_analyzed"},"message":{"type":"string","index":"not_analyzed"},"modified_date":{"type":"string","index":"not_analyzed"},"monitoramento_id":{"type":"string","index":"not_analyzed"},"name":{"type":"string"
,"index":"not_analyzed"},"note_count":{"type":"long"},"number_of_comments":{"type":"string","index":"not_analyzed"},"number_of_likes":{"type":"string","index":"not_analyzed"},"number_of_plays":{"type":"string","index":"not_analyzed"},"owner":{"type":"string","index":"not_analyzed"},"parent_id":{"type":"string","index":"not_analyzed"},"permalink":{"type":"string","index":"not_analyzed"},"photo":{"properties":{"default":{"type":"boolean"},"prefix":{"type":"string","index":"not_analyzed"},"suffix":{"type":"string","index":"not_analyzed"}}},"picture":{"type":"string","index":"not_analyzed"},"post_id":{"type":"string","index":"not_analyzed"},"privacy":{"type":"string","index":"not_analyzed"},"profile_image_url":{"type":"string","index":"not_analyzed"},"profile_picture":{"type":"string","index":"not_analyzed"},"pubdate":{"type":"string","index":"not_analyzed"},"publicado":{"type":"string","index":"not_analyzed"},"published":{"type":"string","index":"not_analyzed"},"realname":{"type":"string","index":"not_analyzed"},"removido":{"type":"string","index":"not_analyzed"},"retroactive":{"type":"boolean"},"secret":{"type":"string","index":"not_analyzed"},"secretkey":{"type":"string","index":"not_analyzed"},"server":{"type":"string","index":"not_analyzed"},"share_id":{"type":"string","index":"not_analyzed"},"slide_id":{"type":"string","index":"not_analyzed"},"slideshowembedurl":{"type":"string","index":"not_analyzed"},"slideshowtype":{"type":"string","index":"not_analyzed"},"source":{"type":"string","index":"not_analyzed"},"src_big":{"type":"string","index":"not_analyzed"},"status":{"type":"string","index":"not_analyzed"},"summary":{"type":"string","index":"not_analyzed"},"t_id":{"type":"string","index":"not_analyzed"},"tags":{"type":"string","index":"not_analyzed"},"text":{"type":"string","index":"not_analyzed"},"texto":{"type":"string","index":"not_analyzed"},"thumbnail":{"type":"string","index":"not_analyzed"},"thumbnails":{"type":"string","index":"not_analyzed"},"thumbnailsiz
e":{"type":"string","index":"not_analyzed"},"thumbnailsmallurl":{"type":"string","index":"not_analyzed"},"thumbnailurl":{"type":"string","index":"not_analyzed"},"thumbnailxlargeurl":{"type":"string","index":"not_analyzed"},"thumbnailxxlargeurl":{"type":"string","index":"not_analyzed"},"tip_id":{"type":"string","index":"not_analyzed"},"title":{"type":"string","index":"not_analyzed"},"to":{"type":"string","index":"not_analyzed"},"to_user":{"type":"string","index":"not_analyzed"},"to_user_id":{"type":"long"},"tumblr_id":{"type":"string","index":"not_analyzed"},"tweet_id":{"type":"string","index":"not_analyzed"},"type":{"type":"string","index":"not_analyzed"},"update_id":{"type":"string","index":"not_analyzed"},"updated":{"type":"string","index":"not_analyzed"},"updatedVideo":{"type":"string","index":"not_analyzed"},"updated_time":{"type":"string","index":"not_analyzed"},"upload_date":{"type":"string","index":"not_analyzed"},"url":{"type":"string","index":"not_analyzed"},"urls":{"type":"string","index":"not_analyzed"},"user_id":{"type":"string","index":"not_analyzed"},"user_url":{"type":"string","index":"not_analyzed"},"userimageurl":{"type":"string","index":"not_analyzed"},"username":{"type":"string","index":"not_analyzed"},"userurl":{"type":"string","index":"not_analyzed"},"veioDoAlbum":{"type":"boolean"},"video":{"type":"long"},"vimeo_id":{"type":"string","index":"not_analyzed"},"wall_id":{"type":"string","index":"not_analyzed"},"width":{"type":"string","index":"not_analyzed"},"ytduration":{"type":"string","index":"not_analyzed"}}},"raw_content_hash":{"type":"string","index":"not_analyzed"},"search":{"properties":{"id":{"type":"long"},"social_network":{"type":"string","index":"not_analyzed"},"type":{"type":"string","index":"not_analyzed"},"type_id":{"type":"string","index":"not_analyzed"}}},"sentiment":{"properties":{"automatic":{"properties":{"active":{"type":"long"},"precision":{"type":"long"},"value":{"type":"string","index":"not_analyzed"}}},"value":{"type":"stri
ng","index":"not_analyzed"}}},"tag":{"properties":{"count":{"type":"long"},"ids":{"type":"string","index":"not_analyzed"}}},"title":{"type":"string","index":"not_analyzed"},"type":{"type":"string","index":"not_analyzed"},"updated_at":{"type":"date","format":"dateOptionalTime"},"words":{"type":"string","index":"not_analyzed"}}}},"settings":{"index":{"refresh_interval":"2s","number_of_shards":"7","gc_deletes":"1814400","creation_date":"1456497520658","number_of_replicas":"2","version":{"created":"1050299"},"uuid":"sp4CJpxMRf-_z0bUtHTrjA"}},"warmers":{}}}

2 个答案:

答案 0 :(得分:0)

你需要让 Elasticsearch 知道如何处理这些字符。我使用自定义标记器做了一个示例,如下所示:

curl -XPOST "http://192.168.99.100:9200/my_type/my_type/my_type" -d'
{
    "settings" : {
        "index" : {
            "number_of_shards" : 1,
            "number_of_replicas" : 1
        },  
        "analysis" : {
            "filter" : {
                "custom_filter" : {
                    "type" : "word_delimiter",
                    "type_table": ["ê => ALPHA", "Ê => ALPHA"]
                }   
            },
            "analyzer" : {
                "custom_analyzer" : {
                    "type" : "custom",
                    "tokenizer" : "whitespace",
                    "filter" : ["lowercase", "custom_filter"]
                }
            }
        }
    },
    "mappings" : {
        "my_type" : {
            "properties" : {
                "msg" : {
                    "type" : "string",
                    "analyzer" : "custom_analyzer"
                }
            }
        }
    }
}'

我刚刚创建了一个使用标记器的分析器,它知道ê和Ê需要被解释为字符。

之后我只在我的msg字段中进行搜索

curl -XPOST "http://192.168.99.100:9200/my_type/my_type/my_type/my_type" -d'
{
    "msg":"três"
}'

这样就可以正常工作了 :D

答案 1 :(得分:0)

我发现了问题。

我使用 JavaScript 的 atob 函数在 Elasticsearch 索引后解码消息。atob 函数与拉丁字符不兼容,会破坏这些字符。我把 atob 换成了 Node.js 的原生 Buffer 类。

  

注:默认的分析器可以完美处理拉丁字符!

抱歉!