Elastic中的geo_point

时间:2017-07-23 10:50:54

标签: elasticsearch logstash elastic-stack logstash-configuration

我正在尝试将纬度和经度映射到Elastic中的geo_point。

这是我的日志文件条目:

13-01-2017 ORDER COMPLETE: £22.00 Glasgow, 55.856299, -4.258845

这是我的conf文件

input {
file {
  path => "/opt/logs/orders.log"
  start_position => "beginning"
 }
}

filter {
   grok {
       match => { "message" => "(?<date>[0-9-]+) (?<order_status>ORDER [a-zA-Z]+): (?<order_amount>£[0-9.]+) (?<order_location>[a-zA-Z ]+)"}
}

mutate {
       convert => { "order_amount" => "float" }
       convert => { "order_lat" => "float" }
       convert => { "order_long" => "float" }

       rename => {
                  "order_long" => "[location][lon]"
                  "order_lat" => "[location][lat]"
       }
 }
}

output {
      elasticsearch {
               hosts => "localhost"

               index => "sales"
               document_type => "order"

     }
    stdout {}
}

我使用/bin/logstash -f orders.conf启动logstash,这会给出:

"@version"=>{"type"=>"keyword", "include_in_all"=>false}, "geoip"=>{"dynamic"=>true,
"properties"=>{"ip"=>{"type"=>"ip"},
"location"=>{"type"=>"geo_point"}, "latitude"=>{"type"=>"half_float"},
"longitude"=>{"type"=>"half_float"}}}}}}}}

看到了吗?这里location被视为geo_point。然而GET sales/_mapping会产生这样的结果:

"location": {
        "properties": {
          "lat": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "lon": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          }
        }
      },

更新

每次重新索引时,我都会停止logstash,然后从/opt/logstash/data/plugins/inputs/file...中删除.sincedb。我还创建了一个全新的日志文件,每次都增加索引(我目前最多sales7)。

配置文件

input {
   file {
     path => "/opt/ag-created/logs/orders2.log"
     start_position => "beginning"
   }
}

filter {
  grok {
     match => { "message" => "(?<date>[0-9-]+) (?<order_status>ORDER [a-zA-Z]+): (?<order_amount>£[0-9.]+) (?<order_location>[a-zA-Z ]+), (?<order_lat>[0-9.]+), (?<order_long>[-0-9.]+)( - (?<order_failure_reason>[A-Za-z :]+))?" }
}

 mutate {
    convert => { "order_amount" => "float" }
 }

 mutate {
    convert => { "order_lat" => "float" }
 }

 mutate {
    convert => { "order_long" => "float" }
 }

 mutate {
     rename => { "order_long" => "[location][lon]" }
 }

 mutate {
     rename => { "order_lat" => "[location][lat]" }
  }
 }

output {
      elasticsearch {
               hosts => "localhost"
               index => "sales7"
               document_type => "order"
               template_name => "myindex"
               template => "/tmp/templates/custom-orders2.json"
               template_overwrite => true
     }

    stdout {}
}

JSON文件

 {
  "template": "sales7",
  "settings": {
    "index.refresh_interval": "5s"
 },
"mappings": {
"sales": {
  "_source": {
    "enabled": false
  },
  "properties": {
    "location": {
      "type": "geo_point"
        }
      }
    }
  },
  "aliases": {}
  }

有趣的是,当geo_point映射不起作用时(即lat和long都是浮点数),我的数据被索引(30行)。但是,当location正确地成为geo_point时,我的所有行都没有编入索引。

1 个答案:

答案 0(得分:1)

有两种方法可以做到这一点。第一种是为映射创建模板,以便在为数据建立索引时创建正确的映射,因为Elasticsearch无法自行推断出您的数据类型。您应该按照下面的方式进行设置。

首先,为您的映射结构创建一个template.json文件:

{
  "template": "sales*",
  "settings": {
    "index.refresh_interval": "5s"
  },
  "mappings": {
    "sales": {
      "_source": {
        "enabled": false
      },
      "properties": {
        "location": {
          "type": "geo_point"
        }
      }
    }
  },
  "aliases": {}
}

之后更改您的logstash配置以将此映射放入索引:

input {
file {
  path => "/opt/logs/orders.log"
  start_position => "beginning"
 }
}

filter {
   grok {
       match => { "message" => "(?<date>[0-9-]+) (?<order_status>ORDER [a-zA-Z]+): (?<order_amount>£[0-9.]+) (?<order_location>[a-zA-Z ]+)"}
}

mutate {
       convert => { "order_amount" => "float" }
       convert => { "order_lat" => "float" }
       convert => { "order_long" => "float" }

       rename => {
                  "order_long" => "[location][lon]"
                  "order_lat" => "[location][lat]"
       }
 }
}

output {
      elasticsearch {
                hosts => "localhost"

                index => "sales"
                document_type => "order"
                template_name => "myindex"
                template => "/etc/logstash/conf.d/template.json"
                template_overwrite => true


     }
    stdout {}
}

第二个选项是摄取节点(ingest node)功能。我稍后会针对这个选项更新答案,但现在您可以查看my dockerized repository。在这个例子中,我在解析位置数据时使用了摄取节点功能而不是模板。