How to filter a simple message through Logstash into Elasticsearch, splitting the message into several fields

Date: 2017-04-07 16:41:06

Tags: elasticsearch logstash logstash-grok logstash-configuration logstash-file

This is the input file:

{"meta":"","level":"error","message":"clientErrorHandler: Erro não previsto ou mapeado durante chamada dos serviços.","timestamp":"2017-04-06T16:08:37.861Z"}
{"meta":"","level":"error","message":"clientErrorHandler: Erro não previsto ou mapeado durante chamada dos serviços.","timestamp":"2017-04-06T19:40:17.682Z"}

Basically, logs like these are produced by my Node.js application through the Winston module. My question boils down to how to adjust the Logstash filter so that four fields get created in Elasticsearch.

My goal is to see "columns" (I suppose "properties" or "fields" may be better words in the Elasticsearch context): level (e.g. error), message_source (e.g. clientErrorHandler), message_content (e.g. Erro não ... serviços) and error_time without the milliseconds (e.g. 2017-04-06T19:40:17).

Here is where I am stuck:

1 - I used this logstash.conf:

input {
  file {
    path => "/home/demetrio/dev/testes_manuais/ELK/logs/*"
    start_position => "beginning"
  }
}

filter {
  grok {
    match => {
      "message" => '%{SYSLOG5424SD:loglevel} %{TIMESTAMP_ISO8601:Date} %{GREEDYDATA:content}'
    }
  }

  date {
    match => [ "Date", "YYYY-mm-dd HH:mm:ss.SSS" ]
    locale => en
  }
}

output {
  stdout {
    codec => plain {
      charset => "ISO-8859-1"
    }
  }

  elasticsearch {
    hosts => "http://127.0.0.1:9200"
    index => "dmz-logs-indice"
  }
}

2 - I searched Elasticsearch through the Kibana Dev Tools:

GET _search
{
  "query": {
    "match_all": {}
  }
}

I saw:

{
  "took": 5,
  "timed_out": false,
  "_shards": {
    "total": 6,
    "successful": 6,
    "failed": 0
  },
  "hits": {
    "total": 3,
    "max_score": 1,
    "hits": [
      {
        "_index": ".kibana",
        "_type": "config",
        "_id": "5.3.0",
        "_score": 1,
        "_source": {
          "buildNum": 14823
        }
      },
      {
        "_index": "dmz-logs-indice",
        "_type": "logs",
        "_id": "AVtJLZ5x6gscWn5fxxA_",
        "_score": 1,
        "_source": {
          "path": "/home/demetrio/dev/testes_manuais/ELK/logs/logs.log",
          "@timestamp": "2017-04-07T16:09:36.996Z",
          "@version": "1",
          "host": "nodejs",
          "message": """{"meta":"","level":"error","message":"clientErrorHandler: Erro não previsto ou mapeado durante chamada dos serviços.","timestamp":"2017-04-06T16:08:37.861Z"}""",
          "tags": [
            "_grokparsefailure"
          ]
        }
      },
      {
        "_index": "dmz-logs-indice",
        "_type": "logs",
        "_id": "AVtJLZ5x6gscWn5fxxBA",
        "_score": 1,
        "_source": {
          "path": "/home/demetrio/dev/testes_manuais/ELK/logs/logs.log",
          "@timestamp": "2017-04-07T16:09:36.998Z",
          "@version": "1",
          "host": "nodejs",
          "message": """{"meta":"","level":"error","message":"clientErrorHandler: Erro não previsto ou mapeado durante chamada dos serviços.","timestamp":"2017-04-06T19:40:17.682Z"}""",
          "tags": [
            "_grokparsefailure"
          ]
        }
      }
    ]
  }
}

I suppose I should use some regular expression or grok to split the message into four pieces:

1 - level
2 - the part of the message before the ":"
3 - the part of the message after the ":"
4 - the timestamp

And, if possible, give the columns (fields/properties) better labels, such as:

1 - level
2 - message_source
3 - message_content
4 - error_time

And finally remove the milliseconds from the timestamp.
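
In other words, what I would like to end up with in Elasticsearch is a document roughly like this (illustrative only, field names are just my proposal):

{
  "level": "error",
  "message_source": "clientErrorHandler",
  "message_content": "Erro não previsto ou mapeado durante chamada dos serviços.",
  "error_time": "2017-04-06T19:40:17"
}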

PS: In case any future reader is interested in how I do the logging on the Node.js side, here it is:

...

var winston = require('winston');
winston.emitErrs = true;

var logger = new winston.Logger({
    transports: [
        new winston.transports.File({
            level: 'error',
            filename: './logs/logs.log',
            handleExceptions: true,
            json: true,
            maxsize: 5242880, //5MB
            maxFiles: 5,
            colorize: false,
            prettyPrint: true
        })               
    ],
    exitOnError: false
});

...

function clientErrorHandler(err, req, res, next) {
  logger.log("error", "clientErrorHandler: Erro não previsto ou mapeado durante chamada dos serviços.", err.message);

  res.send(500, { error: 'Erro genérico!' });
}

app.use(clientErrorHandler);

PS2: I carefully read questions like Filter specific Message with logstash before sending to ElasticSearch, but I am really stuck.

1 Answer:

Answer 0 (score: 1)

Since your application writes its logs out as JSON strings, you can configure Logstash to parse the logs as JSON. This is as simple as adding codec => "json" to your file input configuration.

Here is a sample configuration for your scenario:

input {
  file {
    path => "/home/demetrio/dev/testes_manuais/ELK/logs/*"
    start_position => "beginning"
    codec => "json"
  }
}

filter {
  # This parses the `timestamp` field into the `@timestamp` field for Kibana to consume.
  date {
    match => [ "timestamp", "ISO8601" ]
    remove_field => [ "timestamp" ]
  }
}

output {
  stdout {
    # This codec gives you more details about the event.
    codec => rubydebug
  }

  elasticsearch {
    hosts => "http://127.0.0.1:9200"
    index => "dmz-logs-indice"
  }
}

Here is a sample stdout output from Logstash:

{
          "path" => "/home/demetrio/dev/testes_manuais/ELK/logs/demo.log",
    "@timestamp" => 2017-04-06T19:40:17.682Z,
         "level" => "error",
          "meta" => "",
      "@version" => "1",
          "host" => "dbf718c4b8e4",
       "message" => "clientErrorHandler: Erro não previsto ou mapeado durante chamada dos serviços.",
}
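
Note that with this configuration the message field still holds the entire "clientErrorHandler: Erro não ..." string, and @timestamp keeps its milliseconds. If you also want the message_source/message_content split and an error_time field without milliseconds, one way is to extend the filter block with a grok and a couple of mutate filters. This is only a sketch (untested, and the field names are simply the ones you proposed):

filter {
  # Split the message on the first ": " into source and content.
  grok {
    match => {
      "message" => "%{DATA:message_source}: %{GREEDYDATA:message_content}"
    }
  }

  # Copy the parsed @timestamp into error_time, then strip the
  # millisecond part (e.g. ".682Z") to get "2017-04-06T19:40:17".
  mutate {
    add_field => { "error_time" => "%{@timestamp}" }
  }
  mutate {
    gsub => [ "error_time", "\.\d{3}Z$", "" ]
  }
}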