Before configuring filebeat, you may want to review the earlier setup described in [ELK 6.3.2 installation and configuration (cross-network forwarding)](https://www.linuxea.com/1889.html). I have since refined that configuration, mainly because I keep several nginx logs in a single directory.
Configuring filebeat
Previously each log file was filtered individually. Now a single `*.log` glob matches everything ending in .log and ships it to redis: in the filebeat configuration, every file under /data/wwwlogs/ ending in .log is collected and labeled via the `%{[fields.list_id]}` variable, which in this example is 172_nginx_access; the redis output then uses the same value as the key. The error logs are included as well.
[root@linuxea-0702-DTNode01 ~]# cat /etc/filebeat/filebeat.yml
filebeat.prospectors:
- type: log
  enabled: true
  paths:
    - /data/wwwlogs/*.log
  fields:
    list_id: 172_nginx_access
  exclude_files:
    - ^access
    - ^error
    - .gz$
filebeat.config.modules:
  path: ${path.config}/modules.d/*.yml
  reload.enabled: false
setup.template.settings:
  index.number_of_shards: 3
output.redis:
  hosts: ["47.90.33.131:6379"]
  password: "OTdmOWI4ZTM4NTY1M2M4OTZh"
  db: 2
  timeout: 5
  key: "%{[fields.list_id]:unknow}"
Files can also be excluded by full path:
exclude_files: ["/var/wwwlogs/error.log"]
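Once filebeat is running, it is worth confirming that events actually land in redis before wiring up logstash. A quick check with redis-cli (assuming it is available; host, password, db and key are the ones configured above, and the redis output pushes events onto a plain list):
redis-cli -h 47.90.33.131 -a OTdmOWI4ZTM4NTY1M2M4OTZh -n 2 LLEN 172_nginx_access
redis-cli -h 47.90.33.131 -a OTdmOWI4ZTM4NTY1M2M4OTZh -n 2 LRANGE 172_nginx_access 0 0
A growing LLEN means filebeat is shipping; LRANGE shows the raw JSON event, including the fields.list_id value that logstash will filter on.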
To improve performance, persistence is disabled on the redis side:
save ""
#save 900 1
#save 300 10
#save 60 10000
appendonly no
aof-rewrite-incremental-fsync no
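Whether persistence is really off can be verified at runtime with stock redis commands (the password is the one used in the filebeat output above):
redis-cli -a OTdmOWI4ZTM4NTY1M2M4OTZh CONFIG GET save
redis-cli -a OTdmOWI4ZTM4NTY1M2M4OTZh CONFIG GET appendonly
An empty save value and appendonly no mean neither RDB snapshots nor the AOF are active.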
The logstash configuration file
If you also installed logstash from the rpm, so did I. In logstash.yml I tuned pipeline.workers, the output workers, and pipeline.batch.size. The worker count can match the number of CPU cores; if logstash runs on a dedicated machine, it can be set somewhat higher. After trimming, the file looks like this:
[root@linuxea-VM-Node117 /etc/logstash]# cat logstash.yml
node.name: node1
path.data: /data/logstash/data
#path.config: *.yml
log.level: info
path.logs: /data/logstash/logs
pipeline.workers: 16
pipeline.output.workers: 16
pipeline.batch.size: 10000
pipeline.batch.delay: 10
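With the rpm package, logstash exposes its monitoring API on port 9600 by default, so you can confirm per pipeline that the worker and batch settings were actually picked up (127.0.0.1 is an assumption; adjust to wherever logstash runs):
curl -s 'http://127.0.0.1:9600/_node/pipelines?pretty'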
The pipelines configuration file
pipelines.yml lists all of the pipeline configuration files, that is, where each pipeline's config lives and how many workers it starts with:
[root@linuxea-VM-Node117 /etc/logstash]# cat pipelines.yml
# This file is where you define your pipelines. You can define multiple.
# For more information on multiple pipelines, see the documentation:
# https://www.elastic.co/guide/en/logstash/current/multiple-pipelines.html
- pipeline.id: 172_nginx_access
  pipeline.workers: 1
  path.config: "/etc/logstash/conf.d/172_nginx_access.conf"
- pipeline.id: 76_nginx_access
  pipeline.workers: 1
  path.config: "/etc/logstash/conf.d/76_nginx_access.conf"
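Before restarting the service, each pipeline file can be syntax-checked on its own. With the rpm layout the binary lives under /usr/share/logstash, and --config.test_and_exit parses the config and exits:
/usr/share/logstash/bin/logstash --path.settings /etc/logstash --config.test_and_exit -f /etc/logstash/conf.d/172_nginx_access.conf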
jvm.options
In jvm.options, set the initial (-Xms) and maximum (-Xmx) heap sizes; how large depends on the machine:
-Xms4g
-Xmx7g
- Directory tree:
[root@linuxea-VM-Node117 /etc/logstash]# tree ./
./
|-- conf.d
| |-- 172_nginx_access.conf
| `-- 76_nginx_access.conf
|-- GeoLite2-City.mmdb
|-- jvm.options
|-- log4j2.properties
|-- logstash.yml
|-- patterns.d
| |-- nginx
| |-- nginx2
| `-- nginx_error
|-- pipelines.yml
`-- startup.options
2 directories, 20 files
The nginx pipeline configuration files
The conf.d directory holds the individual pipeline configuration files, and there can be several of them. A single one looks roughly like this:
input {
  redis {
    host => "47.31.21.369"
    port => "6379"
    key => "172_nginx_access"
    data_type => "list"
    password => "OTdmOM4OTZh"
    threads => "5"
    db => "2"
  }
}
filter {
  if [fields][list_id] == "172_nginx_access" {
    grok {
      patterns_dir => [ "/etc/logstash/patterns.d/" ]
      match => {
        "message" => [
          "%{NGINXACCESS}",
          "%{NGINXACCESS_B}",
          "%{NGINXACCESS_ERROR}",
          "%{NGINXACCESS_ERROR2}"
        ]
      }
      overwrite => [ "message" ]
      remove_tag => ["_grokparsefailure"]
      timeout_millis => "0"
    }
    geoip {
      source => "client_ip"
      target => "geoip"
      database => "/etc/logstash/GeoLite2-City.mmdb"
    }
    useragent {
      source => "User_Agent"
      target => "userAgent"
    }
    urldecode {
      all_fields => true
    }
    mutate {
      # strip the literal double quotes from User_Agent
      gsub => [ "User_Agent", "\"", "" ]
      convert => {
        "response"               => "integer"
        "body_bytes_sent"        => "integer"
        "bytes_sent"             => "integer"
        "upstream_response_time" => "float"
        "upstream_status"        => "integer"
        "request_time"           => "float"
        "port"                   => "integer"
      }
    }
    date {
      match => [ "timestamp" , "dd/MMM/YYYY:HH:mm:ss Z" ]
    }
  }
}
output {
  if [fields][list_id] == "172_nginx_access" {
    elasticsearch {
      hosts => ["10.10.240.113:9200","10.10.240.114:9200"]
      index => "logstash-172_nginx_access-%{+YYYY.MM.dd}"
      user => "elastic"
      password => "dtopsadmin"
    }
  }
  stdout { codec => rubydebug }
}
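When tuning grok it is easier to bypass redis entirely. A throwaway pipeline like the sketch below (same patterns_dir; test.conf is just a scratch file name) lets you paste raw nginx lines on stdin and inspect the parsed event:
input { stdin {} }
filter {
  grok {
    patterns_dir => [ "/etc/logstash/patterns.d/" ]
    match => { "message" => [ "%{NGINXACCESS}", "%{NGINXACCESS_B}" ] }
  }
}
output { stdout { codec => rubydebug } }
Run it with /usr/share/logstash/bin/logstash -f test.conf; lines that fail to parse come back tagged _grokparsefailure.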
Here, the pattern files referenced by match are the ones under /etc/logstash/patterns.d/:
patterns_dir => [ "/etc/logstash/patterns.d/" ]
match => {
  "message" => [
    "%{NGINXACCESS}",
    "%{NGINXACCESS_B}",
    "%{NGINXACCESS_ERROR}",
    "%{NGINXACCESS_ERROR2}"
  ]
}
The grok pattern for the nginx access log
[root@linuxea-VM-Node117 /etc/logstash]# cat patterns.d/nginx
NGUSERNAME [a-zA-Z\.\@\-\+_\%]+
NGUSER %{NGUSERNAME}
NGINXACCESS %{IP:client_ip} (?:-|%{USER:ident}) \[%{HTTPDATE:log_date}\] "%{WORD:http_verb} (?:%{PATH:baseurl}\?%{NOTSPACE:params}(?: HTTP/%{NUMBER:http_version})?|%{DATA:raw_http_request})" (%{IPORHOST:url_domain}|%{URIHOST:ur_domain}|-)\[(%{BASE16FLOAT:request_time}|-)\] %{NOTSPACE:request_body} %{QS:referrer_url} %{GREEDYDATA:User_Agent} \[%{GREEDYDATA:ssl_protocol}\] \[(?:%{GREEDYDATA:ssl_cipher}|-)\]\[%{NUMBER:time_duration}\] \[%{NUMBER:http_status_code}\] \[(%{BASE10NUM:upstream_status}|-)\] \[(%{NUMBER:upstream_response_time}|-)\] \[(%{URIHOST:upstream_addr}|-)\]
[root@linuxea-VM-Node117 /etc/logstash]#
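For orientation, the NGINXACCESS pattern above corresponds to a custom log_format along these lines; this is a reconstruction from the grok, not the author's actual nginx.conf, so the exact variables and their order are an assumption:
log_format access '$remote_addr $remote_user [$time_local] "$request" '
                  '$host [$request_time] $request_body "$http_referer" '
                  '$http_user_agent [$ssl_protocol] [$ssl_cipher][$request_time] '
                  '[$status] [$upstream_status] [$upstream_response_time] [$upstream_addr]';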
Because a layer-4 proxy sits in front, some nginx logs are written in the log format compiled in at build time, so that layout gets a grok of its own:
[root@linuxea-VM-Node117 /etc/logstash]# cat patterns.d/nginx2
NGUSERNAME [a-zA-Z\.\@\-\+_\%]+
NGUSER %{NGUSERNAME}
NGINXACCESS_B %{IPORHOST:clientip} (?:-|(%{WORD}\.%{WORD})) (?:-|%{USER:ident}) \[%{HTTPDATE:timestamp}\] "(?:%{WORD:verb} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|%{DATA:rawrequest})" %{NUMBER:http_status_code} %{NOTSPACE:request_body} "%{GREEDYDATA:User_Agent}"
[root@linuxea-VM-Node117 /etc/logstash]#
The grok patterns for the nginx error log
[root@linuxea-VM-Node117 /etc/logstash]# cat patterns.d/nginx_error
NGUSERNAME [a-zA-Z\.\@\-\+_\%]+
NGUSER %{NGUSERNAME}
NGINXACCESS_ERROR (?<time>\d{4}/\d{2}/\d{2}\s{1,}\d{2}:\d{2}:\d{2})\s{1,}\[%{DATA:err_severity}\]\s{1,}(%{NUMBER:pid:int}#%{NUMBER}:\s{1,}\*%{NUMBER}|\*%{NUMBER}) %{DATA:err_message}(?:,\s{1,}client:\s{1,}(?<client_ip>%{IP}|%{HOSTNAME}))(?:,\s{1,}server:\s{1,}%{IPORHOST:server})(?:, request: %{QS:request})?(?:, host: %{QS:client_ip})?(?:, referrer: "%{URI:referrer}")?
NGINXACCESS_ERROR2 (?<time>\d{4}/\d{2}/\d{2}\s{1,}\d{2}:\d{2}:\d{2})\s{1,}\[%{DATA:err_severity}\]\s{1,}%{GREEDYDATA:err_message}
[root@linuxea-VM-Node117 /etc/logstash]#
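As a reference for what NGINXACCESS_ERROR is matching, a stock nginx error-log line looks like this (an illustrative sample, not taken from the author's logs):
2018/08/02 10:15:30 [error] 1891#0: *2786 connect() failed (111: Connection refused) while connecting to upstream, client: 10.10.240.113, server: www.linuxea.com, request: "GET /index.html HTTP/1.1", host: "www.linuxea.com"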