Filebeat Configuration and Tuning for High-Throughput Scenarios
1. Core Principles and Architecture Choices
2. Key Configuration Example and Notes
# filebeat.yml high-throughput example (adjust values to your environment)
filebeat.inputs:
- type: filestream              # preferred over the legacy log input on recent 7.x/8.x releases
  id: app-logs                  # each filestream input needs a unique id (name here is illustrative)
  enabled: true
  paths:
    - /var/log/**/*.log
  ignore_older: 72h             # skip overly old files to shrink scanning and registry state
  prospector.scanner.check_interval: 15s   # filestream equivalent of scan_frequency; scan less often to cut CPU and I/O
  close.on_state_change.inactive: 5m       # close handles of inactive files promptly to free fds
  harvester_limit: 1000         # cap on concurrent harvesters (size by memory and fd budget)
  # Multiline example (Java stack traces), expressed as a filestream parser
  parsers:
    - multiline:
        type: pattern
        pattern: '^\['
        negate: true
        match: after
        max_lines: 10000
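  # With this pattern, any line that does not start with '[' (e.g. an "at com.example.Foo(...)"
  # stack-trace line) is appended to the preceding "[<timestamp>] ERROR ..." event.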
  # Optional: reduce event volume by filtering lines at the edge
  # include_lines: ['ERROR', 'WARN']
  # exclude_lines: ['DEBUG']
# Keep edge-side processing light; defer parsing downstream where possible
processors:
  - add_host_metadata: ~
  - add_cloud_metadata: ~
  # Enable lightweight parsing only if truly needed; avoid heavy grok at the edge
  # - dissect:
  #     tokenizer: "%{ts} %{level} %{msg}"
  #     target_prefix: ""
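  # Optional sketch: drop fields the downstream does not need to shrink event size
  # (the field names below are illustrative)
  # - drop_fields:
  #     fields: ["agent.ephemeral_id", "ecs.version", "input.type"]
  #     ignore_missing: true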
# Queue and reliability. Filebeat has no "persisted" queue type (that is Logstash
# persistent-queue syntax); for on-disk buffering use the disk queue instead.
queue.disk:
  max_size: 1GB
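# Alternative (Filebeat's default): keep the in-memory queue and tune its batching.
# Only one queue type may be configured at a time; values are illustrative.
# queue.mem:
#   events: 8192
#   flush.min_events: 2048
#   flush.timeout: 1s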
# Output to Logstash (recommended: do parsing and routing in Logstash)
output.logstash:
  hosts: ["logstash-0.example.com:5044", "logstash-1.example.com:5044"]
  loadbalance: true
  bulk_max_size: 2048
  compression_level: 3
  worker: 4                     # output concurrency (size to what the downstream can absorb)
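# Note: only one output may be enabled at a time; keep the Elasticsearch output
# below commented out while output.logstash is active.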
# Output to Elasticsearch (when shipping directly)
# output.elasticsearch:
#   hosts: ["es-0.example.com:9200", "es-1.example.com:9200"]
#   bulk_max_size: 2048
#   compression_level: 3        # the ES output takes compression_level, not a boolean "compression"
#   worker: 4
# Monitoring and self-diagnostics
monitoring:
  enabled: true
  elasticsearch:
    hosts: ["monitoring-es.example.com:9200"]
    # Monitoring data is written to the stack-managed .monitoring-beats-* indices;
    # the index name is not configurable here.

# Registry (state file) tuning
path.data: /var/lib/filebeat
filebeat.registry.flush: 1s
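Since the example hands events to Logstash for parsing and routing, a minimal receiving pipeline is sketched below. The port matches the hosts configured above; the filter body and the Elasticsearch hosts are placeholders to be replaced with your own parsing and routing rules.

# Logstash pipeline sketch (e.g. a file under /etc/logstash/conf.d/)
input {
  beats {
    port => 5044
  }
}
filter {
  # dissect/grok/date parsing moved off the edge nodes lives here
}
output {
  elasticsearch {
    hosts => ["es-0.example.com:9200", "es-1.example.com:9200"]
  }
}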
3. Runtime Environment and Resource Tuning
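The input settings above assume enough file descriptors for up to 1000 concurrent harvesters. On systemd hosts, one common host-level adjustment is raising the fd limit of the Filebeat service; a sketch, assuming the stock filebeat unit from a package install:

# Create a systemd drop-in raising the fd limit, then restart the service
sudo mkdir -p /etc/systemd/system/filebeat.service.d
printf '[Service]\nLimitNOFILE=65536\n' | sudo tee /etc/systemd/system/filebeat.service.d/limits.conf
sudo systemctl daemon-reload && sudo systemctl restart filebeat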
4. Monitoring, Validation, and Incremental Tuning
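Each tuning step should start from a known-good baseline. The configuration and output connectivity can be checked with Filebeat's built-in test subcommands, and internal counters can be polled from the local HTTP endpoint if it is enabled; paths below assume a package install.

# Validate the configuration file and the connection to the configured output
filebeat test config -c /etc/filebeat/filebeat.yml
filebeat test output -c /etc/filebeat/filebeat.yml
# Poll internal metrics (requires http.enabled: true in filebeat.yml; default port 5066)
curl -s 'http://localhost:5066/stats?pretty'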