Ubuntu 上 Node.js 日志报警机制落地指南
一、方案总览与选型
二、方案一:PM2 日志关键词告警(轻量)
#!/usr/bin/env bash
# Scan the most recent PM2 log output of the "my-api" process for the keyword
# ERROR and e-mail an alert when it is present. Meant to be run from cron.

# --nostream makes `pm2 logs` print the buffered lines and exit; without it the
# command keeps tailing the log, so the command substitution never returns.
# `--since` is not among the documented `pm2 logs` options, so the scan window
# is bounded with --lines instead.
# NOTE(review): consecutive runs can see the same lines again and re-alert;
# tune --lines to the log volume or add de-duplication if that is a problem.
LOG=$(pm2 logs my-api --raw --nostream --lines 200 2>&1)
if echo "$LOG" | grep --quiet 'ERROR'; then
  echo "[$(date)] ERROR found, sending alert" | mail -s "PM2 Node.js ERROR" ops@example.com
  # Alternatively, replace the mail command with a curl call to a
  # WeCom / DingTalk / Slack webhook.
fi
# Run check-pm2-errors.sh every 5 minutes; append its stdout and stderr to /var/log/pm2-alert.log.
*/5 * * * * /path/check-pm2-errors.sh >> /var/log/pm2-alert.log 2>&1
三、方案二:集中化日志平台 ELK 或 Graylog(推荐)
# Read the Node.js log files and decode each entry as JSON.
input {
  file {
    codec          => json
    path           => "/var/log/nodejs/*.log"
    start_position => "beginning"
  }
}

filter {
  # grok/date parsing can be added here
}

# Send events to the local Elasticsearch node; the index name carries the event date.
output {
  elasticsearch {
    index => "nodejs-logs-%{+YYYY.MM.dd}"
    hosts => ["localhost:9200"]
  }
}
四、方案三:Prometheus + Grafana 指标告警与日志联动
const client = require('prom-client');

// Upper bounds, in seconds, of the latency histogram buckets.
const latencyBucketsSeconds = [0.01, 0.05, 0.1, 0.5, 1, 2, 5];

// Histogram of HTTP request durations, labelled by method, route and status code.
const httpRequestDuration = new client.Histogram({
  buckets: latencyBucketsSeconds,
  labelNames: ['method', 'route', 'code'],
  help: 'Duration of HTTP requests in seconds',
  name: 'http_request_duration_seconds',
});
/**
 * Express middleware that records the duration of every request in the
 * `httpRequestDuration` histogram once the response has finished.
 *
 * Labels: HTTP method, matched route path ('unknown' when no route matched)
 * and the response status code.
 */
app.use((req, res, next) => {
  // Use the histogram's own timer instead of Date.now(): wall-clock time can
  // jump (NTP, manual changes) and yield skewed or negative durations.
  // The timer reports the elapsed time in seconds when it is stopped.
  const stopTimer = httpRequestDuration.startTimer();
  res.on('finish', () => {
    // req.route is only populated after routing, so read it at finish time.
    stopTimer({
      method: req.method,
      route: req.route?.path || 'unknown',
      code: res.statusCode,
    });
  });
  next();
});
/**
 * GET /metrics — exposes all metrics in the default prom-client registry in
 * the Prometheus text format.
 *
 * Responds with 500 and the error message if metric collection fails.
 */
app.get('/metrics', async (_req, res) => {
  try {
    // Collect first so a failure never sends a metrics Content-Type header.
    const body = await client.register.metrics();
    res.set('Content-Type', client.register.contentType);
    res.end(body);
  } catch (err) {
    // Express 4 does not catch rejections from async handlers; without this
    // the request would hang and the rejection would go unhandled.
    res.status(500).end(String(err?.message ?? err));
  }
});
# Prometheus alerting rules for the Node.js service.
groups:
  - name: nodejs
    rules:
      # Fires when, per instance, more than 1% of requests over the last
      # 5 minutes returned a 5xx status, sustained for 5 minutes.
      # Aggregating `by (instance)` keeps the instance label on the result so
      # the summary template below can render it; a bare sum() drops it.
      - alert: HighErrorRate
        expr: sum by (instance) (rate(http_request_duration_seconds_count{code=~"5.."}[5m])) / sum by (instance) (rate(http_request_duration_seconds_count[5m])) > 0.01
        for: 5m
        labels:
          severity: page
        annotations:
          summary: "High 5xx error rate on {{ $labels.instance }}"
五、方案四:异常追踪平台 Sentry(错误类告警)
const Sentry = require('@sentry/node');

// Initialise the Sentry SDK; the DSN below is a placeholder to be replaced
// with the project's real DSN.
const sentryOptions = {
  environment: 'production',
  dsn: 'YOUR_SENTRY_DSN',
};
Sentry.init(sentryOptions);