将结构化日志接入集中式日志平台,配合指标与告警,形成从发现问题到定位问题的闭环。
一、整体架构与关键原则
二、落地步骤
三、最小可用代码示例
package main
import (
"go.uber.org/zap"
"go.uber.org/zap/zapcore"
"os"
"time"
)
// main builds a JSON zap logger that writes to stdout at Info level and above,
// emits a startup record, then times a single call to process and logs the
// outcome together with the elapsed duration.
func main() {
	// Start from the production encoder settings, but emit ISO 8601 timestamps.
	encoderCfg := zap.NewProductionEncoderConfig()
	encoderCfg.EncodeTime = zapcore.ISO8601TimeEncoder

	jsonEncoder := zapcore.NewJSONEncoder(encoderCfg)
	stdoutSink := zapcore.AddSync(os.Stdout)
	core := zapcore.NewCore(jsonEncoder, stdoutSink, zapcore.InfoLevel)

	// Record the caller on every entry; attach stack traces from Error level up.
	logger := zap.New(core, zap.AddCaller(), zap.AddStacktrace(zapcore.ErrorLevel))
	defer logger.Sync()

	startupFields := []zap.Field{
		zap.String("service", "order"),
		zap.String("version", "v1.2.3"),
		zap.String("env", "prod"),
	}
	logger.Info("service started", startupFields...)

	// Simulate business processing and measure how long it takes.
	begin := time.Now()
	procErr := process()
	elapsed := time.Since(begin)

	if procErr == nil {
		logger.Info("process succeeded",
			zap.Duration("duration", elapsed),
			zap.Int("count", 42),
		)
		return
	}

	logger.Error("process failed",
		zap.Error(procErr),
		zap.Duration("duration", elapsed),
		zap.String("trace_id", "abc-123-def"),
	)
}
// process is a placeholder for the business logic. It currently does no work
// and always returns nil.
func process() error {
// TODO: business logic
return nil
}
package main
import (
	"log"
	"net/http"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promhttp"
)
// requestsTotal is a counter exported as "http_requests_total".
var requestsTotal = prometheus.NewCounter(prometheus.CounterOpts{
	Name: "http_requests_total",
	Help: "Total number of HTTP requests.",
})

// requestDuration is a summary exported as "http_request_duration_seconds".
var requestDuration = prometheus.NewSummary(prometheus.SummaryOpts{
	Name: "http_request_duration_seconds",
	Help: "Duration of HTTP requests.",
})
// init registers the package-level metrics with the default Prometheus
// registerer. MustRegister panics if a registration fails, so a bad metric
// definition surfaces at startup.
func init() {
	prometheus.MustRegister(requestsTotal)
	prometheus.MustRegister(requestDuration)
}
// main serves the Prometheus scrape endpoint at /metrics and a liveness probe
// at /healthz on port 8080, using the default ServeMux.
//
// The server runs in the foreground. If it cannot start (for example because
// the port is already in use) or stops later, the error is logged and the
// process exits with a non-zero status instead of hanging with no listener.
func main() {
	http.Handle("/metrics", promhttp.Handler())
	http.HandleFunc("/healthz", func(w http.ResponseWriter, r *http.Request) {
		// The body is JSON, so say so; headers must be set before WriteHeader.
		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(http.StatusOK)
		// Best-effort write: nothing useful can be done if the client went away.
		_, _ = w.Write([]byte(`{"status":"ok"}`))
	})

	// ListenAndServe blocks while serving and always returns a non-nil error
	// when it stops, so no separate goroutine or empty select is needed.
	if err := http.ListenAndServe(":8080", nil); err != nil {
		log.Fatalf("metrics server on :8080: %v", err)
	}
}
import (
"gopkg.in/natefinch/lumberjack.v2"
"github.com/sirupsen/logrus"
)
// Send logrus output to a lumberjack.Logger that writes to /var/log/myapp.log
// with the size, backup-count, age and compression settings below.
logrus.SetOutput(&lumberjack.Logger{
Filename: "/var/log/myapp.log",
MaxSize: 10, // MB
MaxBackups: 3, // number of files to keep
MaxAge: 28, // days
Compress: true, // compress
})
四、告警规则与SLO示例
五、运维与治理要点