Ubuntu 僵尸进程监控与预警
一 监控思路与阈值
二 快速检测与定位
ps -eo pid,ppid,state,cmd | awk '$3 ~ /Z/ {print}'
pstree -p | grep -A5 -B5 'Z'
top/htop(在 htop 中可按 F3 搜索 zombie;僵尸进程在状态列 S 中显示为 Z)
atop、systemd-cgtop
cat /proc/<PID>/status | grep State
ps -o ppid= -p <ZOMBIE_PID>
pidstat -w(结合状态筛选可辅助发现异常)
三 预警方案
#!/usr/bin/env bash
#
# check_zombies.sh - alert when zombie (defunct) processes are present.
#
# Takes one snapshot of the process table, and when the number of zombies
# reaches THRESHOLD sends the same report by e-mail and to a WeCom
# (Enterprise WeChat) group-robot webhook.
#
# Environment overrides (defaults are the values previously hard-coded):
#   THRESHOLD    minimum zombie count that triggers an alert (default: 1)
#   MAIL_TO      alert recipient                              (default: admin@example.com)
#   WEBHOOK_URL  WeCom webhook URL, including the key
#
# Exit status: 0 when nothing had to be sent or every channel succeeded,
# non-zero when the process table could not be read or a channel failed.
set -Eeuo pipefail

readonly THRESHOLD="${THRESHOLD:-1}"
readonly MAIL_TO="${MAIL_TO:-admin@example.com}"
readonly WEBHOOK_URL="${WEBHOOK_URL:-https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=YOUR_KEY}"
# At most this many zombies are listed in the alert body.
readonly MAX_LISTED=20

# Print a timestamped diagnostic on stderr.
warn() {
  printf '[%s] %s\n' "$(date '+%F %T')" "$*" >&2
}

# Print one "PID=.. PPID=.. CMD=.." line per zombie process.
# CMD is only the first word of the command line (awk field 4).
list_zombies() {
  ps -eo pid,ppid,state,cmd --no-headers \
    | awk '$3 == "Z" { printf "PID=%s PPID=%s CMD=%s\n", $1, $2, $4 }'
}

# Escape $1 so it can be embedded in a JSON string literal.
# Raw newlines, quotes and backslashes would otherwise produce invalid JSON.
json_escape() {
  local s=$1
  s=${s//\\/\\\\} # must run first so later escapes are not doubled
  s=${s//\"/\\\"}
  s=${s//$'\n'/\\n}
  s=${s//$'\r'/\\r}
  s=${s//$'\t'/\\t}
  s=${s//[[:cntrl:]]/ } # any other control character is illegal in JSON
  printf '%s' "$s"
}

# Mail body $2 with subject $1 to MAIL_TO.
send_mail() {
  printf '%s\n' "$2" | mail -s "$1" "$MAIL_TO"
}

# Post $1 as a text message to the WeCom webhook.
send_webhook() {
  local payload
  printf -v payload '{"msgtype":"text","text":{"content":"%s"}}' \
    "$(json_escape "$1")"
  # -f: treat HTTP errors as failures; --max-time: never hang the cron job.
  curl -fsS --max-time 10 -H 'Content-Type: application/json' \
    -d "$payload" "$WEBHOOK_URL"
}

main() {
  local snapshot details msg ts subject
  local count=0 rc=0
  local -a lines=()

  if [[ ! "$THRESHOLD" =~ ^[0-9]+$ ]]; then
    warn "THRESHOLD must be a non-negative integer, got '$THRESHOLD'"
    return 2
  fi

  # A single snapshot keeps the reported count and the listing consistent.
  if ! snapshot=$(list_zombies); then
    warn "failed to read the process table"
    return 1
  fi
  if [[ -n "$snapshot" ]]; then
    mapfile -t lines <<<"$snapshot"
    count=${#lines[@]}
  fi

  # 10# forces base 10 so a value such as "08" is not parsed as octal.
  (( count >= 10#$THRESHOLD )) || return 0

  ts=$(date '+%F %T')
  details=''
  if (( count > 0 )); then
    # Slicing the array avoids `head`, which can SIGPIPE the producer and
    # abort the script under pipefail.
    printf -v details '%s\n' "${lines[@]:0:MAX_LISTED}"
    details=${details%$'\n'}
  fi
  printf -v msg '[%s] Zombie alert: %d zombie(s) found:\n%s' \
    "$ts" "$count" "$details"
  # HOSTNAME is set by bash itself; the command is only a fallback.
  subject="[Zombie Alert] $count zombies on ${HOSTNAME:-$(hostname)}"

  # The channels are independent: a failure of one must not block the other.
  if ! send_mail "$subject" "$msg"; then
    warn "mail notification to $MAIL_TO failed"
    rc=1
  fi
  if ! send_webhook "$msg"; then
    warn "webhook notification failed"
    rc=1
  fi
  return "$rc"
}

main "$@"
*/5 * * * * /usr/local/bin/check_zombies.sh >> /var/log/zombie_monitor.log 2>&1
四 处置与根因修复
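先查出僵尸进程的父进程:parent_pid=$(ps -o ppid= -p <ZOMBIE_PID>)(注意:不要直接使用 Bash 内置变量 $PPID,它是当前 shell 自身的父进程,误用会杀掉自己的终端或 SSH 会话)
kill -TERM "$parent_pid";无响应再强制:kill -KILL "$parent_pid"
sudo systemctl restart <service>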