Debian环境下降低Hadoop运维成本的实操方案
一、标准化与自动化
二、稳定与自愈
三、资源与性能基线
四、备份恢复与变更管控
五、低成本监控与日志示例
#!/usr/bin/env bash
#
# Hadoop health check for the local node.
#
# Runs four checks in order and stops at the first one that fails:
#   1) `java -version` and `hadoop version` both run successfully
#   2) every required daemon name appears in `jps` output
#   3) `hdfs dfsadmin -report` exits successfully
#   4) `yarn node -list` exits successfully
#
# Environment:
#   HADOOP_HOME            Hadoop install directory
#                          (default: /usr/local/hadoop)
#   HADOOP_REQUIRED_PROCS  space-separated daemon names that must appear in
#                          `jps` output (default: NameNode DataNode
#                          ResourceManager NodeManager)
#
# Exit status: 0 when every check passes, 1 otherwise.
set -euo pipefail

HADOOP_HOME=${HADOOP_HOME:-/usr/local/hadoop}
PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin

# Finish the current "N) ...: " line with FAIL and abort the whole script.
fail() {
  echo "FAIL"
  exit 1
}

# Run "$@" and print the first line of its combined stdout/stderr.
# Returns the command's own exit status.
# The output is captured in full instead of being piped into `head -1`:
# under pipefail, head closing the pipe early can kill the producer with
# SIGPIPE and turn a healthy command into a spurious failure.
first_line() {
  local out rc=0
  out=$("$@" 2>&1) || rc=$?
  if [[ -n "$out" ]]; then
    printf '%s\n' "${out%%$'\n'*}"
  fi
  return "$rc"
}

# Run "$@" with all output discarded; print OK on success, FAIL + exit 1
# otherwise. An explicit if/else is used so that a failing `echo` can never
# fall through into the failure branch.
quiet_check() {
  if "$@" >/dev/null 2>&1; then
    echo "OK"
  else
    fail
  fi
}

echo "=== Hadoop Health Check $(date) ==="

printf '%s' "1) Java/Hadoop: "
first_line java -version || fail
first_line "$HADOOP_HOME/bin/hadoop" version || fail

printf '%s' "2) Processes (jps): "
# Indexed array: daemons are checked (and reported) in the order listed.
read -r -a required \
  <<<"${HADOOP_REQUIRED_PROCS:-NameNode DataNode ResourceManager NodeManager}"
# If jps itself cannot run we cannot tell what is missing, so report FAIL
# instead of claiming every daemon is down.
jps_out=$(jps) || fail
missing=()
for p in "${required[@]}"; do
  # -w keeps e.g. "NameNode" from matching inside "SecondaryNameNode".
  # A here-string is used instead of a pipeline so grep -q exiting early
  # cannot produce a SIGPIPE failure under pipefail.
  if ! grep -qw -- "$p" <<<"$jps_out"; then
    missing+=("$p")
  fi
done
if (( ${#missing[@]} == 0 )); then
  echo "OK"
else
  echo "MISSING: ${missing[*]}"
  exit 1
fi

printf '%s' "3) HDFS Report: "
quiet_check hdfs dfsadmin -report

printf '%s' "4) YARN Nodes: "
quiet_check yarn node -list

echo "All checks passed."