Ubuntu 环境下 HDFS 备份与恢复实操指南
一 备份总览与适用场景
二 快速上手 常用命令清单
hdfs dfsadmin -allowSnapshot /path/to/dir
hdfs dfs -createSnapshot /path/to/dir snap_20251129
hdfs dfs -ls /path/to/dir/.snapshot
hdfs dfs -cp /path/to/dir/.snapshot/snap_20251129/file /path/to/dir/file
<property><name>fs.trash.interval</name><value>120</value></property>
<property><name>fs.trash.checkpoint.interval</name><value>120</value></property>
hdfs dfs -mv /user/hadoop/.Trash/Current/file /user/hadoop/file
hadoop distcp -m 50 hdfs://src-nn:8020/data hdfs://backup-nn:8020/backup/20251129
hadoop distcp -update -m 50 hdfs://src-nn:8020/data hdfs://backup-nn:8020/backup/latest
hdfs dfs -cp /data /backup/20251129
hdfs dfs -get /data /local/backup/20251129.tar.gz
hdfs fsck /data -files -blocks -locations
三 关键备份场景与步骤
hdfs dfsadmin -allowSnapshot /data
hdfs dfs -createSnapshot /data snap_20251129
hdfs dfs -cp /data/.snapshot/snap_20251129/file /data/file
hadoop distcp -m 50 hdfs://src-nn:8020/data hdfs://backup-nn:8020/backup/20251129
hadoop distcp -update -m 50 hdfs://src-nn:8020/data hdfs://backup-nn:8020/backup/latest
hdfs dfs -count hdfs://backup-nn:8020/backup/20251129 与 hdfs fsck
sudo -u hdfs hdfs dfsadmin -safemode enter
sudo -u hdfs hdfs dfsadmin -saveNamespace(生成最新 fsimage 并重置 edits)
sudo -u hdfs chown -R hdfs:hdfs /dfs/nn/current
退出安全模式:sudo -u hdfs hdfs dfsadmin -safemode leave
hdfs dfs -mv /user/hadoop/.Trash/Current/file /user/hadoop/file
mysqldump -uroot -p --databases metastore > metastore_$(date +%F).sql
mysql -uroot -p < metastore_$(date +%F).sql
四 自动化与调度示例
#!/usr/bin/env bash
#
# HDFS backup via DistCp.
#
# First run (no "latest" directory on the backup cluster yet):
#   take a full copy of SRC into a dated directory, then seed "latest" from it.
# Subsequent runs:
#   bring "latest" up to date in place with an incremental `distcp -update`.
#
# The script aborts on the first failing command, on use of an unset variable,
# and on a failure anywhere inside a pipeline.
set -euo pipefail

SRC="hdfs://src-nn:8020/data"
BK_BASE="hdfs://backup-nn:8020/backup"
DATE=$(date +%F)
LATEST="$BK_BASE/latest"
# Dated directory for today's full copy (named after the current date).
TODAY="$BK_BASE/$DATE"

# `hdfs dfs -test -d` is used as an `if` condition, so its non-zero status on a
# missing directory selects the branch instead of tripping `set -e`.
if ! hdfs dfs -test -d "$LATEST"; then
  # Full / first-time backup. DistCp is launched through the `hadoop` command.
  hadoop distcp -m 50 "$SRC" "$TODAY"

  # Seed "latest" from the dated copy. Only done here: on incremental runs
  # "latest" already holds the freshly synced data and must not be removed.
  # The glob is quoted so the HDFS shell, not the local shell, expands it.
  # `cp` is recursive on its own; it has no -r option.
  hdfs dfs -mkdir -p "$LATEST"
  hdfs dfs -cp "$TODAY/*" "$LATEST/"
else
  # Incremental backup: copy only what changed since the previous run.
  hadoop distcp -update -m 50 "$SRC" "$LATEST"
fi

# Simple sanity check: print directory/file/byte counts of the backup.
hdfs dfs -count "$LATEST"
0 2 * * * /opt/backup/backup_hdfs_distcp.sh >> /var/log/hdfs_backup.log 2>&1
sudo tar czvf /opt/backup/hadoop-conf-$(date +%F).tar.gz /etc/hadoop/conf/*.xml
rsync -avz /opt/backup/ backup-server:/data/backup/hdfs/
五 策略与最佳实践