Setting Up an HDFS Cluster on Debian
1. Environment Preparation
sudo apt-get install -y openssh-server && sudo systemctl enable --now ssh
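An optional quick check that the SSH daemon is running and listening on port 22:
systemctl is-active ssh
ss -ltn | grep ':22'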
2. Install Java and Hadoop
Option A: OpenJDK 8 from the Debian repositories (only if your release still ships it):
sudo apt-get update && sudo apt-get install -y openjdk-8-jdk
Option B: Temurin 8 from the Adoptium repository (via the Tsinghua mirror):
sudo apt-get update && sudo apt-get install -y wget apt-transport-https
sudo mkdir -p /etc/apt/keyrings
wget -O - https://packages.adoptium.net/artifactory/api/gpg/key/public | sudo tee /etc/apt/keyrings/adoptium.asc
echo "deb [signed-by=/etc/apt/keyrings/adoptium.asc] https://mirrors.tuna.tsinghua.edu.cn/Adoptium/deb $(awk -F= '/^VERSION_CODENAME/{print$2}' /etc/os-release) main" | sudo tee /etc/apt/sources.list.d/adoptium.list
sudo apt-get update
sudo apt-get install -y temurin-8-jdk
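A quick check that the JDK is installed (the vendor line differs between Temurin and OpenJDK):
java -version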
wget -P /root/packages https://dlcdn.apache.org/hadoop/common/hadoop-3.3.6/hadoop-3.3.6.tar.gz
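Optionally verify the download against the checksum Apache publishes next to the tarball (compare the two values by eye):
wget -P /root/packages https://dlcdn.apache.org/hadoop/common/hadoop-3.3.6/hadoop-3.3.6.tar.gz.sha512
cat /root/packages/hadoop-3.3.6.tar.gz.sha512
sha512sum /root/packages/hadoop-3.3.6.tar.gz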
sudo tar -zxvf /root/packages/hadoop-3.3.6.tar.gz -C /usr/local
sudo ln -sfn /usr/local/hadoop-3.3.6 /usr/local/hadoop
Create /etc/profile.d/hadoop.sh:
export JAVA_HOME=/usr/lib/jvm/temurin-8-jdk-amd64   # if using OpenJDK 8: /usr/lib/jvm/java-8-openjdk-amd64
export HADOOP_HOME=/usr/local/hadoop
export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
source /etc/profile.d/hadoop.sh
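A quick check that the new environment is picked up in the current shell:
hadoop version
echo "$JAVA_HOME"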
3. Cluster Configuration
/etc/hosts (identical on all three nodes):
192.168.50.201 node1
192.168.50.202 node2
192.168.50.203 node3
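The rest of the guide assumes a dedicated hadoop account on every node (the HDFS_*_USER/YARN_*_USER settings and the chown below refer to it); if it does not exist yet, a minimal way to create it on node1, node2, and node3:
sudo useradd -m -s /bin/bash hadoop
sudo passwd hadoop
A quick name-resolution check from node1:
getent hosts node1 node2 node3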
Passwordless SSH from node1 to all nodes, set up as the hadoop user (start-dfs.sh logs in over SSH as the user that runs it; accept the default key path when prompted):
ssh-keygen -t rsa -C "node1@example.com"
ssh-copy-id node1
ssh-copy-id node2
ssh-copy-id node3
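Verify that each hop works without a password prompt:
for h in node1 node2 node3; do ssh "$h" hostname; done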
Append to $HADOOP_HOME/etc/hadoop/hadoop-env.sh:
export JAVA_HOME=/usr/lib/jvm/temurin-8-jdk-amd64
export HDFS_NAMENODE_USER=hadoop
export HDFS_DATANODE_USER=hadoop
export HDFS_SECONDARYNAMENODE_USER=hadoop
export YARN_RESOURCEMANAGER_USER=hadoop
export YARN_NODEMANAGER_USER=hadoop
$HADOOP_HOME/etc/hadoop/core-site.xml:
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://node1:8020</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/export/data/hadoop</value>
  </property>
  <property>
    <name>hadoop.http.staticuser.user</name>
    <value>hadoop</value>
  </property>
</configuration>
$HADOOP_HOME/etc/hadoop/hdfs-site.xml:
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>2</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>/export/data/hadoop/hdfs/name</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/export/data/hadoop/hdfs/data</value>
  </property>
</configuration>
$HADOOP_HOME/etc/hadoop/mapred-site.xml:
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>node1:10020</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>node1:19888</value>
  </property>
</configuration>
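Note: with mapreduce.framework.name=yarn, Hadoop 3.x MapReduce jobs often fail to launch the MRAppMaster unless the MapReduce home is passed into the containers. A commonly used addition to mapred-site.xml (a sketch; the value assumes the /usr/local/hadoop install path used above):
  <property>
    <name>yarn.app.mapreduce.am.env</name>
    <value>HADOOP_MAPRED_HOME=/usr/local/hadoop</value>
  </property>
  <property>
    <name>mapreduce.map.env</name>
    <value>HADOOP_MAPRED_HOME=/usr/local/hadoop</value>
  </property>
  <property>
    <name>mapreduce.reduce.env</name>
    <value>HADOOP_MAPRED_HOME=/usr/local/hadoop</value>
  </property>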
$HADOOP_HOME/etc/hadoop/yarn-site.xml:
<configuration>
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>node1</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
</configuration>
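Hadoop itself (section 2) must also be installed on node2 and node3, and start-dfs.sh/start-yarn.sh start workers on the hosts listed in $HADOOP_HOME/etc/hadoop/workers (one hostname per line). A minimal sketch for writing that file and pushing the finished configuration from node1 (assuming all three nodes run DataNodes, identical install paths, and root SSH access; adjust to your setup):
printf 'node1\nnode2\nnode3\n' | sudo tee /usr/local/hadoop/etc/hadoop/workers
for h in node2 node3; do
  scp -r /usr/local/hadoop/etc/hadoop/* "$h":/usr/local/hadoop/etc/hadoop/
  scp /etc/profile.d/hadoop.sh "$h":/etc/profile.d/
done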
# On every node: create the HDFS data directories and hand them to the hadoop user
sudo mkdir -p /export/data/hadoop/hdfs/{name,data}
sudo chown -R hadoop:hadoop /export/data/hadoop
4. Start and Verify
# Format the NameNode (run once, as the hadoop user on node1)
hdfs namenode -format
# Start HDFS (run on node1)
start-dfs.sh
# If YARN is needed
start-yarn.sh
# If the JobHistory server is needed
mapred --daemon start historyserver
jps
# Run jps on each node; expected processes: NameNode, DataNode, SecondaryNameNode (HDFS); ResourceManager, NodeManager (YARN)
hdfs dfsadmin -report
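A quick end-to-end smoke test from node1, writing a small file into HDFS and listing it back:
hdfs dfs -mkdir -p /tmp/smoke
hdfs dfs -put /etc/hosts /tmp/smoke/
hdfs dfs -ls /tmp/smoke
# NameNode web UI: http://node1:9870 (ResourceManager, if started: http://node1:8088)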
5. Network, Ports, and Common Issues
sudo ufw disable
# Or open only the required ports
sudo ufw allow 22,8020,9870,8088,19888/tcp
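# Port reference (Hadoop 3.x defaults used above): 22 SSH, 8020 NameNode RPC, 9870 NameNode web UI,
# 8088 ResourceManager web UI, 19888 JobHistory web UI. If the worker nodes are firewalled as well,
# the DataNode ports (9864, 9866, 9867 by default) may need the same treatment.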