Configuring an HDFS Cluster on CentOS
1. Environment Preparation and Planning
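The rest of this guide assumes a small three-node layout: master runs the NameNode, while slave1 and slave2 run DataNodes, all operated by a hadoop user (or by root, as noted below). As a minimal sketch of the host planning, each node's /etc/hosts could map the hostnames like this (the IP addresses are placeholders for your own network):
192.168.1.10 master
192.168.1.11 slave1
192.168.1.12 slave2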
2. Installation and Basic Configuration
Install OpenJDK 8 on every node:
sudo yum install -y java-1.8.0-openjdk-devel
Download and unpack Hadoop 3.3.1 (also on every node):
wget https://archive.apache.org/dist/hadoop/common/hadoop-3.3.1/hadoop-3.3.1.tar.gz && tar -xzvf hadoop-3.3.1.tar.gz -C /usr/local/ && mv /usr/local/hadoop-3.3.1 /usr/local/hadoop
Create /etc/profile.d/hadoop.sh with the following environment variables:
export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk
export HADOOP_HOME=/usr/local/hadoop
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
source /etc/profile.d/hadoop.sh
In $HADOOP_HOME/etc/hadoop/hadoop-env.sh, set the JDK path explicitly:
export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk
Generate an SSH key on master and copy it to each DataNode so the start scripts can log in without a password (if you run everything as root, the target user is root instead):
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
ssh-copy-id hadoop@slave1
ssh-copy-id hadoop@slave2
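Before configuring HDFS itself, a quick sanity check confirms that the environment variables and passwordless SSH are in place; run on master as the hadoop user:
java -version
hadoop version
ssh hadoop@slave1 hostname
ssh hadoop@slave2 hostname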
3. Configuring the HDFS Core Files
Edit $HADOOP_HOME/etc/hadoop/core-site.xml:
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://master:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/hadoop/data/tmp</value>
</property>
</configuration>
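After saving core-site.xml, the effective NameNode address can be read back with Hadoop's built-in getconf tool; it should print the hdfs://master:9000 value configured above:
hdfs getconf -confKey fs.defaultFS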
Edit $HADOOP_HOME/etc/hadoop/hdfs-site.xml:
<configuration>
<property>
<name>dfs.replication</name>
<value>2</value> <!-- must be less than or equal to the number of DataNodes -->
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/opt/hadoop/data/namenode</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:/opt/hadoop/data/datanode</value>
</property>
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
</configuration>
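If HDFS runs as the hadoop user (as the ssh-copy-id step above assumes), the directories under /opt/hadoop/data must be owned by that user on every node; one way to prepare them is:
sudo mkdir -p /opt/hadoop/data
sudo chown -R hadoop:hadoop /opt/hadoop/data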
In $HADOOP_HOME/etc/hadoop/workers, list the DataNode hostnames, one per line:
slave1
slave2
Distribute the configuration directory to both DataNodes:
rsync -av $HADOOP_HOME/etc/hadoop/ hadoop@slave1:$HADOOP_HOME/etc/hadoop/
rsync -av $HADOOP_HOME/etc/hadoop/ hadoop@slave2:$HADOOP_HOME/etc/hadoop/
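With more DataNodes, the same distribution can be driven from the workers file instead of listing each host by hand; a sketch of that loop:
for host in $(cat $HADOOP_HOME/etc/hadoop/workers); do
  rsync -av $HADOOP_HOME/etc/hadoop/ hadoop@$host:$HADOOP_HOME/etc/hadoop/
done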
4. Startup and Verification
Create the data directories on every node, format the NameNode (on master only), and start HDFS:
mkdir -p /opt/hadoop/data/{tmp,namenode,datanode}
hdfs namenode -format
start-dfs.sh
Running jps on master should show a NameNode process, and each DataNode host should show a DataNode process. Then verify basic HDFS operations:
hdfs dfs -mkdir /test
hdfs dfs -put /etc/hosts /test/
hdfs dfs -ls /test
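Beyond jps, hdfs dfsadmin -report summarizes the whole cluster and, with the configuration above, should show two live DataNodes; the NameNode web UI is also reachable at http://master:9870 once the firewall rules in the next section are in place:
hdfs dfsadmin -report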
5. Firewall and Common Issues
Open the NameNode RPC port and the web UI port (9870 on Hadoop 3.x; 50070 on 2.x):
sudo firewall-cmd --permanent --add-port=9000/tcp
sudo firewall-cmd --permanent --add-port=9870/tcp
sudo firewall-cmd --reload
Also make sure that /etc/hosts or DNS resolves master/slaveX correctly on every node.
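To confirm that the NameNode is actually listening on the ports opened above, check the listening sockets on master with ss (part of the iproute package on CentOS):
sudo ss -tlnp | grep -E ':9000|:9870'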