Hands-On Guide to Building a Hadoop Cluster on Linux
1. Environment Preparation and Planning
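The node plan used throughout this guide, as implied by the configuration in the later sections:

hadoop01 (192.168.1.10): NameNode, ResourceManager, JobHistory Server
hadoop02 (192.168.1.11): SecondaryNameNode, DataNode, NodeManager
hadoop03 (192.168.1.12): DataNode, NodeManager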
2. Base Environment Configuration
Set the hostname on each node (run the matching command on each machine):

hostnamectl set-hostname hadoop01    # on hadoop01; use hadoop02/hadoop03 on the other nodes

Add the cluster hosts to /etc/hosts on every node:

192.168.1.10 hadoop01
192.168.1.11 hadoop02
192.168.1.12 hadoop03
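To confirm name resolution before continuing, a quick check from any node:

getent hosts hadoop01 hadoop02 hadoop03    # should echo the three entries above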
Turn off the firewall and disable SELinux:

systemctl stop firewalld && systemctl disable firewalld
sed -i 's/^SELINUX=enforcing$/SELINUX=disabled/' /etc/selinux/config && setenforce 0

Enable time synchronization:

yum install -y chrony && systemctl enable --now chronyd

Install JDK 8 (use the line matching your distribution), then verify:

yum install -y java-1.8.0-openjdk-devel       # RHEL/CentOS
apt update && apt install -y openjdk-8-jdk    # Debian/Ubuntu
java -version

Configure passwordless SSH from hadoop01 to every node:

ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
ssh-copy-id hadoop01
ssh-copy-id hadoop02
ssh-copy-id hadoop03
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
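A quick way to confirm the key setup before moving on (a minimal check; BatchMode makes a missing key fail instead of prompting for a password):

for h in hadoop01 hadoop02 hadoop03; do ssh -o BatchMode=yes "$h" hostname; done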
3. Hadoop Installation and Core Configuration

Download and unpack Hadoop 3.3.6 on hadoop01:

wget https://downloads.apache.org/hadoop/common/hadoop-3.3.6/hadoop-3.3.6.tar.gz
tar -xzvf hadoop-3.3.6.tar.gz -C /opt/
mv /opt/hadoop-3.3.6 /opt/hadoop

Append the environment variables to ~/.bashrc, then reload it:

export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk
export HADOOP_HOME=/opt/hadoop
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin

source ~/.bashrc

In $HADOOP_HOME/etc/hadoop/hadoop-env.sh, set JAVA_HOME explicitly:

export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk
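With PATH updated, the Hadoop CLI should now resolve; a quick check:

hadoop version    # should report 3.3.6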
In $HADOOP_HOME/etc/hadoop/core-site.xml:

<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://hadoop01:8020</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/opt/hadoop/tmp</value>
  </property>
</configuration>
In $HADOOP_HOME/etc/hadoop/hdfs-site.xml:

<configuration>
  <property>
    <name>dfs.replication</name>
    <value>2</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:///opt/hadoop/data/dfs/name</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:///opt/hadoop/data/dfs/data</value>
  </property>
  <property>
    <name>dfs.namenode.http-address</name>
    <value>hadoop01:9870</value>
  </property>
  <property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>hadoop02:9868</value>
  </property>
</configuration>
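A small sanity check, run on hadoop01 once the file is saved (hdfs getconf reads the local configuration):

hdfs getconf -confKey dfs.replication    # expect 2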
In $HADOOP_HOME/etc/hadoop/mapred-site.xml:

<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>hadoop01:10020</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>hadoop01:19888</value>
  </property>
</configuration>
In $HADOOP_HOME/etc/hadoop/yarn-site.xml:

<configuration>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>hadoop01</value>
  </property>
  <property>
    <name>yarn.nodemanager.env-whitelist</name>
    <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
  </property>
</configuration>
In $HADOOP_HOME/etc/hadoop/workers, list the worker nodes (each runs a DataNode and a NodeManager):

hadoop02
hadoop03
Copy the installation and environment to the other nodes, then create the HDFS data directories:

scp -r /opt/hadoop hadoop02:/opt/
scp ~/.bashrc hadoop02:~/ && ssh hadoop02 "source ~/.bashrc"
mkdir -p /opt/hadoop/data/dfs/{name,data}

Repeat the scp commands for hadoop03, and run the mkdir on all three nodes (see the loop below).
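The same distribution written as a loop for convenience (a sketch assuming the three hostnames above):

for h in hadoop02 hadoop03; do
  scp -r /opt/hadoop "$h":/opt/
  scp ~/.bashrc "$h":~/
  ssh "$h" 'mkdir -p /opt/hadoop/data/dfs/{name,data}'
done
mkdir -p /opt/hadoop/data/dfs/{name,data}    # hadoop01 needs the directories too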
4. Startup and Verification

On hadoop01, format the NameNode (first start only), then start HDFS, YARN, and the JobHistory server:

hdfs namenode -format
start-dfs.sh
start-yarn.sh
mapred --daemon start historyserver

Run jps on each node; across the cluster you should see NameNode, DataNode, SecondaryNameNode, ResourceManager, NodeManager, and so on.

Smoke-test HDFS:

hdfs dfs -mkdir -p /user/root
hdfs dfs -put /etc/hosts /user/root/
hdfs dfs -ls /user/root
hdfs dfs -cat /user/root/hosts
hdfs dfsadmin -report
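To exercise YARN and MapReduce end to end, the examples jar shipped with the distribution can run a small job (path follows the 3.3.6 release layout):

yarn jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.6.jar pi 2 10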
5. Common Issues and High-Availability Extension

To extend this setup to NameNode HA, the key properties are:

core-site.xml:
fs.defaultFS=hdfs://mycluster
ha.zookeeper.quorum=zk1:2181,zk2:2181,zk3:2181

hdfs-site.xml:
dfs.nameservices=mycluster
dfs.ha.namenodes.mycluster=nn1,nn2
dfs.namenode.rpc-address... (one per NameNode)
dfs.namenode.shared.edits.dir=qjournal://.../mycluster
dfs.ha.automatic-failover.enabled=true
dfs.ha.fencing.methods=sshfence

among others.
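As an illustrative sketch only: the NameNode placement (nn1 on hadoop01, nn2 on hadoop02) and the JournalNode quorum below are assumptions, not part of the base setup above; the property names follow the standard HDFS HA configuration.

In hdfs-site.xml (hypothetical HA fragment):

<!-- Assumed layout: nn1 on hadoop01, nn2 on hadoop02; JournalNodes on all three nodes. -->
<property>
  <name>dfs.namenode.rpc-address.mycluster.nn1</name>
  <value>hadoop01:8020</value>
</property>
<property>
  <name>dfs.namenode.rpc-address.mycluster.nn2</name>
  <value>hadoop02:8020</value>
</property>
<property>
  <name>dfs.namenode.shared.edits.dir</name>
  <value>qjournal://hadoop01:8485;hadoop02:8485;hadoop03:8485/mycluster</value>
</property>
<!-- Clients need the failover proxy provider to find the active NameNode. -->
<property>
  <name>dfs.client.failover.proxy.provider.mycluster</name>
  <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<!-- sshfence uses the private key of the user running HDFS (root in this guide). -->
<property>
  <name>dfs.ha.fencing.ssh.private-key-files</name>
  <value>/root/.ssh/id_rsa</value>
</property>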