CentOS 上 Hadoop 资源分配实操指南
一 核心思路与规划
二 YARN 资源分配关键配置
<configuration>
  <!--
    YARN resource settings (these keys are normally placed in yarn-site.xml).
    Section 1: what a single NodeManager offers to YARN.
    Section 2: the smallest/largest container a single request may receive.
    Section 3: which scheduler implementation the ResourceManager loads.
  -->

  <!-- Section 1: resources a node can hand out -->
  <!-- 14336 MB = 14 GiB. Original note: reserves about 1 GB for the system/OS;
       TODO confirm against the node's physical RAM. -->
  <property>
    <name>yarn.nodemanager.resource.memory-mb</name>
    <value>14336</value>
  </property>
  <property>
    <name>yarn.nodemanager.resource.cpu-vcores</name>
    <value>8</value>
  </property>

  <!-- Section 2: bounds on a single container request -->
  <property>
    <name>yarn.scheduler.minimum-allocation-mb</name>
    <value>1024</value>
  </property>
  <property>
    <name>yarn.scheduler.maximum-allocation-mb</name>
    <value>8192</value>
  </property>
  <property>
    <name>yarn.scheduler.minimum-allocation-vcores</name>
    <value>1</value>
  </property>
  <property>
    <name>yarn.scheduler.maximum-allocation-vcores</name>
    <value>4</value>
  </property>

  <!-- Section 3: scheduler selection (Capacity Scheduler) -->
  <property>
    <name>yarn.resourcemanager.scheduler.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value>
  </property>
</configuration>
<configuration>
  <!--
    Capacity Scheduler queue layout (these keys are normally placed in
    capacity-scheduler.xml). Three queues are defined directly under root:
    default, etl and analytics.
  -->
  <property>
    <name>yarn.scheduler.capacity.root.queues</name>
    <value>default,etl,analytics</value>
  </property>

  <!-- Guaranteed share per queue, in percent. 50 + 30 + 20 = 100. -->
  <property>
    <name>yarn.scheduler.capacity.root.default.capacity</name>
    <value>50</value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.etl.capacity</name>
    <value>30</value>
  </property>
  <!-- Fixed: the name element was previously closed with a value end tag,
       which made the whole file unparseable. -->
  <property>
    <name>yarn.scheduler.capacity.root.analytics.capacity</name>
    <value>20</value>
  </property>

  <!-- Ceiling per queue, in percent: how far a queue may grow beyond its
       guaranteed share when other queues are idle. -->
  <property>
    <name>yarn.scheduler.capacity.root.default.maximum-capacity</name>
    <value>80</value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.etl.maximum-capacity</name>
    <value>50</value>
  </property>
  <!-- Fixed: the value element was previously closed with a property end tag,
       leaving the property element unbalanced. -->
  <property>
    <name>yarn.scheduler.capacity.root.analytics.maximum-capacity</name>
    <value>40</value>
  </property>
</configuration>
三 HDFS 存储资源分配要点
<configuration>
  <!-- HDFS storage settings (these keys are normally placed in hdfs-site.xml). -->

  <!-- Replication factor: three replicas is the common production choice,
       trading storage cost for reliability. -->
  <property>
    <name>dfs.replication</name>
    <value>3</value>
  </property>

  <!-- Block size in bytes: 268435456 = 256 * 1024 * 1024 (256 MB).
       Large-file / throughput-oriented jobs typically use 256 MB or 512 MB.
       Uses dfs.blocksize, the current key; dfs.block.size is the deprecated
       spelling of the same setting. -->
  <property>
    <name>dfs.blocksize</name>
    <value>268435456</value>
  </property>

  <!-- Metadata and data directories. With multiple disks or SSDs, mount them
       separately; the DataNode entry is a comma-separated list of directories. -->
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>/data/hdfs/namenode</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/data1/hdfs/datanode,/data2/hdfs/datanode</value>
  </property>

  <!-- NameNode concurrent request-handling capacity (handler count). -->
  <property>
    <name>dfs.namenode.handler.count</name>
    <value>30</value>
  </property>
</configuration>
四 提交作业时的资源控制
五 验证与优化建议