CentOS 上 HDFS 资源分配策略设置指南
一 架构澄清与总体思路
二 配置步骤与关键参数
<configuration>
  <!-- yarn-site.xml: resources each NodeManager offers to YARN, the
       per-container allocation bounds, and the scheduler implementation. -->

  <!-- Memory (in MB) on each NodeManager that can be allocated to containers. -->
  <property>
    <name>yarn.nodemanager.resource.memory-mb</name>
    <value>16384</value>
  </property>

  <!-- Virtual cores on each NodeManager that can be allocated to containers. -->
  <property>
    <name>yarn.nodemanager.resource.cpu-vcores</name>
    <value>8</value>
  </property>

  <!-- Smallest and largest memory size (in MB) of a single container request.
       The maximum here equals the per-node memory configured above. -->
  <property>
    <name>yarn.scheduler.minimum-allocation-mb</name>
    <value>1024</value>
  </property>
  <property>
    <name>yarn.scheduler.maximum-allocation-mb</name>
    <value>16384</value>
  </property>

  <!-- Smallest and largest vcore count of a single container request.
       The maximum here equals the per-node vcore count configured above. -->
  <property>
    <name>yarn.scheduler.minimum-allocation-vcores</name>
    <value>1</value>
  </property>
  <property>
    <name>yarn.scheduler.maximum-allocation-vcores</name>
    <value>8</value>
  </property>

  <!-- Scheduler implementation used by the ResourceManager (Capacity Scheduler). -->
  <property>
    <name>yarn.resourcemanager.scheduler.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value>
  </property>

  <!-- Auxiliary NodeManager service that serves MapReduce shuffle data. -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
</configuration>
<configuration>
  <!-- Capacity Scheduler queue layout: three queues directly under root. -->
  <property>
    <name>yarn.scheduler.capacity.root.queues</name>
    <value>default,etl,analytics</value>
  </property>

  <!-- Guaranteed share of each queue, in percent of the parent (root).
       The three values below add up to 100. -->
  <property>
    <name>yarn.scheduler.capacity.root.default.capacity</name>
    <value>50</value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.etl.capacity</name>
    <value>30</value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.analytics.capacity</name>
    <value>20</value>
  </property>

  <!-- Upper bound, in percent, that each queue may grow to beyond its
       guaranteed share. -->
  <property>
    <name>yarn.scheduler.capacity.root.default.maximum-capacity</name>
    <value>80</value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.etl.maximum-capacity</name>
    <value>50</value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.analytics.maximum-capacity</name>
    <value>40</value>
  </property>

  <!-- Fraction of resources that ApplicationMasters may occupy (0.3 = 30%). -->
  <property>
    <name>yarn.scheduler.capacity.maximum-am-resource-percent</name>
    <value>0.3</value>
  </property>
</configuration>
<configuration>
  <!-- Fair Scheduler settings.
       NOTE(review): the yarn-site snippet earlier in this guide sets
       yarn.resourcemanager.scheduler.class to CapacityScheduler; these
       yarn.scheduler.fair.* keys are presumably meant as an alternative
       setup that applies when the Fair Scheduler is selected instead.
       Confirm against the surrounding guide text. -->

  <!-- Use the submitting user's name as the queue when none is specified. -->
  <property>
    <name>yarn.scheduler.fair.user-as-default-queue</name>
    <value>true</value>
  </property>

  <!-- Enable preemption. -->
  <property>
    <name>yarn.scheduler.fair.preemption</name>
    <value>true</value>
  </property>

  <!-- Location of the allocation file that defines the queues. -->
  <property>
    <name>yarn.scheduler.fair.allocation.file</name>
    <value>/etc/hadoop/conf/fair-scheduler.xml</value>
  </property>
</configuration>
<configuration>
  <!-- mapred-site.xml: run MapReduce on YARN and point clients at the
       JobHistory Server. -->

  <!-- Execute MapReduce jobs through YARN. -->
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>

  <!-- JobHistory Server host:port; "rm-host" is the hostname used by this
       guide and should match the actual server. -->
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>rm-host:10020</value>
  </property>

  <!-- JobHistory Server web UI host:port. -->
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>rm-host:19888</value>
  </property>
</configuration>
# Bring the cluster up so the configuration above is loaded:
# start the HDFS daemons first, then the YARN daemons.
start-dfs.sh
start-yarn.sh
三 HDFS 侧存储与数据布局策略
# HDFS Disk Balancer workflow for one DataNode; <node> and <plan.json> are
# placeholders to be replaced with real values.
# Step 1: generate a plan for the given node.
# NOTE(review): the Disk Balancer docs describe -out as the output location of
# the plan file; confirm whether a directory is expected here.
hdfs diskbalancer -plan <node> -out <plan.json>
# Step 2: submit the generated plan for execution.
hdfs diskbalancer -execute <plan.json>
# Step 3: query the node for the current status of the disk balancer.
hdfs diskbalancer -query <node>
四 验证与监控
五 常见注意事项