Rocky Linux 9.4 部署 Hadoop 3.4.2 高可用集群 | 极客日志
Rocky Linux 9.4 部署 Hadoop 3.4.2 高可用集群
基于 Rocky Linux 9.4 操作系统,详细演示了 Hadoop 3.4.2 高可用集群的搭建过程。内容涉及集群规划、主机名解析、用户免密登录、JDK 环境变量配置、Zookeeper 集群部署、HDFS 与 YARN 高可用配置(包括 JournalNode、NameNode、ResourceManager、ZKFC 等组件)。提供了完整的配置文件示例(core-site.xml, hdfs-site.xml, yarn-site.xml 等)及启动验证命令,适用于大数据平台运维参考。
1、集群规划
1.1、集群资源规划
1.2、HostName
# Append cluster host mappings to /etc/hosts.
# fixed: the original used ">" which OVERWRITES /etc/hosts and destroys the
# default 127.0.0.1/::1 localhost entries; use ">>" to append instead.
cat >> /etc/hosts << EOF
192.168.xxx.xxx xxx-245
192.168.xxx.xxx xxx-246
192.168.xxx.xxx xxx-247
192.168.xxx.xxx xxx-248
EOF
1.3、创建用户及免密
# Create the hadoop user on every node (password hashed with SHA-512 via Jinja2 filter).
ansible cluster -m user -a "name=hadoop password={{ 'Hadoop.2026' | password_hash('sha512') }}"
# Grant the hadoop user passwordless sudo.
ansible cluster -m shell -a "echo 'hadoop ALL=(ALL) NOPASSWD: ALL' >> /etc/sudoers"
# Generate an RSA key pair for the hadoop user with an EMPTY passphrase
# (-P needs the explicit '' argument; fixed "anisble" typo).
ansible cluster -m shell -a "ssh-keygen -t rsa -b 2048 -P '' -f ~/.ssh/id_rsa" -u hadoop
# Distribute the public key to every node for passwordless SSH.
ssh-copy-id 192.168.xxx.xxx
ssh-copy-id 192.168.xxx.xxx
ssh-copy-id 192.168.xxx.xxx
ssh-copy-id 192.168.xxx.xxx
1.4、JDK
# Set the system-wide JDK environment variables.
vi /etc/profile
export JAVA_HOME=/usr/java/jdk-17
export PATH=$JAVA_HOME/bin:$PATH
# Reload so the current shell picks up the new variables.
source /etc/profile
1.5、Linux 命令
# Quick reference of Linux commands for inspecting cluster hardware and state.
# ---- CPU / memory / system ----
grep "model name" /proc/cpuinfo && grep "physical id" /proc/cpuinfo
grep MemTotal /proc/meminfo
fdisk -l | grep Disk    # fixed: "disk -l" is not a command; fdisk lists disks
uname -a
head -n 1 /etc/issue
cat /proc/cpuinfo
hostname
lspci -tv
lsusb -tv
lsmod
env
free -m
df -h
du -sh
grep MemTotal /proc/meminfo
grep MemFree /proc/meminfo
uptime
cat /proc/loadavg
# ---- disk and partition ----
mount | column -t
fdisk -l
swapon -s
hdparm -i /dev/hda
dmesg | grep IDE
# ---- network ----
ifconfig
iptables -L
route -n
netstat -lntp
netstat -antp
netstat -s
# ---- process ----
ps -ef
top
# ---- user ----
w
id
last
cut -d: -f1 /etc/passwd
cut -d: -f1 /etc/group
crontab -l
# ---- service / packages ----
chkconfig --list
chkconfig --list | grep on
rpm -qa
cat /proc/cpuinfo
cat /proc/partitions
cat /proc/meminfo
cat /proc/version
cat /proc/ioports
cat /proc/interrupts
cat /proc/pci
cat /proc/swaps
2、Zookeeper 集群
2.1、安装解压
# Distribute the ZooKeeper tarball to every cluster node.
ansible cluster -m copy -a 'src=/opt/software/apache-zookeeper-3.9.4-bin.tar.gz dest=/opt/software/'
# Extract it on all nodes.
ansible cluster -m shell -a "tar -xvf /opt/software/apache-zookeeper-3.9.4-bin.tar.gz -C /opt/apps"
# Drop the "-bin" suffix from the install directory name.
ansible cluster -m shell -a "mv /opt/apps/apache-zookeeper-3.9.4-bin /opt/apps/zookeeper-3.9.4"
2.2、ZK 核心配置文件
# Basic time unit in milliseconds (heartbeat tick)
tickTime=2000
# Max ticks a follower may take to connect and sync with the leader at startup
initLimit=10
# Max ticks a follower may lag behind the leader before being dropped
syncLimit=5
# Client connection port
clientPort=2181
# Snapshot / data storage directory
dataDir=/data/zookeeper/data
# Ensemble members. Format: server.ID=host:quorumPort:electionPort —
# 2888 is the follower-to-leader communication port, 3888 the leader-election
# port (the original comment had the two reversed).
server.1=xxx-246:2888:3888
server.2=xxx-247:2888:3888
server.3=xxx-248:2888:3888
# Purge-task interval in hours
autopurge.purgeInterval=24
# Number of snapshots to retain when purging
autopurge.snapRetainCount=3
2.3、节点标识
ansible cluster -m shell -a "mkdir -p /data/zookeeper/data/"
# fixed: the original ran the same echo on ALL nodes (and misspelled "ansible"),
# giving every server myid=1. Each myid must be unique and match the server.N
# entry for that host in zoo.cfg.
ansible xxx-246 -m shell -a "echo '1' > /data/zookeeper/data/myid"
ansible xxx-247 -m shell -a "echo '2' > /data/zookeeper/data/myid"
ansible xxx-248 -m shell -a "echo '3' > /data/zookeeper/data/myid"
2.4、环境变量
# ZooKeeper environment variables (append to /etc/profile).
export ZK_HOME=/opt/apps/zookeeper-3.9.4
export PATH=$PATH:$ZK_HOME/bin
# Reload so the current shell picks up ZK_HOME/PATH.
source /etc/profile
2.5、分发配置
ansible cluster -m copy -a 'src=/opt/apps/zookeeper-3.9.4/conf/zoo.cfg dest=/opt/apps/zookeeper-3.9.4/conf/'
2.6、修改日志目录
vi bin/zkEnv.sh
ZOO_LOG_DIR=/data/zookeeper/log
ansible cluster -m copy -a 'src=/opt/apps/zookeeper-3.9.4/bin/zkEnv.sh dest=/opt/apps/zookeeper-3.9.4/bin'
2.7、创建目录并赋权
# Create the ZooKeeper data/log directories and hand ownership to the
# hadoop user (fixed two "anisble" typos).
ansible cluster -m shell -a "mkdir -p /data/zookeeper/{data,log}"
ansible cluster -m shell -a "chown -R hadoop:hadoop /opt/apps/zookeeper-3.9.4"
ansible cluster -m shell -a "chown -R hadoop:hadoop /data/zookeeper/"
2.8、启动与状态验证
# Start / check a single node.
zkServer.sh start
zkServer.sh status
# Restart and verify the whole ensemble (fixed "anisble" typo).
ansible cluster -m shell -a '/opt/apps/zookeeper-3.9.4/bin/zkServer.sh restart'
ansible cluster -m shell -a '/opt/apps/zookeeper-3.9.4/bin/zkServer.sh status'
3、Hadoop 集群
3.1、集群规划
3.2、安装解压
tar -xzf hadoop-3.4.2.tar.gz -C /opt/apps/
# Extract on every node (fixed "anisble" typo).
ansible cluster -m shell -a "tar -xzf /opt/software/hadoop-3.4.2.tar.gz -C /opt/apps"
3.3、修改配置
1、hadoop-env.sh
# JDK used by all Hadoop daemons.
export JAVA_HOME=/usr/java/jdk-17
# Open java.lang to unnamed modules — needed on JDK 17 for Hadoop internals.
export HADOOP_OPTS="--add-opens java.base/java.lang=ALL-UNNAMED"
# Daemon log location (must exist and be writable by the hadoop user).
export HADOOP_LOG_DIR=/data/hadoop/log
export HADOOP_CLASSPATH=$HADOOP_HOME/lib/*:$HADOOP_HOME/etc/hadoop/*
export HADOOP_CONF_DIR=/opt/apps/hadoop-3.4.2/etc/hadoop
2、yarn-env.sh
export JAVA_HOME=/usr/java/jdk-17
3、workers
4、capacity-scheduler.xml
<!-- Max fraction of cluster resources that ApplicationMasters may occupy -->
<property>
<name>yarn.scheduler.capacity.maximum-am-resource-percent</name>
<value>0.5</value>
</property>
<!-- Consider both memory and vcores (dominant resource) when scheduling -->
<property>
<name>yarn.scheduler.capacity.resource-calculator</name>
<value>org.apache.hadoop.yarn.util.resource.DominantResourceCalculator</value>
</property>
5、core-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<!-- Default FS is the logical HA nameservice defined in hdfs-site.xml -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://hdfs-ha</value>
</property>
<!-- Base directory for Hadoop temporary files -->
<property>
<name>hadoop.tmp.dir</name>
<value>/data/hadoop/data/tmp</value>
</property>
<!-- ZooKeeper ensemble used by ZKFC for automatic NameNode failover -->
<property>
<name>ha.zookeeper.quorum</name>
<value>xxx-246:2181,xxx-247:2181,xxx-248:2181</value>
</property>
<!-- Allow the hadoop user to impersonate any user from any host
     (NOTE(review): presumably for proxy services such as Hive — confirm) -->
<property>
<name>hadoop.proxyuser.hadoop.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hadoop.groups</name>
<value>*</value>
</property>
</configuration>
6、hdfs-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<!-- Local storage directories -->
<property>
<name>dfs.namenode.name.dir</name>
<value>/data/hadoop/data/namenode</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/data/hadoop/data/datanode</value>
</property>
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/data/hadoop/data/journal</value>
</property>
<!-- Logical nameservice (referenced by fs.defaultFS in core-site.xml) -->
<property>
<name>dfs.nameservices</name>
<value>hdfs-ha</value>
</property>
<!-- Three NameNodes participate in the HA nameservice -->
<property>
<name>dfs.ha.namenodes.hdfs-ha</name>
<value>nn-246,nn-247,nn-248</value>
</property>
<!-- RPC addresses, one per NameNode -->
<property>
<name>dfs.namenode.rpc-address.hdfs-ha.nn-246</name>
<value>xxx-246:9000</value>
</property>
<property>
<name>dfs.namenode.rpc-address.hdfs-ha.nn-247</name>
<value>xxx-247:9000</value>
</property>
<property>
<name>dfs.namenode.rpc-address.hdfs-ha.nn-248</name>
<value>xxx-248:9000</value>
</property>
<!-- Web UI addresses, one per NameNode -->
<property>
<name>dfs.namenode.http-address.hdfs-ha.nn-246</name>
<value>xxx-246:9870</value>
</property>
<property>
<name>dfs.namenode.http-address.hdfs-ha.nn-247</name>
<value>xxx-247:9870</value>
</property>
<property>
<name>dfs.namenode.http-address.hdfs-ha.nn-248</name>
<value>xxx-248:9870</value>
</property>
<!-- Shared edit log stored on the JournalNode quorum -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://xxx-246:8485;xxx-247:8485;xxx-248:8485/hdfs-ha</value>
</property>
<!-- Client-side proxy that locates the active NameNode -->
<property>
<name>dfs.client.failover.proxy.provider.hdfs-ha</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<!-- Fencing methods are a NEWLINE-separated list (fixed: the original put
     both on one space-separated line, which is parsed as a single invalid
     method). Try sshfence first; shell(/bin/true) always succeeds so failover
     is not blocked when the failed node is unreachable. -->
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence
shell(/bin/true)</value>
</property>
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/home/hadoop/.ssh/id_rsa</value>
</property>
<!-- Let ZKFC perform automatic failover -->
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
</configuration>
7、mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<!-- Run MapReduce on YARN -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<!-- Task container memory; heap (-Xmx) is set to ~80% of the container -->
<property>
<name>mapreduce.map.memory.mb</name>
<value>2048</value>
</property>
<property>
<name>mapreduce.map.java.opts</name>
<value>-Xmx1638m</value>
</property>
<property>
<name>mapreduce.reduce.memory.mb</name>
<value>4096</value>
</property>
<property>
<name>mapreduce.reduce.java.opts</name>
<value>-Xmx3276m</value>
</property>
<!-- Sort buffer per map task -->
<property>
<name>mapreduce.task.io.sort.mb</name>
<value>128</value>
</property>
<!-- Hadoop 3 requires HADOOP_MAPRED_HOME in AM/map/reduce environments -->
<property>
<name>yarn.app.mapreduce.am.env</name>
<value>HADOOP_MAPRED_HOME=/opt/apps/hadoop-3.4.2</value>
</property>
<property>
<name>mapreduce.map.env</name>
<value>HADOOP_MAPRED_HOME=/opt/apps/hadoop-3.4.2</value>
</property>
<property>
<name>mapreduce.reduce.env</name>
<value>HADOOP_MAPRED_HOME=/opt/apps/hadoop-3.4.2</value>
</property>
<!-- JobHistory server runs on xxx-248 -->
<property>
<name>mapreduce.jobhistory.address</name>
<value>xxx-248:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>xxx-248:19888</value>
</property>
</configuration>
8、yarn-site.xml
<?xml version="1.0"?>
<configuration>
<!-- Shuffle auxiliary service required by MapReduce -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<!-- ResourceManager HA: three RMs coordinated through ZooKeeper -->
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>yarn-ha</value>
</property>
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm-246,rm-247,rm-248</value>
</property>
<!-- Per-RM hostname and web UI address -->
<property>
<name>yarn.resourcemanager.hostname.rm-246</name>
<value>xxx-246</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm-246</name>
<value>xxx-246:9088</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm-247</name>
<value>xxx-247</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm-247</name>
<value>xxx-247:9088</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm-248</name>
<value>xxx-248</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm-248</name>
<value>xxx-248:9088</value>
</property>
<!-- ZooKeeper ensemble for RM leader election and state store -->
<property>
<name>hadoop.zk.address</name>
<value>xxx-246:2181,xxx-247:2181,xxx-248:2181</value>
</property>
<!-- Recover running applications after an RM restart/failover -->
<property>
<name>yarn.resourcemanager.recovery.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.store.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
<!-- Disable virtual/physical memory enforcement on NodeManagers -->
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.nodemanager.pmem-check-enabled</name>
<value>false</value>
</property>
<!-- Per-node schedulable memory (32 GB) and container size bounds -->
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>32768</value>
</property>
<property>
<name>yarn.scheduler.minimum-allocation-mb</name>
<value>2048</value>
</property>
<property>
<name>yarn.scheduler.maximum-allocation-mb</name>
<value>32768</value>
</property>
<!-- Per-node vcores and container vcore bounds.
     NOTE(review): maximum-allocation-vcores (48) exceeds the per-node 16
     vcores declared above — confirm this is intentional. -->
<property>
<name>yarn.nodemanager.resource.cpu-vcores</name>
<value>16</value>
</property>
<property>
<name>yarn.scheduler.minimum-allocation-vcores</name>
<value>1</value>
</property>
<property>
<name>yarn.scheduler.maximum-allocation-vcores</name>
<value>48</value>
</property>
<!-- Aggregate container logs to HDFS; serve them via the JobHistory server -->
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<name>yarn.log.server.url</name>
<value>http://xxx-248:19888/jobhistory/logs</value>
</property>
<!-- Keep aggregated logs 3 days; check for expired logs daily -->
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>259200</value>
</property>
<property>
<name>yarn.log-aggregation.retain-check-interval-seconds</name>
<value>86400</value>
</property>
<!-- Keep local container dirs 10 min after completion (debugging aid) -->
<property>
<name>yarn.nodemanager.delete.debug-delay-sec</name>
<value>600</value>
</property>
<property>
<name>yarn.nodemanager.log-dirs</name>
<value>/data/hadoop/log</value>
</property>
</configuration>
9、配置环境变量
# Hadoop environment variables (append to /etc/profile).
vi /etc/profile
export HADOOP_HOME=/opt/apps/hadoop-3.4.2
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
3.4、分发配置
# Push every edited Hadoop config file to all nodes (fixed "anisble" typos).
ansible cluster -m copy -a "src=/opt/apps/hadoop-3.4.2/etc/hadoop/capacity-scheduler.xml dest=/opt/apps/hadoop-3.4.2/etc/hadoop/"
ansible cluster -m copy -a "src=/opt/apps/hadoop-3.4.2/etc/hadoop/workers dest=/opt/apps/hadoop-3.4.2/etc/hadoop/"
ansible cluster -m copy -a "src=/opt/apps/hadoop-3.4.2/etc/hadoop/mapred-site.xml dest=/opt/apps/hadoop-3.4.2/etc/hadoop/"
ansible cluster -m copy -a "src=/opt/apps/hadoop-3.4.2/etc/hadoop/yarn-site.xml dest=/opt/apps/hadoop-3.4.2/etc/hadoop/"
ansible cluster -m copy -a "src=/opt/apps/hadoop-3.4.2/etc/hadoop/core-site.xml dest=/opt/apps/hadoop-3.4.2/etc/hadoop/"
ansible cluster -m copy -a "src=/opt/apps/hadoop-3.4.2/etc/hadoop/hdfs-site.xml dest=/opt/apps/hadoop-3.4.2/etc/hadoop/"
ansible cluster -m copy -a "src=/opt/apps/hadoop-3.4.2/etc/hadoop/hadoop-env.sh dest=/opt/apps/hadoop-3.4.2/etc/hadoop/"
ansible cluster -m copy -a "src=/opt/apps/hadoop-3.4.2/etc/hadoop/yarn-env.sh dest=/opt/apps/hadoop-3.4.2/etc/hadoop/"
3.5、创建目录并赋权
ansible cluster -m shell -a "mkdir -p /data/hadoop/data/{namenode,journal,datanode,tmp}"
# fixed: log directory is /data/hadoop/log — matching HADOOP_LOG_DIR and
# yarn.nodemanager.log-dirs — not /data/hadoop/data/log; also fixed
# "anisble" typos.
ansible cluster -m shell -a "mkdir -p /data/hadoop/log"
ansible cluster -m shell -a "chown -R hadoop:hadoop /data/hadoop"
ansible cluster -m shell -a "chown -R hadoop:hadoop /opt/apps/hadoop-3.4.2"
3.6、服务启动
1、初始化 ZKFC
# Format the failover-controller znode in ZooKeeper (run once, with HDFS stopped).
stop-dfs.sh
hdfs zkfc -formatZK
# Start ZKFC on this node, or on every host listed in the workers file.
hdfs --daemon start zkfc
hdfs --workers --daemon start zkfc
2、启动所有 JournalNode 服务
# Start/stop a JournalNode on the local host.
hdfs --daemon start journalnode
hdfs --daemon stop journalnode
# Start JournalNodes on every host in the workers file.
$HADOOP_HOME/bin/hdfs --workers --daemon start journalnode
# Sanity check via the JournalNode JMX HTTP endpoint.
curl http://xxx-246:8480/jmx
3、主节点格式化 NameNode
4、主节点启动 NameNode
hdfs --daemon start namenode
5、同步元数据到所有备用 NameNode 节点
hdfs namenode -bootstrapStandby
6、启动所有备用 NameNode
hdfs --daemon start namenode
7、切换 NameNode 为 Active
# Show active/standby state of every NameNode.
hdfs haadmin -getAllServiceState
hdfs haadmin -transitionToActive nn-246
# --forcemanual bypasses the ZKFC coordination checks — use with care.
hdfs haadmin -transitionToActive --forcemanual nn-246
8、启动 HDFS
start-dfs.sh
hdfs --daemon start datanode
hdfs --workers --daemon start datanode
9、启动 YARN
start-yarn.sh
yarn rmadmin -getServiceState rm-246
10、启动日志服务
mapred --daemon start historyserver
4、Hadoop 操作
4.1、Web 访问
Hadoop 访问:http://xxx-246:9870
Yarn 访问:http://xxx-246:9088
4.2、创建目录并赋权
hadoop fs -mkdir /tmp
hadoop fs -mkdir /spark-jars
hadoop fs -mkdir /spark-history
hadoop fs -mkdir /tmp/hadoop-yarn
hadoop fs -mkdir -p /user/hive
hadoop fs -mkdir -p /user/hive/tmp
hadoop fs -mkdir -p /user/hive/warehouse
hadoop fs -chmod g+w /tmp
hadoop fs -chmod g+w /spark-jars
hadoop fs -chmod g+w /spark-history
hadoop fs -chmod g+w /user/hive
hadoop fs -chmod g+w /user/hive/tmp
hadoop fs -chmod -R o+w /user/hive/tmp
hadoop fs -chmod -R g+w /user/hive/warehouse
hdfs dfs -chmod -R o+w /tmp/hadoop-yarn
hdfs dfs -chown -R hadoop:hive /user/hive
4.3、运行 PI 任务
hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-3.4.2.jar pi 10 100
4.4、任务监控与故障排查
# fixed: the subcommand is "yarn node" with -list/-status options;
# "yarn node-list" / "yarn node-status" are not valid commands.
yarn node -list
yarn node -status Node-Id
yarn application -list
yarn application -status application_1700000000000_0001
yarn application -kill application_1700000000000_0001
yarn logs -applicationId application_1700000000000_0001
yarn logs -applicationId application_1700000000000_0001 -containerId container_1700000000000_0001_01_000001
# Last-resort cleanup of stray Hadoop JVMs; prefer a plain TERM before -9.
ps -ef | grep hadoop-3.4.2 | grep -v grep | awk '{print $2}' | xargs kill -9
微信扫一扫,关注极客日志
微信公众号「极客日志」,在微信中扫描左侧二维码关注。展示文案:极客日志 zeeklog
相关免费在线工具
- Keycode 信息
查找任何按下的键的javascript键代码、代码、位置和修饰符。 在线工具,Keycode 信息在线工具,online
- Escape 与 Native 编解码
JavaScript 字符串转义/反转义;Java 风格 \uXXXX(Native2Ascii)编码与解码。 在线工具,Escape 与 Native 编解码在线工具,online
- JavaScript / HTML 格式化
使用 Prettier 在浏览器内格式化 JavaScript 或 HTML 片段。 在线工具,JavaScript / HTML 格式化在线工具,online
- JavaScript 压缩与混淆
Terser 压缩、变量名混淆,或 javascript-obfuscator 高强度混淆(体积会增大)。 在线工具,JavaScript 压缩与混淆在线工具,online
- Base64 字符串编码/解码
将字符串编码和解码为其 Base64 格式表示形式即可。 在线工具,Base64 字符串编码/解码在线工具,online
- Base64 文件转换器
将字符串、文件或图像转换为其 Base64 表示形式。 在线工具,Base64 文件转换器在线工具,online