
Step by Step: Installing Hadoop 2.7.1 HA

2016-01-08 14:29

Hadoop HA architecture diagram: (figure omitted)

Role distribution across the 3 VM nodes: (figure omitted)
-- Boot into text mode instead of the GUI
[root@node1 ~]# vi /etc/inittab
id:3:initdefault:

-- Basic Linux settings
ifconfig    -- confirm each node's IP; the three nodes use:
192.168.88.128
192.168.88.129
192.168.88.130

hostname node1
hostname node2
hostname node3

vi /etc/hosts
192.168.88.128 node1
192.168.88.129 node2
192.168.88.130 node3

vi /etc/sysconfig/network
HOSTNAME=node1

vi /etc/sysconfig/network-scripts/ifcfg-eth0
IPADDR=192.168.88.128

/sbin/service network restart  # restart the network service

service iptables stop
chkconfig iptables off
chkconfig|grep iptables
iptables        0:off   1:off   2:off   3:off   4:off   5:off   6:off
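To confirm the firewall really is off, check on each node (on RHEL/CentOS 6 it should report "iptables: Firewall is not running."):
service iptables status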

After a reboot the new hostname takes effect.

-- Set up passwordless SSH

Run on node1, node2, and node3:
su - hadoop
ssh-keygen -q -t rsa -N "" -f /home/hadoop/.ssh/id_rsa
cd .ssh
cat id_rsa.pub >> authorized_keys
chmod go-wx  authorized_keys

Run on node1:
scp id_rsa.pub hadoop@node2:~
scp id_rsa.pub hadoop@node3:~

Run on node2 and node3 (append node1's public key):
cat ~/id_rsa.pub >> ~/.ssh/authorized_keys

Run on node1 to verify passwordless login to the other nodes:
ssh node2
ssh node3
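To confirm passwordless login works in every direction, a small loop such as this can be run from each node (BatchMode makes ssh fail instead of prompting for a password if key authentication is broken):
for h in node1 node2 node3; do
  ssh -o BatchMode=yes hadoop@$h hostname
done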

------------------------------------------------------------
-- Install the JDK
Upload the JDK with WinSCP
[hadoop@node1 ~]$ tar -zxvf jdk-7u67-linux-x64.tar.gz
[hadoop@node1 bin]$ pwd
/home/hadoop/jdk1.7.0_67/bin

-- This step can be skipped; all environment variables are set in one place later
[hadoop@node1 bin]$ su - root
[root@node1 ~]# vi /etc/profile
export JAVA_HOME=/home/hadoop/jdk1.7.0_67
export PATH=${JAVA_HOME}/bin:$PATH

unset i              -- these two lines already exist at the end of /etc/profile;
unset -f pathmunge   -- add the exports above them
[root@node1 ~]# source /etc/profile
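After re-logging in as hadoop so the new profile is picked up, it is worth confirming the right JDK is found; for 7u67 the output should look roughly like:
[hadoop@node1 ~]$ which java
/home/hadoop/jdk1.7.0_67/bin/java
[hadoop@node1 ~]$ java -version
java version "1.7.0_67"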

------------------------------------------------------------
-- Install Hadoop
[hadoop@node1 ~]$ tar -zxvf hadoop-2.7.1.tar.gz
/home/hadoop/hadoop-2.7.1

-- Add these to PATH (done in the unified /etc/profile step below):
/home/hadoop/hadoop-2.7.1/sbin
/home/hadoop/hadoop-2.7.1/bin

-- Create the data/jn and PID directories
[hadoop@node1 ~]$ mkdir -p /home/hadoop/hadoop-2.7.1/data/jn
[hadoop@node1 ~]$ mkdir /home/hadoop/dirtmp    -- holds PID files; matches the *-env.sh settings in the appendix
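The DataNodes and JournalNodes run on all three machines, so the same directories need to exist on node2 and node3 too; a minimal sketch, assuming the passwordless SSH set up earlier:
for h in node2 node3; do
  ssh hadoop@$h "mkdir -p /home/hadoop/hadoop-2.7.1/data/jn /home/hadoop/dirtmp"
done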

-- Edit the configuration files (full contents are in the appendix)
cd etc/hadoop
vi hadoop-env.sh  -- set JAVA_HOME, the PID directory, etc.
vi hdfs-site.xml
vi core-site.xml
vi slaves
node1
node2
node3

[hadoop@node2 hadoop]$ ll *env*sh
-rw-r--r--. 1 hadoop hadoop 4236 Nov  5 17:12 hadoop-env.sh
-rw-r--r--. 1 hadoop hadoop 1449 Nov  5 17:12 httpfs-env.sh
-rw-r--r--. 1 hadoop hadoop 1527 Nov  5 17:12 kms-env.sh
-rw-r--r--. 1 hadoop hadoop 1383 Nov  5 17:12 mapred-env.sh
-rw-r--r--. 1 hadoop hadoop 4567 Nov  5 17:12 yarn-env.sh

-- Push the Hadoop configuration files to the other nodes (overwrites their copies)
scp /home/hadoop/hadoop-2.7.1/etc/hadoop/*  hadoop@node2:/home/hadoop/hadoop-2.7.1/etc/hadoop/
scp /home/hadoop/hadoop-2.7.1/etc/hadoop/*  hadoop@node3:/home/hadoop/hadoop-2.7.1/etc/hadoop/

------------------------------------------------------------
-- Set up ZooKeeper
[hadoop@node1 ~]$ tar -zxvf zookeeper-3.4.6.tar.gz
-- add to PATH (unified /etc/profile step below):
/home/hadoop/zookeeper-3.4.6/bin
-- create a data directory under the install directory
[hadoop@node1 zookeeper-3.4.6]$ mkdir data

cd conf
cp zoo_sample.cfg zoo.cfg
vi zoo.cfg    -- change dataDir=/home/hadoop/zookeeper-3.4.6/data
and append these three lines at the end:
server.1=node1:2888:3888
server.2=node2:2888:3888
server.3=node3:2888:3888

cd data
vi myid
1

scp -r zookeeper-3.4.6/ hadoop@node2:~    -- then on node2: vi ~/zookeeper-3.4.6/data/myid, set it to 2
scp -r zookeeper-3.4.6/ hadoop@node3:~    -- then on node3: vi ~/zookeeper-3.4.6/data/myid, set it to 3

Sync any other changed configuration files to the remaining nodes with scp.
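Setting myid on the other two nodes can also be done in one shot from node1 (each value must match the corresponding server.N entry in zoo.cfg):
ssh hadoop@node2 "echo 2 > ~/zookeeper-3.4.6/data/myid"
ssh hadoop@node3 "echo 3 > ~/zookeeper-3.4.6/data/myid"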

------------------------------------------------------------
Configure the environment variables on every node, as root

[hadoop@node1 bin]$ su - root
[root@node1 ~]# vi /etc/profile
export JAVA_HOME=/home/hadoop/jdk1.7.0_67
export HADOOP_HOME=/home/hadoop/hadoop-2.7.1
export ZK_HOME=/home/hadoop/zookeeper-3.4.6
export PATH=$PATH:${JAVA_HOME}/bin:${HADOOP_HOME}/bin:${HADOOP_HOME}/sbin:${ZK_HOME}/bin

unset i
unset -f pathmunge
[root@node1 ~]# source /etc/profile

------------------------------------------------------------
-- Start ZooKeeper on all 3 nodes
su - hadoop
zkServer.sh start
jps

zkServer.sh stop    -- to stop; note: the zookeeper.out file grows quickly
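Once all three are up, zkServer.sh status on each node should report one leader and two followers, e.g.:
[hadoop@node1 ~]$ zkServer.sh status
JMX enabled by default
Using config: /home/hadoop/zookeeper-3.4.6/bin/../conf/zoo.cfg
Mode: follower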

------------------------------------------------------------
-- Start the 3 JournalNodes (on every node)
cd sbin
hadoop-daemon.sh start journalnode
jps
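jps on each node should now show the ZooKeeper and JournalNode daemons (the PIDs will differ):
2721 QuorumPeerMain
2884 JournalNode
2950 Jps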

------------------------------------------------------------
-- Format NN1 (on node1; the JournalNodes must already be running)
hdfs namenode -format

-- Start NN1
hadoop-daemon.sh start namenode

-- Bootstrap NN2 (run on node2)
hdfs namenode -bootstrapStandby   -- copies NN1's metadata to NN2

-- Start NN2
hadoop-daemon.sh start namenode   -- check that it comes up

-- Format the failover controller's znode in ZooKeeper
hdfs zkfc -formatZK    -- run on either one of the NameNodes

-- Start ZKFC (on both NameNode hosts)
hadoop-daemon.sh start zkfc

-- Restart HDFS
On NN1 (node1):
stop-dfs.sh
start-dfs.sh
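After the restart, given the slaves file above, the expected process layout per node is roughly:
node1: NameNode, DataNode, JournalNode, DFSZKFailoverController, QuorumPeerMain
node2: NameNode, DataNode, JournalNode, DFSZKFailoverController, QuorumPeerMain
node3: DataNode, JournalNode, QuorumPeerMain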

-- Map the IPs in the Windows hosts file so the web UIs resolve by hostname
C:\WINDOWS\system32\drivers\etc\hosts
192.168.88.128 node1
192.168.88.129 node2
192.168.88.130 node3

-- Whichever NameNode grabs the ZooKeeper lock first becomes active
-- Web UIs: http://node1:50070 and http://node2:50070

-- Test uploading a file
./hdfs dfs -mkdir -p /usr/file
./hdfs dfs -put /xxx/xxx.txt /usr/file
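A simple failover test: check which NameNode is active, kill it, and confirm the standby takes over. hdfs haadmin ships with HDFS; the nn1/nn2 ids come from hdfs-site.xml in the appendix:
hdfs haadmin -getServiceState nn1    -- active
hdfs haadmin -getServiceState nn2    -- standby
kill -9 <pid of the active NameNode from jps>    -- on the active node
hdfs haadmin -getServiceState nn2    -- should now report active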

-------------------------------------------------------------
Configure MapReduce/YARN
cp mapred-site.xml.template mapred-site.xml    -- the 2.7.1 tarball only ships the template
vi mapred-site.xml
vi yarn-site.xml
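These two files also need to reach the other nodes, otherwise the NodeManagers on node2/node3 start with defaults; same scp pattern as before:
scp mapred-site.xml yarn-site.xml hadoop@node2:/home/hadoop/hadoop-2.7.1/etc/hadoop/
scp mapred-site.xml yarn-site.xml hadoop@node3:/home/hadoop/hadoop-2.7.1/etc/hadoop/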

-------------------------------------------------------------------------------------------
Full start/stop
-- on all nodes:
zkServer.sh start
zkServer.sh stop

-- on node1:
start-all.sh
stop-all.sh

-- Web UI: http://node1:8088
zkServer.sh start
start-all.sh

-- Check with jps that ZKFC came up; if not, start it manually:
hadoop-daemon.sh start zkfc
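To verify MapReduce on YARN end to end, the example jar bundled with the 2.7.1 tarball can be run:
hadoop jar /home/hadoop/hadoop-2.7.1/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.1.jar pi 2 10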

-- Or start each daemon individually, in this order:
zkServer.sh start
hadoop-daemon.sh start namenode
hadoop-daemon.sh start datanode
hadoop-daemon.sh start journalnode
hadoop-daemon.sh start zkfc
yarn-daemon.sh start resourcemanager
yarn-daemon.sh start nodemanager

---------------------------------------------------------------
-- Appendix: full configuration
---------------------------------------------------------------

[hadoop@node1 conf]$ vi zoo.cfg
clientPort=2181  -- default

dataDir=/home/hadoop/zookeeper-3.4.6/data
server.1=node1:2888:3888
server.2=node2:2888:3888
server.3=node3:2888:3888

[hadoop@node1 hadoop]$ vi hadoop-env.sh
export JAVA_HOME=/home/hadoop/jdk1.7.0_67
export HADOOP_PID_DIR=/home/hadoop/dirtmp
export HADOOP_SECURE_DN_PID_DIR=/home/hadoop/dirtmp

vi mapred-env.sh
export HADOOP_MAPRED_PID_DIR=/home/hadoop/dirtmp

vi yarn-env.sh
export YARN_PID_DIR=/home/hadoop/dirtmp

vi hdfs-site.xml

<property>
  <name>dfs.nameservices</name>
  <value>odscluster</value>
</property>
<property>
  <name>dfs.ha.namenodes.odscluster</name>
  <value>nn1,nn2</value>
</property>
<property>
  <name>dfs.namenode.rpc-address.odscluster.nn1</name>
  <value>node1:8020</value>
</property>
<property>
  <name>dfs.namenode.rpc-address.odscluster.nn2</name>
  <value>node2:8020</value>
</property>
<property>
  <name>dfs.namenode.http-address.odscluster.nn1</name>
  <value>node1:50070</value>
</property>
<property>
  <name>dfs.namenode.http-address.odscluster.nn2</name>
  <value>node2:50070</value>
</property>
<property>
  <name>dfs.namenode.shared.edits.dir</name>
  <value>qjournal://node1:8485;node2:8485;node3:8485/odscluster</value>
</property>
<property>
  <name>dfs.client.failover.proxy.provider.odscluster</name>
  <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
  <name>dfs.ha.fencing.methods</name>
  <value>sshfence</value>
</property>
<property>
  <name>dfs.ha.fencing.ssh.private-key-files</name>
  <value>/home/hadoop/.ssh/id_rsa</value>
</property>
<property>
  <name>dfs.journalnode.edits.dir</name>
  <value>/home/hadoop/hadoop-2.7.1/data/jn</value>
</property>
<property>
  <name>dfs.ha.automatic-failover.enabled</name>
  <value>true</value>
</property>

vi core-site.xml

<property>
  <name>fs.defaultFS</name>
  <value>hdfs://odscluster</value>
</property>
<property>
  <name>ha.zookeeper.quorum</name>
  <value>node1:2181,node2:2181,node3:2181</value>
</property>
<property>
  <name>hadoop.tmp.dir</name>
  <value>/home/hadoop/hadoop-2.7.1/data</value>
</property>

vi mapred-site.xml
<property>
  <name>mapreduce.framework.name</name>
  <value>yarn</value>
</property>

vi yarn-site.xml
<property>
  <name>yarn.resourcemanager.hostname</name>
  <value>node1</value>
</property>
<property>
  <name>yarn.nodemanager.aux-services</name>
  <value>mapreduce_shuffle</value>
</property>
<property>
  <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
  <value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>