
Hadoop cluster setup (Hive, Spark)

2017-11-03

Setting up a Hadoop HA cluster


  1. Prepare the machines and tools.
  2. Set the hostname on each host: vim /etc/hostname
user1
user2
user3
user4
user5
user6
user7
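
A reboot applies the new name; on systemd-based systems it can be applied immediately instead. A minimal sketch (run on each host with its own name; user1 shown here):

# Apply the hostname without a reboot (systemd systems only)
hostnamectl set-hostname user1
# On older systems, set it for the current session only:
hostname user1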
  3. Configure a static IP: vim /etc/network/interfaces. For user1 the network settings are:
# The loopback network interface 
auto lo 
iface lo inet loopback 
 
# The primary network interface 
auto eth0 
#iface eth0 inet dhcp 
iface eth0 inet static 
address 192.168.18.11 
netmask 255.255.255.0 
gateway 192.168.18.10 
dns-nameservers 192.168.18.10
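
Restart the interface so the static address takes effect. A sketch for the Debian/Ubuntu-style networking implied by /etc/network/interfaces:

# Reload eth0 with the new static configuration
ifdown eth0 && ifup eth0
# Confirm the address stuck
ip addr show eth0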
  4. Edit the hosts file so every host can be pinged by name: vim /etc/hosts
127.0.0.1   localhost localhost.localdomain localhost4 localhost4.localdomain4
::1         localhost localhost.localdomain localhost6 localhost6.localdomain6

192.168.18.11 user1
192.168.18.12 user2
192.168.18.13 user3
192.168.18.14 user4
192.168.18.15 user5
192.168.18.16 user6
192.168.18.17 user7
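
A quick check that every host resolves and answers by name; a sketch using the host list above, runnable from any node:

for h in user1 user2 user3 user4 user5 user6 user7; do
  ping -c 1 "$h" > /dev/null && echo "$h ok" || echo "$h FAILED"
done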
  5. Passwordless SSH between hosts: each host generates a key pair and copies its public key to user7, then user7's merged authorized_keys is distributed to the other hosts (see the verification sketch after these commands):
ssh-copy-id -i /root/.ssh/id_rsa.pub  user7
scp /root/.ssh/authorized_keys user1:/root/.ssh/authorized_keys
scp /root/.ssh/authorized_keys user2:/root/.ssh/authorized_keys
scp /root/.ssh/authorized_keys user3:/root/.ssh/authorized_keys
scp /root/.ssh/authorized_keys user4:/root/.ssh/authorized_keys
scp /root/.ssh/authorized_keys user5:/root/.ssh/authorized_keys
scp /root/.ssh/authorized_keys user6:/root/.ssh/authorized_keys
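
If a host has no key pair yet, generate one before running ssh-copy-id. A minimal sketch, followed by a loop that verifies passwordless login end to end:

# On each host: create an RSA key pair (empty passphrase)
ssh-keygen -t rsa -N "" -f /root/.ssh/id_rsa
# After authorized_keys has been distributed, verify from any host:
for h in user1 user2 user3 user4 user5 user6 user7; do
  ssh "$h" hostname
done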
  6. Install the JDK. Add the following to /etc/profile:
export JAVA_HOME=/ittest/jdk1.8.0_72/
export JRE_HOME=/ittest/jdk1.8.0_72/jre
export PATH=$PATH:$JAVA_HOME/bin
export CLASSPATH=.:$JAVA_HOME/lib:$JAVA_HOME/jre/lib
Copy the JDK directory and /etc/profile to the other hosts:
scp -r /ittest/  user2:/
scp -r /ittest/  user3:/
scp -r /ittest/  user4:/
scp -r /ittest/  user5:/
scp -r /ittest/  user6:/
scp -r /ittest/  user7:/
scp /etc/profile user2:/etc/profile
scp /etc/profile user3:/etc/profile
scp /etc/profile user4:/etc/profile
scp /etc/profile user5:/etc/profile
scp /etc/profile user6:/etc/profile
scp /etc/profile user7:/etc/profile
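
To confirm the JDK landed everywhere, reload the profile and print the Java version on each host; a sketch assuming a bash login shell:

for h in user2 user3 user4 user5 user6 user7; do
  ssh "$h" 'source /etc/profile && java -version'
done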
  7. Install ZooKeeper. In conf/zoo.cfg, configure the server list:
server.1=user5:2888:3888
server.2=user6:2888:3888
server.3=user7:2888:3888
Copy the ZooKeeper directory to the other ZooKeeper hosts:
scp -r  /ittest/zookeeper-3.4.8 user5:/ittest/
scp -r  /ittest/zookeeper-3.4.8 user6:/ittest/
scp -r  /ittest/zookeeper-3.4.8 user7:/ittest/
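
Each ZooKeeper server also needs a myid file under its dataDir holding the number from its server.N line. The dataDir path below is an assumption; use whatever zoo.cfg actually sets:

echo 1 > /ittest/zookeeper-3.4.8/data/myid   # on user5
echo 2 > /ittest/zookeeper-3.4.8/data/myid   # on user6
echo 3 > /ittest/zookeeper-3.4.8/data/myid   # on user7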

  8. Install Hadoop (for now a single YARN ResourceManager on user3; a second one will be configured on user4 later). Edit the following files under etc/hadoop; their contents follow in order:

hadoop-env.sh     
core-site.xml        
hdfs-site.xml        
mapred-site.xml    
yarn-site.xml (create one from the template file) 
slaves 

hadoop-env.sh:
export JAVA_HOME=/ittest/jdk1.8.0_72

core-site.xml:
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
       <!-- Specify ns1 as the HDFS nameservice -->
       <property>
            <name>fs.defaultFS</name>
            <value>hdfs://ns1</value>
       </property>
       <!-- Hadoop temp directory -->
       <property>
            <name>hadoop.tmp.dir</name>
            <value>/ittest/hadoop-2.7.1/tmp</value>
       </property>
       <!-- ZooKeeper quorum address -->
       <property>
           <name>ha.zookeeper.quorum</name>
           <value>user5:2181,user6:2181,user7:2181</value>
       </property>
</configuration>
hdfs-site.xml:
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
        <!-- The HDFS nameservice is ns1; must match core-site.xml -->
        <property>
             <name>dfs.nameservices</name>
             <value>ns1</value>
        </property> 
        <!-- ns1 has two NameNodes: nn1 and nn2 -->
        <property>
             <name>dfs.ha.namenodes.ns1</name>
             <value>nn1,nn2</value>
        </property>
         <!-- RPC address of nn1 -->
         <property>
              <name>dfs.namenode.rpc-address.ns1.nn1</name>
              <value>user1:9000</value>
        </property>
        <!-- HTTP address of nn1 -->
        <property>
              <name>dfs.namenode.http-address.ns1.nn1</name>
              <value>user1:50070</value>
        </property>
        <!-- RPC address of nn2 -->
         <property>
              <name>dfs.namenode.rpc-address.ns1.nn2</name>
              <value>user2:9000</value>
        </property>
        <!-- HTTP address of nn2 -->
        <property>
              <name>dfs.namenode.http-address.ns1.nn2</name>
              <value>user2:50070</value>
        </property>
        <!-- Where the NameNode metadata (edits) is stored on the JournalNodes -->
        <property>
              <name>dfs.namenode.shared.edits.dir</name>
              <value>qjournal://user5:8485;user6:8485;user7:8485/ns1</value>
        </property>
        <!-- Where each JournalNode stores its data on local disk -->
        <property>
              <name>dfs.journalnode.edits.dir</name>
              <value>/ittest/hadoop-2.7.1/journal</value>
        </property>
        <!-- Enable automatic NameNode failover -->
        <property>
              <name>dfs.ha.automatic-failover.enabled</name>
              <value>true</value>
        </property>
        <!-- Failover proxy provider used by clients -->
        <property>
              <name>dfs.client.failover.proxy.provider.ns1</name>
              <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
        </property>
        <!-- Fencing method -->
        <property>
              <name>dfs.ha.fencing.methods</name>
              <value>sshfence</value>
        </property>
        <!-- sshfence requires passwordless SSH -->
        <property>
              <name>dfs.ha.fencing.ssh.private-key-files</name>
              <value>/root/.ssh/id_rsa</value>
        </property>
</configuration>
mapred-site.xml:
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
           <!-- Run MapReduce on YARN -->
           <property>
                 <name>mapreduce.framework.name</name>
                 <value>yarn</value>
           </property>
</configuration>


yarn-site.xml:
<?xml version="1.0"?>
<configuration>
           <!-- ResourceManager address -->
           <property>
               <name>yarn.resourcemanager.hostname</name>
               <value>user3</value>
           </property>
           <!-- NodeManager auxiliary service: the shuffle server for MapReduce -->
           <property>
                <name>yarn.nodemanager.aux-services</name>
                <value>mapreduce_shuffle</value>
          </property>
</configuration>
slaves (lists the DataNode/NodeManager hosts):
user5
user6
user7
Distribute the configured Hadoop directory to the other hosts:
scp -r /ittest/hadoop-2.7.1/ user2:/ittest/
scp -r /ittest/hadoop-2.7.1/ user3:/ittest/
scp -r /ittest/hadoop-2.7.1/ user4:/ittest/
scp -r /ittest/hadoop-2.7.1/ user5:/ittest/
scp -r /ittest/hadoop-2.7.1/ user6:/ittest/
scp -r /ittest/hadoop-2.7.1/ user7:/ittest/
  9. Start the ZooKeeper cluster (on user5, user6, and user7; skip if already running):
cd /ittest/zookeeper-3.4.8/bin/
./zkServer.sh start
./zkServer.sh status
  10. Start the JournalNodes (run on user1; this starts all of them):
cd /ittest/hadoop-2.7.1
sbin/hadoop-daemons.sh start journalnode
  11. Format HDFS (on user1, from /ittest/hadoop-2.7.1), then copy the resulting tmp/ directory to the standby NameNode user2:
hadoop namenode -format
scp -r tmp/ user2:/ittest/hadoop-2.7.1/
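
Instead of copying tmp/ by hand, the standby NameNode can be initialized with Hadoop's built-in bootstrap command (run on user2 while the JournalNodes are up):

hdfs namenode -bootstrapStandby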
  12. Format ZK (running it on user1 is sufficient):
hdfs zkfc -formatZK

After this completes, test on one of the ZooKeeper hosts (user5, user6, or user7):

cd /ittest/zookeeper-3.4.8/bin
./zkCli.sh
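
Inside the zkCli shell, the znode created by formatZK should be visible; for this setup (nameservice ns1) expect output along these lines:

ls /
# [zookeeper, hadoop-ha]
ls /hadoop-ha
# [ns1]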
  13. Start HDFS (on user1):
    sbin/start-dfs.sh

  14. Start YARN (run on both user1 and user3):
    sbin/start-yarn.sh

  15. Hadoop 2.7.1 is now fully configured; verify through the browser:

http://192.168.18.11:50070
NameNode 'user1:9000' (active)
http://192.168.18.12:50070
NameNode 'user2:9000' (standby)
A jps check on the nodes shows processes such as:
1346 NameNode
1480 DFSZKFailoverController
1216 JournalNode
3633 Jps
1365 DataNode
1126 QuorumPeerMain
2988 NodeManager
  16. Verify HDFS HA, as sketched below.
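
A minimal failover check, assuming nn1 (user1) is currently active: query both NameNode states, kill the active one, and confirm the standby takes over:

hdfs haadmin -getServiceState nn1     # active
hdfs haadmin -getServiceState nn2     # standby
# On user1: kill the active NameNode process
kill -9 $(jps | grep -w NameNode | awk '{print $1}')
# nn2 should now report active
hdfs haadmin -getServiceState nn2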
  17. Verify YARN: run the WordCount demo that ships with Hadoop:
    /ittest/hadoop-2.7.1/bin/hadoop jar /ittest/hadoop-2.7.1/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.1.jar wordcount /copyprofile.sh /out4
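
When the job finishes, read the result back out of HDFS (part-r-00000 is the default reducer output file):

/ittest/hadoop-2.7.1/bin/hadoop fs -ls /out4
/ittest/hadoop-2.7.1/bin/hadoop fs -cat /out4/part-r-00000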
  18. Hadoop environment variables: edit /etc/profile on user1, then copy it to the other hosts:
export JAVA_HOME=/ittest/jdk1.8.0_72
export JRE_HOME=/ittest/jdk1.8.0_72/jre
export HADOOP_HOME=/ittest/hadoop-2.7.1
export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin
export CLASSPATH=.:$JAVA_HOME/lib:$JAVA_HOME/jre/lib
scp /etc/profile user2:/etc/profile
scp /etc/profile user3:/etc/profile
scp /etc/profile user4:/etc/profile
scp /etc/profile user5:/etc/profile
scp /etc/profile user6:/etc/profile
scp /etc/profile user7:/etc/profile
  19. Make user4 a second YARN ResourceManager. On user1, edit yarn-site.xml in /ittest/hadoop-2.7.1/etc/hadoop, keeping a backup of the original, then replace its contents with the HA configuration below:
    mv yarn-site.xml yarn-site.xml.noHA
    cp yarn-site.xml.noHA yarn-site.xml
<configuration>

<!-- Site specific YARN configuration properties -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>

  <property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
  </property>


  <property>
    <name>yarn.log-aggregation.retain-seconds</name>
    <value>604800</value>
  </property>

  <property>
    <name>yarn.resourcemanager.ha.enabled</name>
    <value>true</value>
  </property>

  <property>
    <name>yarn.resourcemanager.ha.automatic-failover.enabled</name>
    <value>true</value>
  </property>

  <property>
    <name>yarn.resourcemanager.cluster-id</name>
    <value>yarncluster</value>
  </property>

  <property>
    <name>yarn.resourcemanager.ha.rm-ids</name>
    <value>rm1,rm2</value>
  </property>
  
  <property>
    <name>yarn.resourcemanager.hostname.rm1</name>
    <value>user3</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname.rm2</name>
    <value>user4</value>
  </property>

  <property>
    <name>yarn.resourcemanager.webapp.address.rm1</name>
    <value>user3:8088</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address.rm2</name>
    <value>user4:8088</value>
  </property>

  <property>
    <name>yarn.resourcemanager.zk-address</name>
    <value>user5:2181,user6:2181,user7:2181</value>
  </property>

  <property>
    <name>yarn.resourcemanager.recovery.enabled</name>
    <value>true</value>
  </property>

  <property>
    <name>yarn.resourcemanager.store.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
  </property>

  <property>
    <name>yarn.nodemanager.recovery.enabled</name>
    <value>true</value>
  </property>

  <property>
    <name>yarn.nodemanager.address</name>
    <value>0.0.0.0:45454</value>
  </property>
</configuration>
Distribute the new yarn-site.xml to every host:
scp /ittest/hadoop-2.7.1/etc/hadoop/yarn-site.xml user2:/ittest/hadoop-2.7.1/etc/hadoop/yarn-site.xml
scp /ittest/hadoop-2.7.1/etc/hadoop/yarn-site.xml user3:/ittest/hadoop-2.7.1/etc/hadoop/yarn-site.xml
scp /ittest/hadoop-2.7.1/etc/hadoop/yarn-site.xml user4:/ittest/hadoop-2.7.1/etc/hadoop/yarn-site.xml
scp /ittest/hadoop-2.7.1/etc/hadoop/yarn-site.xml user5:/ittest/hadoop-2.7.1/etc/hadoop/yarn-site.xml
scp /ittest/hadoop-2.7.1/etc/hadoop/yarn-site.xml user6:/ittest/hadoop-2.7.1/etc/hadoop/yarn-site.xml
scp /ittest/hadoop-2.7.1/etc/hadoop/yarn-site.xml user7:/ittest/hadoop-2.7.1/etc/hadoop/yarn-site.xml
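
start-yarn.sh only starts a ResourceManager on the node it is run from, so after restarting YARN on user3, start the second ResourceManager on user4 by hand and check both states:

# On user3:
sbin/stop-yarn.sh && sbin/start-yarn.sh
# On user4:
sbin/yarn-daemon.sh start resourcemanager
# From any node:
yarn rmadmin -getServiceState rm1
yarn rmadmin -getServiceState rm2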

Hive deployment (a brief supplement added later)

  1. Install MySQL:
service mysqld start
mysql -uroot -p
update mysql.user set password=password('root') where user='root';

This may fail with: ERROR 1054 (42S22) Unknown column 'password' in 'field list'.
As of MySQL 5.7 the column in mysql.user has been renamed, so use this instead:

update mysql.user set authentication_string=password('root') where user='root';

mysql> grant all privileges on *.* to 'root'@'%' with grant option;
mysql> grant all privileges on *.* to 'root'@'%' identified by '123';
mysql> flush privileges;
  2. Deploy Hive. In conf/hive-env.sh, set the following (the rest of the upstream template remains commented out):

# Set HADOOP_HOME to point to a specific hadoop install directory
# HADOOP_HOME=${bin}/../../hadoop
HADOOP_HOME=/usr/local/hadoop/hadoop-2.8.1

# Hive Configuration Directory can be controlled by:
export HIVE_CONF_DIR=/usr/local/hive/conf

# Folder containing extra libraries required for hive compilation/execution can be controlled by:
export HIVE_AUX_JARS_PATH=/usr/local/hive/lib


In conf/hive-site.xml, configure the metastore connection:

  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://172.16.244.235:3306/hive?createDatabaseIfNotExist=true&amp;useSSL=false</value>
    <description>JDBC connect string for a JDBC metastore</description>
  </property>

<property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.jdbc.Driver</value>
    <description>Driver class name for a JDBC metastore</description>
  </property>

  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>root</value>
    <description>Username to use against metastore database</description>
  </property>
  
  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>123</value>
    <description>password to use against metastore database</description>
  </property>
  3. Copy the MySQL JDBC driver jar into Hive's lib directory, as sketched below.
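
A sketch with a hypothetical connector version; copy whichever mysql-connector jar you actually downloaded:

cp mysql-connector-java-5.1.40-bin.jar /usr/local/hive/lib/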
  4. Start Hive and run a quick test, as sketched below.
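
A minimal smoke test. If your Hive version ships schematool, initialize the metastore schema in MySQL once before first use:

/usr/local/hive/bin/schematool -dbType mysql -initSchema
# Then open the CLI and run a trivial query
/usr/local/hive/bin/hive -e "show databases;"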

Postscript
