Installing Hadoop, Spark, and Hive in Pseudo-Distributed Mode on Ubuntu

2018-12-18  zfylin

0. Preparation

Package downloads

  1. Hadoop: http://apache.communilink.net/hadoop/common/hadoop-2.6.5/
  2. Spark: https://spark.apache.org/downloads.html
  3. Hive: http://ftp.cuhk.edu.hk/pub/packages/apache.org/hive/
  4. JDK: https://www.oracle.com/technetwork/cn/java/javase/downloads/jdk8-downloads-2133151-zhs.html
  5. Scala: https://www.scala-lang.org/download/

Install Java

sudo tar -xzvf jdk-8u144-linux-x64.tar.gz -C /opt
cd /opt
sudo ln -s jdk1.8.0_144 jdk

# Add environment variables
sudo vim /etc/profile
    # Append the following
    export JAVA_HOME=/opt/jdk
    export JRE_HOME=$JAVA_HOME/jre
    export CLASSPATH=.:$JAVA_HOME/lib/tools.jar:$JAVA_HOME/lib/dt.jar
    export PATH=$PATH:$JAVA_HOME/bin
source /etc/profile
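
A quick sanity check that the shell now picks up the new JDK (the expected version string assumes the jdk-8u144 tarball above):

java -version
# java version "1.8.0_144" (or similar)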

Install Scala

sudo tar -zxvf $SCALA_FILE -C /opt
cd /opt
sudo ln -s scala-2.11.8 scala
sudo vim /etc/profile
    # Append
    export SCALA_HOME=/opt/scala
    export PATH=$PATH:$SCALA_HOME/bin
source /etc/profile
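
Likewise for Scala (version assumes the scala-2.11.8 tarball):

scala -version
# Scala code runner version 2.11.8 ...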

Install MySQL

Hive's metastore will live in MySQL, so install it first (any standard Ubuntu MySQL guide applies); a minimal sketch follows below.
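
A minimal install sketch using Ubuntu's default repositories; the root password you set here must match the one configured in hive-site.xml later:

sudo apt-get update
sudo apt-get install mysql-server
# confirm the server is up and reachable
mysql -u root -p -e "SELECT VERSION();"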

Passwordless SSH login

sudo apt-get install openssh-server
ssh localhost        # log in once so ~/.ssh is created, then exit
ssh-keygen -t dsa -P '' -f ~/.ssh/id_dsa
cat ~/.ssh/id_dsa.pub >> ~/.ssh/authorized_keys
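
Then confirm that ssh localhost opens a shell without prompting; if it still asks for a password, tightening the key file's permissions is the usual fix:

chmod 600 ~/.ssh/authorized_keys
ssh localhost   # should log in without a password
exit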

1. Install Hadoop

Installation and configuration

sudo tar -zxvf $YOUR_HADOOP_FILE -C /opt
cd /opt
sudo ln -s hadoop-2.6.5 hadoop
sudo vim /etc/profile
    # Append
    export HADOOP_HOME=/opt/hadoop
    export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin
source /etc/profile
vim /opt/hadoop/etc/hadoop/hadoop-env.sh
    # Set JAVA_HOME explicitly
    export JAVA_HOME=/opt/jdk
vim /opt/hadoop/etc/hadoop/hdfs-site.xml
<configuration>
<property>
    <name>dfs.replication</name>
    <value>1</value>
</property>
<property>
    <name>dfs.permissions</name>
    <value>false</value>
</property>
<property>
    <name>dfs.namenode.name.dir</name>
    <value>file:///data/hadoopdata/hdfs/name</value>
</property>
<property>
    <name>dfs.datanode.data.dir</name>
    <value>file:///data/hadoopdata/hdfs/data</value>
</property>
<property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>localhost:9001</value>
</property>
</configuration>
vim /opt/hadoop/etc/hadoop/core-site.xml
<configuration>
<property>
    <name>fs.defaultFS</name>
    <value>hdfs://localhost:9000</value>
</property>
<property>
    <name>hadoop.tmp.dir</name>
    <value>file:///opt/hadoop/tmp</value>
</property>
</configuration>
# mapred-site.xml does not exist by default; copy the template first
cp /opt/hadoop/etc/hadoop/mapred-site.xml.template /opt/hadoop/etc/hadoop/mapred-site.xml
vim /opt/hadoop/etc/hadoop/mapred-site.xml
<configuration>
<property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
</property>
<property>
    <name>yarn.app.mapreduce.am.env</name>
    <value>HADOOP_MAPRED_HOME=/opt/hadoop</value>
</property>
<property>
    <name>mapreduce.map.env</name>
    <value>HADOOP_MAPRED_HOME=/opt/hadoop</value>
</property>
<property>
    <name>mapreduce.reduce.env</name>
    <value>HADOOP_MAPRED_HOME=/opt/hadoop</value>
</property>
</configuration>
vim /opt/hadoop/etc/hadoop/yarn-site.xml
<configuration>
<property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
</property>
<property>
    <name>yarn.nodemanager.vmem-check-enabled</name>
    <value>false</value>
</property>
</configuration>
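
The NameNode and DataNode directories referenced in hdfs-site.xml above do not exist yet; create them and hand ownership to the user who will run Hadoop (assumed here to be the current user) before formatting:

sudo mkdir -p /data/hadoopdata/hdfs/name /data/hadoopdata/hdfs/data
sudo chown -R $USER:$USER /data/hadoopdata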

Format the NameNode

hdfs namenode -format

Start

/opt/hadoop/sbin/start-all.sh    # deprecated wrapper for start-dfs.sh + start-yarn.sh

Verify

jps
# Expect processes like the following (ResourceManager and NodeManager
# should also be listed once YARN has started)
-----------------
5939 Jps
5636 DataNode
5493 NameNode
5814 SecondaryNameNode

Browser access: http://localhost:8088 (YARN ResourceManager UI) and http://localhost:50070 (HDFS NameNode UI).
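
As an end-to-end smoke test, run one of the bundled example jobs (the jar path assumes the 2.6.5 tarball):

hadoop jar /opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.6.5.jar pi 2 5
# prints an estimated value of Pi when the job completes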

2. Install Hive

Installation and configuration

tar -zxvf $HIVE_FILE -C /opt
cd /opt
sudo ln -s apache-hive-1.2.2-bin hive
sudo vim /etc/profile
    # Append
    export HIVE_HOME=/opt/hive
    export PATH=$PATH:$JAVA_HOME/bin:$SCALA_HOME/bin:$HADOOP_HOME/bin:$HIVE_HOME/bin:$SPARK_HOME/bin
source /etc/profile
vim /opt/hive/conf/hive-site.xml
<configuration>
        <!-- MySQL connection URL (metastore database "hive" on localhost) -->
        <property>
                <name>javax.jdo.option.ConnectionURL</name>
                <value>jdbc:mysql://localhost:3306/hive</value>
        </property>
        <!-- MySQL JDBC driver class -->
        <property>
                <name>javax.jdo.option.ConnectionDriverName</name>
                <value>com.mysql.jdbc.Driver</value>
        </property>
        <!-- MySQL user name -->
        <property>
                <name>javax.jdo.option.ConnectionUserName</name>
                <value>root</value>
        </property>
        <!-- MySQL password -->
        <property>
                <name>javax.jdo.option.ConnectionPassword</name>
                <value>123456</value>
        </property>

        <property>
                <name>hive.metastore.schema.verification</name>
                <value>false</value>
        </property>
</configuration>
# Download mysql-connector-java-5.1.44.jar and place it on Hive's classpath
cp mysql-connector-java-5.1.44.jar /opt/hive/lib
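
The connection URL above expects a database named hive to exist; create it, and optionally initialize the metastore schema explicitly with Hive's schematool:

mysql -u root -p -e "CREATE DATABASE IF NOT EXISTS hive;"
/opt/hive/bin/schematool -dbType mysql -initSchema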

Verify

hive
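
For a non-interactive check that the metastore connection works:

hive -e "SHOW DATABASES;"
# OK
# default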

3. Install Spark

Installation and configuration

tar -zxvf $SPARK_FILE -C /opt
cd /opt
sudo ln -s spark-2.2.1-bin-hadoop2.6 spark
sudo vim /etc/profile
    # Append
    export SPARK_HOME=/opt/spark
    export PATH=$PATH:$JAVA_HOME/bin:$SCALA_HOME/bin:$HADOOP_HOME/bin:$SPARK_HOME/bin
source /etc/profile
# spark-env.sh does not exist by default; copy the template first
cp /opt/spark/conf/spark-env.sh.template /opt/spark/conf/spark-env.sh
vim /opt/spark/conf/spark-env.sh
    # Append
    export JAVA_HOME=/opt/jdk
    export SPARK_MASTER_IP=localhost
    export SCALA_HOME=/opt/scala
    export SPARK_WORKER_MEMORY=4G

Verify

spark-shell
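
The bundled examples also give a quick non-interactive check:

/opt/spark/bin/run-example SparkPi 10
# ... Pi is roughly 3.14...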

Hive integration

# Copy Hive's configuration so Spark can find the metastore
cp /opt/hive/conf/*.xml /opt/spark/conf
# Copy the MySQL JDBC driver into Spark's jars directory
cp /opt/hive/lib/mysql-connector-java-5.1.44.jar /opt/spark/jars
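
To confirm Spark actually reaches the Hive metastore, query it through the spark-sql CLI (this build, spark-2.2.1-bin-hadoop2.6, ships with Hive support):

/opt/spark/bin/spark-sql -e "SHOW DATABASES;"
# default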
