Pseudo-Distributed Installation of Hadoop, Spark, and Hive on Ubuntu
2018-12-18 · zfylin
0. Preparation
Package downloads
- Hadoop : http://apache.communilink.net/hadoop/common/hadoop-2.6.5/
- Spark: https://spark.apache.org/downloads.html
- Hive: http://ftp.cuhk.edu.hk/pub/packages/apache.org/hive/
- JDK: https://www.oracle.com/technetwork/cn/java/javase/downloads/jdk8-downloads-2133151-zhs.html
- Scala: https://www.scala-lang.org/download/
Install Java
sudo tar -xzvf jdk-8u144-linux-x64.tar.gz -C /opt
cd /opt
sudo ln -s jdk1.8.0_144 jdk
# Add environment variables
sudo vim /etc/profile
# Append the following
export JAVA_HOME=/opt/jdk
export JRE_HOME=$JAVA_HOME/jre
export CLASSPATH=.:$JAVA_HOME/lib/tools.jar:$JAVA_HOME/lib/dt.jar
export PATH=$PATH:$JAVA_HOME/bin
source /etc/profile
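A quick check that the new JDK is on the PATH:
java -version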
Install Scala
sudo tar -zxvf $SCALA_FILE -C /opt
cd /opt
sudo ln -s scala-2.11.8 scala
sudo vim /etc/profile
# Append the following
export SCALA_HOME=/opt/scala
export PATH=$PATH:$SCALA_HOME/bin
source /etc/profile
Install MySQL
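A minimal sketch, assuming the distribution's MySQL 5.x package; the root password must match javax.jdo.option.ConnectionPassword in hive-site.xml below:
sudo apt-get install mysql-server
# Create the metastore database used in Hive's JDBC URL
# (latin1 avoids index-length issues with older metastore schemas)
mysql -u root -p -e "CREATE DATABASE hive DEFAULT CHARACTER SET latin1;"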
Passwordless SSH login
sudo apt-get install openssh-server
ssh localhost    # first login records the host key; exit, then generate a key
# Use RSA; DSA keys are disabled by default in recent OpenSSH
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
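Confirm that login no longer prompts for a password:
ssh localhost exit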
1. Install Hadoop
Install and configure
sudo tar -zxvf $YOUR_HADOOP_FILE -C /opt
cd /opt
sudo ln -s hadoop-2.6.5 hadoop
sudo vim /etc/profile
# Append the following
export HADOOP_HOME=/opt/hadoop
export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin
source /etc/profile
vim /opt/hadoop/etc/hadoop/hadoop-env.sh
# Append the following
export JAVA_HOME=/opt/jdk
vim /opt/hadoop/etc/hadoop/hdfs-site.xml
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:///data/hadoopdata/hdfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:///data/hadoopdata/hdfs/data</value>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>localhost:9001</value>
</property>
</configuration>
vim /opt/hadoop/etc/hadoop/core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://localhost:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/hadoop/tmp</value>
</property>
</configuration>
cp /opt/hadoop/etc/hadoop/mapred-site.xml.template /opt/hadoop/etc/hadoop/mapred-site.xml
vim /opt/hadoop/etc/hadoop/mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>yarn.app.mapreduce.am.env</name>
<value>HADOOP_MAPRED_HOME=/opt/hadoop</value>
</property>
<property>
<name>mapreduce.map.env</name>
<value>HADOOP_MAPRED_HOME=/opt/hadoop</value>
</property>
<property>
<name>mapreduce.reduce.env</name>
<value>HADOOP_MAPRED_HOME=/opt/hadoop</value>
</property>
</configuration>
vim /opt/hadoop/etc/hadoop/yarn-site.xml
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
</configuration>
Format the NameNode
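Before formatting, create the local directories referenced by hdfs-site.xml (the /data paths come from the config above; chown them to the user that runs Hadoop):
sudo mkdir -p /data/hadoopdata/hdfs/name /data/hadoopdata/hdfs/data
sudo chown -R $USER:$USER /data/hadoopdata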
hdfs namenode -format
Run
/opt/hadoop/sbin/start-all.sh
Verify
jps
# You should see processes like the following (ResourceManager and NodeManager should also appear once YARN is up)
-----------------
5939 Jps
5636 DataNode
5493 NameNode
5814 SecondaryNameNode
Browse to http://localhost:8088 (YARN ResourceManager) and http://localhost:50070 (HDFS NameNode).
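As an end-to-end smoke test, run the bundled wordcount example (the input files are arbitrary; the examples jar ships under share/hadoop/mapreduce):
hdfs dfs -mkdir -p /user/$USER/input
hdfs dfs -put /opt/hadoop/etc/hadoop/*.xml /user/$USER/input
yarn jar /opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar wordcount /user/$USER/input /user/$USER/output
hdfs dfs -cat /user/$USER/output/part-r-00000 | head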
2. Install Hive
Install and configure
sudo tar -zxvf $HIVE_FILE -C /opt
cd /opt
sudo ln -s apache-hive-1.2.2-bin hive
sudo vim /etc/profile
# Append the following
export HIVE_HOME=/opt/hive
export PATH=$PATH:$JAVA_HOME/bin:$SCALA_HOME/bin:$HADOOP_HOME/bin:$HIVE_HOME/bin:$SPARK_HOME/bin
source /etc/profile
vim /opt/hive/conf/hive-site.xml
<configuration>
<!-- MySQL connection URL (localhost) -->
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://localhost:3306/hive</value>
</property>
<!-- MySQL JDBC driver -->
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
</property>
<!-- username -->
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
</property>
<!-- password -->
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>123456</value>
</property>
<property>
<name>hive.metastore.schema.verification</name>
<value>false</value>
</property>
</configuration>
# Download mysql-connector-java-5.1.44.jar (the MySQL JDBC driver)
cp mysql-connector-java-5.1.44.jar /opt/hive/lib
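Hive 1.2 ships schematool; initializing the metastore schema explicitly is safer than relying on lazy creation (the hive database must already exist in MySQL):
schematool -dbType mysql -initSchema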
Verify
hive
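A quick sanity check without entering the interactive shell (hive -e runs a quoted HiveQL string; the table name is arbitrary):
hive -e "CREATE TABLE IF NOT EXISTS test (id INT, name STRING); SHOW TABLES;"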
3. Install Spark
Install and configure
sudo tar -zxvf $SPARK_FILE -C /opt
cd /opt
sudo ln -s spark-2.2.1-bin-hadoop2.6 spark
sudo vim /etc/profile
# Append the following
export SPARK_HOME=/opt/spark
export PATH=$PATH:$JAVA_HOME/bin:$SCALA_HOME/bin:$HADOOP_HOME/bin:$SPARK_HOME/bin
source /etc/profile
cp /opt/spark/conf/spark-env.sh.template /opt/spark/conf/spark-env.sh
vim /opt/spark/conf/spark-env.sh
# Append the following
export JAVA_HOME=/opt/jdk
export SPARK_MASTER_IP=localhost
export SCALA_HOME=/opt/scala
export SPARK_WORKER_MEMORY=4G
Verify
spark-shell
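The bundled SparkPi example exercises the installation without writing any code (the trailing 10 is the number of partitions):
/opt/spark/bin/run-example SparkPi 10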
Hive integration
# Copy the Hive configuration files
cp /opt/hive/conf/*.xml /opt/spark/conf
# Copy the MySQL JDBC driver jar
cp /opt/hive/lib/mysql-connector-java-5.1.44.jar /opt/spark/jars
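If the metastore wiring is correct, spark-sql (included in this Hadoop-bundled Spark build) should list the same databases that Hive sees:
spark-sql -e "SHOW DATABASES;"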