Ubuntu 16.04: Configuring Spark to Connect to Hadoop

2018-09-10  WJXZ
This post covers two tasks: (1) configuring the Spark history server and (2) configuring YARN for Hadoop. Steps 3 through 8 below walk through both.
3. Add the Hadoop configuration
cd /usr/share/hadoop/hadoop-2.7.7/etc/hadoop

Edit yarn-site.xml

sudo vim yarn-site.xml
<configuration>
        <!-- Site-specific YARN configuration properties -->
        <property>
                <name>yarn.nodemanager.aux-services</name>
                <value>mapreduce_shuffle</value>
        </property>
        <property>
                <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
                <value>org.apache.hadoop.mapred.ShuffleHandler</value>
        </property>
        <!-- Spark on YARN: aggregate container logs and serve them from the MapReduce JobHistory server -->
        <property>
                <name>yarn.log-aggregation-enable</name>
                <value>true</value>
        </property>
        <property>
                <name>yarn.log.server.url</name>
                <value>http://your-ip:19888/jobhistory/logs</value>
        </property>
        <!-- Disable physical/virtual memory checks so YARN does not kill containers that exceed the default limits -->
        <property>
                <name>yarn.nodemanager.pmem-check-enabled</name>
                <value>false</value>
        </property>
        <property>
                <name>yarn.nodemanager.vmem-check-enabled</name>
                <value>false</value>
        </property>
</configuration>
# save and quit with :wq

Edit mapred-site.xml

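In a stock Hadoop 2.7.7 distribution this file usually ships only as a template, so if mapred-site.xml does not exist yet, copy it first:

sudo cp mapred-site.xml.template mapred-site.xml
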
sudo vim mapred-site.xml
<configuration>
        <property>
                <name>mapreduce.framework.name</name>
                <value>yarn</value>
        </property>
        <!-- HDFS locations where the JobHistory server stores finished and in-flight job files -->
        <property>
                <name>mapreduce.jobhistory.done-dir</name>
                <value>/user/history/done</value>
        </property>
        <property>
                <name>mapreduce.jobhistory.intermediate-done-dir</name>
                <value>/user/history/done_intermediate</value>
        </property>
</configuration>
# save and quit with :wq
4. Restart YARN and the MapReduce job history server
cd /usr/share/hadoop/hadoop-2.7.7
./sbin/stop-yarn.sh
./sbin/mr-jobhistory-daemon.sh stop historyserver
./sbin/start-yarn.sh
./sbin/mr-jobhistory-daemon.sh start historyserver
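To confirm the daemons came back up, jps should now list them (PIDs will differ; this assumes a single-node setup):

jps
# expect to see, among others:
# ResourceManager
# NodeManager
# JobHistoryServer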
5. Configure Spark
cd /usr/share/spark/spark-2.2.2-bin-hadoop2.7/conf
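If spark-defaults.conf does not exist yet, Spark ships a template you can copy:

sudo cp spark-defaults.conf.template spark-defaults.conf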
sudo vim spark-defaults.conf
spark.yarn.historyServer.address=your-ip:18080
# the remaining entries configure the Spark history server itself
spark.history.ui.port=18080
spark.eventLog.enabled=true
spark.eventLog.dir=hdfs:///tmp/spark/events
spark.history.fs.logDirectory=hdfs:///tmp/spark/events
6. Create the Spark event log directory in HDFS
hdfs dfs -mkdir -p /tmp/spark/events
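You can verify the directory was created:

hdfs dfs -ls /tmp/spark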
7. Start the Spark history server
cd /usr/share/spark/spark-2.2.2-bin-hadoop2.7
./sbin/start-history-server.sh
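If it started cleanly, jps should now also show a HistoryServer process (Spark's history server), alongside the JobHistoryServer from step 4:

jps
# look for:
# HistoryServer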
8. Test

Open http://localhost:18080 in a browser; the Spark History Server UI should load (it stays empty until an application writes an event log).
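
To generate a first entry, submit any application to YARN. A minimal sketch using the bundled SparkPi example (the HADOOP_CONF_DIR path below assumes the layout from step 3; adjust to your install):

cd /usr/share/spark/spark-2.2.2-bin-hadoop2.7
export HADOOP_CONF_DIR=/usr/share/hadoop/hadoop-2.7.7/etc/hadoop
./bin/spark-submit \
  --class org.apache.spark.examples.SparkPi \
  --master yarn \
  --deploy-mode cluster \
  examples/jars/spark-examples_2.11-2.2.2.jar 100

When the job finishes, it should appear both in the Spark history UI at localhost:18080 and, via the aggregated logs, at http://your-ip:19888/jobhistory.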
