ubuntu16.04 spark连接hadoop配置
2018-09-10 本文已影响0人
WJXZ
1.spark配置historyserver
2.hadoop配置yarn
3.hadoop添加配置
cd /usr/share/hadoop/hadoop-2.7.7/etc/hadoop
配置yarn-site.xml
sudo vim yarn-site.xml
<configuration>
<!-- Site specific YARN configuration properties -->
<property>
<!-- Auxiliary service NodeManagers run so MapReduce jobs can shuffle map output -->
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<!-- Implementation class backing the mapreduce_shuffle service above -->
<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<!-- Settings needed for the Spark-on-YARN history/log integration -->
<property>
<!-- Aggregate container logs to HDFS after an application finishes -->
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<!-- Where the YARN web UI redirects for aggregated logs; 19888 is the
     MapReduce JobHistory server started in step 4. Replace 你的ip with
     the host's actual IP/hostname. -->
<name>yarn.log.server.url</name>
<value>http://你的ip:19888/jobhistory/logs</value>
</property>
<property>
<!-- Disable physical-memory limit checks so containers (e.g. Spark
     executors) are not killed for exceeding the pmem estimate -->
<name>yarn.nodemanager.pmem-check-enabled</name>
<value>false</value>
</property>
<property>
<!-- Disable virtual-memory limit checks for the same reason -->
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
</configuration>
#保存退出:wq
配置mapred-site.xml
sudo vim mapred-site.xml
<configuration>
<property>
<!-- Run MapReduce jobs on YARN instead of the local/classic runner -->
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<!-- JobHistory server storage locations (used by the Spark/YARN log link) -->
<property>
<!-- HDFS directory where finished job history files are archived -->
<name>mapreduce.jobhistory.done-dir</name>
<value>/user/history/done</value>
</property>
<property>
<!-- HDFS staging directory for history files of jobs still being processed -->
<name>mapreduce.jobhistory.intermediate-done-dir</name>
<value>/user/history/done_intermediate</value>
</property>
</configuration>
#保存退出:wq
4.重新启动yarn
cd /usr/share/hadoop/hadoop-2.7.7
# Stop YARN so the new yarn-site.xml settings are picked up on restart
./sbin/stop-yarn.sh
# Stop the MapReduce JobHistory server (the service behind yarn.log.server.url, port 19888)
./sbin/mr-jobhistory-daemon.sh stop historyserver
# Bring YARN back up with the updated configuration
./sbin/start-yarn.sh
# Restart the JobHistory server
./sbin/mr-jobhistory-daemon.sh start historyserver
5.配置spark
cd /usr/share/spark/spark-2.2.2-bin-hadoop2.7/conf
sudo vim spark-defaults.conf
# Address the YARN ResourceManager UI links to for finished Spark apps
# (replace 你的ip with the host's actual IP/hostname)
spark.yarn.historyServer.address=你的ip:18080
# The remaining entries are Spark history server related settings
# Port the history server web UI listens on (tested in step 8)
spark.history.ui.port=18080
# Write application event logs so finished jobs can be replayed
spark.eventLog.enabled=true
# Applications write event logs here (directory created in step 6)
spark.eventLog.dir=hdfs:///tmp/spark/events
# History server reads event logs from the same HDFS directory
spark.history.fs.logDirectory=hdfs:///tmp/spark/events
6.HDFS创建spark事件日志目录
# Create the HDFS directory that spark.eventLog.dir and
# spark.history.fs.logDirectory (step 5) point to
hdfs dfs -mkdir -p /tmp/spark/events
7.启动sparkhistory
cd /usr/share/spark/spark-2.2.2-bin-hadoop2.7
# Launch the Spark history server (web UI on spark.history.ui.port, i.e. 18080)
./sbin/start-history-server.sh
8.测试
浏览器访问 http://localhost:18080 查看Spark History Server页面