Setting Up a Hadoop Cluster with Docker
2019-06-19 · 索伦x
Prepare the JDK and Hadoop packages
JDK download page:
https://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html
Hadoop download link:
http://mirrors.tuna.tsinghua.edu.cn/apache/hadoop/common/hadoop-2.9.1/hadoop-2.9.1.tar.gz
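The Hadoop tarball can be fetched directly from the mirror; the Oracle JDK page requires a license click-through in a browser, so the jdk-8u152-linux-x64.tar.gz archive is assumed to have been downloaded manually into /root/. A minimal sketch for the Hadoop half:
root@suoron:~# wget http://mirrors.tuna.tsinghua.edu.cn/apache/hadoop/common/hadoop-2.9.1/hadoop-2.9.1.tar.gz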
Build the Hadoop image
1. Extract the packages
root@suoron:~# mkdir -p /root/hadoop
root@suoron:~# tar zxvf hadoop-2.9.1.tar.gz -C /root/hadoop/
root@suoron:~# tar zxvf jdk-8u152-linux-x64.tar.gz -C /root/hadoop/
2. Prepare SSH keys
root@suoron:~# ssh-keygen -t rsa    # press Enter at every prompt to accept the defaults
root@suoron:~# cd .ssh
root@suoron:~/.ssh# cat id_rsa.pub >> authorized_keys
root@suoron:~/.ssh# cd
root@suoron:~# mkdir -p /root/hadoop/data    # create the data directory the next steps copy into
root@suoron:~# cp -arf .ssh /root/hadoop/data/
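Optionally, the host-key prompts inside the containers can be suppressed up front by shipping an ssh client config alongside the keys (an assumption, not part of the original setup); this removes the need to type 'yes' for each node later:
root@suoron:~# cat > /root/hadoop/data/.ssh/config <<'EOF'
Host *
    StrictHostKeyChecking no
    UserKnownHostsFile /dev/null
EOF
root@suoron:~# chmod 600 /root/hadoop/data/.ssh/config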
3. Configure the JDK environment variables
root@suoron:~# cp /root/.bashrc /root/hadoop/data/
root@suoron:~# echo "export JAVA_HOME=/usr/local/jdk1.8.0_152" >> /root/hadoop/data/.bashrc
root@suoron:~# echo 'export PATH=$PATH:$JAVA_HOME/bin' >> /root/hadoop/data/.bashrc    # single quotes, so $PATH and $JAVA_HOME expand inside the container rather than on the host
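A quick sanity check that the lines were appended verbatim; the second line should still contain the literal, unexpanded variables:
root@suoron:~# tail -n 2 /root/hadoop/data/.bashrc
export JAVA_HOME=/usr/local/jdk1.8.0_152
export PATH=$PATH:$JAVA_HOME/bin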
4. Write the Dockerfile
FROM ubuntu:14.04
COPY hadoop-2.9.1 /usr/local/hadoop-2.9.1/
COPY jdk1.8.0_152 /usr/local/jdk1.8.0_152/
COPY data/.bashrc /root/
# The keys land in a temporary path; hadoop_init.sh moves them to /root/.ssh at runtime
COPY data/.ssh /root/ssh
COPY hadoop_init.sh /etc/rc.d/init.d/
RUN chmod +x /etc/rc.d/init.d/hadoop_init.sh
CMD /etc/rc.d/init.d/hadoop_init.sh
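At this point the build context /root/hadoop is assumed to look like this (hadoop_init.sh is written in the next step):
root@suoron:~/hadoop# ls
data  hadoop-2.9.1  hadoop_init.sh  jdk1.8.0_152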
5. Write the startup script hadoop_init.sh
#!/bin/sh
# Installing at container start keeps the image build simple, at the cost of a slower first boot
apt-get update
apt-get install -y vim ssh
# Move the keys copied in by the Dockerfile into place
mv -f /root/ssh /root/.ssh
/etc/init.d/ssh start
# Keep the container alive after this script returns
tail -f /dev/null
6. Edit the Hadoop configuration (paths below are relative to the extracted hadoop-2.9.1 directory)
etc/hadoop/hadoop-env.sh
export JAVA_HOME=/usr/local/jdk1.8.0_152
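A sketch of making this change non-interactively, assuming the stock `export JAVA_HOME=${JAVA_HOME}` line that the 2.9.1 distribution ships with:
root@suoron:~# sed -i 's|^export JAVA_HOME=.*|export JAVA_HOME=/usr/local/jdk1.8.0_152|' \
    /root/hadoop/hadoop-2.9.1/etc/hadoop/hadoop-env.sh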
etc/hadoop/core-site.xml
<configuration>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>file:/usr/local/hadoop-2.9.1/tmp</value>
        <description>A base for other temporary directories.</description>
    </property>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://master:9000</value>
    </property>
</configuration>
etc/hadoop/hdfs-site.xml
<configuration>
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>file:/usr/local/hadoop-2.9.1/namenode_dir</value>
    </property>
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>file:/usr/local/hadoop-2.9.1/datanode_dir</value>
    </property>
    <property>
        <name>dfs.replication</name>
        <value>3</value>
    </property>
</configuration>
etc/hadoop/yarn-site.xml
<configuration>
    <!-- Site specific YARN configuration properties -->
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>master</value>
    </property>
</configuration>
etc/hadoop/mapred-site.xml
<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
</configuration>
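A stock 2.9.1 distribution ships only mapred-site.xml.template, so the file above is typically created from the template first:
root@suoron:~# cp /root/hadoop/hadoop-2.9.1/etc/hadoop/mapred-site.xml.template /root/hadoop/hadoop-2.9.1/etc/hadoop/mapred-site.xml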
etc/hadoop/slaves (keeping localhost in this list also runs a DataNode and NodeManager on the master, which matches the jps output further down)
localhost
slave01
slave02
7. Build the Hadoop image
docker build -t myhadoop .
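A quick smoke test of the freshly built image; the absolute JDK path is used because .bashrc is not sourced for non-interactive commands:
root@suoron:~/hadoop# docker run --rm myhadoop /usr/local/jdk1.8.0_152/bin/java -version
java version "1.8.0_152"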
Deploy the Hadoop cluster
docker-compose.yml
version: '3'
services:
  master:
    image: myhadoop
    container_name: master
  slave01:
    image: myhadoop
    container_name: slave01
  slave02:
    image: myhadoop
    container_name: slave02
docker-compose up -d    # -d detaches; hadoop_init.sh otherwise blocks the terminal in the foreground
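Once the three containers are up, Compose's default network resolves the service names, which is what lets fs.defaultFS point at hdfs://master:9000. A quick connectivity check (assuming ping is present in the base image):
root@suoron:~# docker-compose ps
root@suoron:~# docker exec master ping -c 1 slave01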
Start the Hadoop cluster
root@suoron:~# docker exec -it master /bin/bash
root@0545b48bd2a9:~# ssh localhost    # type 'yes' at the host-key prompt, then exit
root@0545b48bd2a9:~# ssh slave01      # type 'yes', then exit
root@0545b48bd2a9:~# ssh slave02      # type 'yes', then exit
root@0545b48bd2a9:~# cd /usr/local/hadoop-2.9.1
root@master:/usr/local/hadoop-2.9.1# bin/hdfs namenode -format
root@master:/usr/local/hadoop-2.9.1# sbin/start-all.sh
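start-all.sh is deprecated in Hadoop 2.x and simply delegates to the two scripts below, which can also be run individually:
root@master:/usr/local/hadoop-2.9.1# sbin/start-dfs.sh
root@master:/usr/local/hadoop-2.9.1# sbin/start-yarn.sh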
Check whether the cluster is up
root@47ba702846f0:/# jps    # master node
4352 ResourceManager
3380 DataNode
3239 NameNode
4893 Jps
4461 NodeManager
4190 SecondaryNameNode
root@0545b48bd2a9:/# jps    # slave node
3318 DataNode
3434 NodeManager
3598 Jps
Test the Hadoop cluster
root@master:/usr/local/hadoop-2.9.1# bin/hdfs dfs -mkdir -p /user/hadoop/input
# Copy the XML files under /usr/local/hadoop-2.9.1/etc/hadoop/ into the HDFS input directory:
root@master:/usr/local/hadoop-2.9.1# bin/hdfs dfs -put ./etc/hadoop/*.xml /user/hadoop/input
# Use ls to confirm the upload; it should list nine XML files, matching "Total input files to process : 9" in the job log below:
root@master:/usr/local/hadoop-2.9.1# bin/hdfs dfs -ls /user/hadoop/input
Next, run the bundled example program:
root@master:/usr/local/hadoop-2.9.1# bin/hadoop jar ./share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar grep /user/hadoop/input output 'dfs[a-z.]+'
19/06/19 06:48:04 INFO client.RMProxy: Connecting to ResourceManager at master/192.168.0.4:8032
19/06/19 06:48:06 INFO input.FileInputFormat: Total input files to process : 9
19/06/19 06:48:06 INFO mapreduce.JobSubmitter: number of splits:9
19/06/19 06:48:07 INFO Configuration.deprecation: yarn.resourcemanager.system-metrics-publisher.enabled is deprecated. Instead, use yarn.system-metrics-publisher.enabled
19/06/19 06:48:07 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1560924569558_0001
19/06/19 06:48:08 INFO impl.YarnClientImpl: Submitted application application_1560924569558_0001
19/06/19 06:48:09 INFO mapreduce.Job: The url to track the job: http://master:8088/proxy/application_1560924569558_0001/
19/06/19 06:48:09 INFO mapreduce.Job: Running job: job_1560924569558_0001
19/06/19 06:48:23 INFO mapreduce.Job: Job job_1560924569558_0001 running in uber mode : false
19/06/19 06:48:23 INFO mapreduce.Job: map 0% reduce 0%
After the job finishes, the results can be viewed in the output directory on HDFS:
root@master:/usr/local/hadoop-2.9.1# bin/hdfs dfs -cat output/*
1 dfsadmin
1 dfs.replication
1 dfs.namenode.name.dir
1 dfs.datanode.data.dir
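To re-run the example, remove the output directory first; the job will refuse to start if the output path already exists:
root@master:/usr/local/hadoop-2.9.1# bin/hdfs dfs -rm -r output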