快乐大数据 Lesson 12: Data Platform in Practice

2018-08-20  快乐大数据

# Data Platform in Practice

# 1. Configure Maven, Tomcat, and CBoard (steps omitted)

# 2. Start MySQL on node01

mysql -h192.168.183.101 -uhive -phive123

create database bi;

Configure the data source in CBoard:

  Data source type: JDBC

  New data source name: bi_mysql

  Driver class: com.mysql.jdbc.Driver

  Connection string: jdbc:mysql://192.168.183.101:3306/bi

  Database user: hive

  Database password: hive123

# 3. Create the real-time log topic in Kafka

On node03:

cd /home/hadoop/apps/kafka_2.11-0.10.2.1

bin/kafka-topics.sh --create --zookeeper 192.168.183.101:2181 --replication-factor 1 --partitions 3 --topic userlog2

bin/kafka-topics.sh --list --zookeeper 192.168.183.101:2181

# Use Kafka's built-in console consumer script

bin/kafka-console-consumer.sh --zookeeper 192.168.183.101:2181 --from-beginning --topic userlog2

# On node01

mkdir ~/apps/userlog

hadoop fs -mkdir /data/userlog

Create a Flume config file, logagent.conf, for the front-end agent on node01 (pay attention to the IP address); a sketch follows.
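The notes do not include the file contents. Below is a minimal sketch of what logagent.conf could look like, assuming a TAILDIR source (Flume 1.7+) watching the producer's output directory and an avro sink pointing at the collector on node02; the node02 address 192.168.183.102, port 44444, and the position-file path are assumptions.

```bash
# Minimal sketch of conf/logagent.conf (node01 front-end agent); names and
# addresses are assumptions, not the course's original file.
cat > conf/logagent.conf <<'EOF'
logagent.sources = r1
logagent.channels = c1
logagent.sinks = k1

# Tail the files the log producer keeps appending to
logagent.sources.r1.type = TAILDIR
logagent.sources.r1.filegroups = f1
logagent.sources.r1.filegroups.f1 = /home/hadoop/apps/userlog/.*
logagent.sources.r1.positionFile = /home/hadoop/apps/flume_position/taildir_position.json
logagent.sources.r1.channels = c1

logagent.channels.c1.type = memory
logagent.channels.c1.capacity = 10000
logagent.channels.c1.transactionCapacity = 1000

# Forward events to the collector agent on node02
logagent.sinks.k1.type = avro
logagent.sinks.k1.hostname = 192.168.183.102
logagent.sinks.k1.port = 44444
logagent.sinks.k1.channel = c1
EOF
```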

On node02, edit collectagent101.conf; a sketch follows.
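The collector config is also not shown in the notes. A minimal sketch of collectagent101.conf, assuming an avro source on port 44444 fanned out to a Kafka sink (topic userlog2, real-time path) and an HDFS sink under /data/userlog (offline path, matching the Hive external table location); the broker address, namenode address, ports, and dt directory layout are assumptions.

```bash
# Minimal sketch of conf/collectagent101.conf (node02 collector agent).
# Assumptions: Kafka broker on node03 (192.168.183.103:9092), namenode on
# node01 at port 9000, daily dt=YYYYMMDD directories under /data/userlog.
cat > conf/collectagent101.conf <<'EOF'
collectagent.sources = r1
collectagent.channels = c1 c2
collectagent.sinks = k1 k2

# Receive events from the node01 logagent
collectagent.sources.r1.type = avro
collectagent.sources.r1.bind = 0.0.0.0
collectagent.sources.r1.port = 44444
collectagent.sources.r1.channels = c1 c2

collectagent.channels.c1.type = memory
collectagent.channels.c2.type = memory

# Real-time path: publish the raw log lines to the userlog2 topic
collectagent.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
collectagent.sinks.k1.kafka.bootstrap.servers = 192.168.183.103:9092
collectagent.sinks.k1.kafka.topic = userlog2
collectagent.sinks.k1.channel = c1

# Offline path: land the same events in HDFS for the Hive external table
collectagent.sinks.k2.type = hdfs
collectagent.sinks.k2.hdfs.path = hdfs://192.168.183.101:9000/data/userlog/dt=%Y%m%d
collectagent.sinks.k2.hdfs.fileType = DataStream
collectagent.sinks.k2.hdfs.useLocalTimeStamp = true
collectagent.sinks.k2.channel = c2
EOF
```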

Start the back-end collector first, then the front-end log-collection agent.

Start Flume on node02:

bin/flume-ng agent --conf conf --conf-file conf/collectagent101.conf --name collectagent -Dflume.root.logger=INFO,console

Then start Flume on node01:

bin/flume-ng agent --conf conf --conf-file conf/logagent.conf --name logagent -Dflume.root.logger=INFO,console

Start the data-producing jar on node01.

Upload it first with rz into the apps directory on node01, then run it from apps:

java -cp logproducer-1.0-SNAPSHOT.jar cn.chinahadoop.producer.LogProducer /home/hadoop/apps/userlog f1

You will see data being produced:

line -> user_20,windows_v1_20171201,010,2,1534645660158

line -> user_95,android_pad_v1.2_20180110,0755,1,1534645661159

line -> user_43,android_v1.2_20180110,0755,1,1534645662160

line -> user_90,apple_phone_v1.2_20180101,020,2,1534645663161

line -> user_49,android_v1.2_20180110,021,1,1534645664162

line -> user_66,apple_phone_v1.2_20180101,020,1,1534645665163

line -> user_57,apple_pad_v1.2_20180101,021,2,1534645666163

line -> user_96,apple_phone_v1.2_20180101,010,1,1534645667164

line -> user_95,android_pad_v1.2_20180110,0755,1,1534645668165

line -> user_5,android_pad_v1.2_20180110,021,2,1534645669165

line -> user_51,apple_pad_v1.2_20180101,0755,1,1534645670166

line -> user_44,windows_v1_20171201,010,1,1534645671167

line -> user_81,apple_pad_v1.2_20180101,021,2,1534645672168

line -> user_82,apple_mac_v1.2_20180101,020,1,1534645673169

line -> user_95,android_pad_v1.2_20180110,0755,1,1534645674169

line -> user_60,apple_phone_v1.2_20180101,010,2,1534645675170

line -> user_97,android_v1.2_20180110,021,1,1534645676170

line -> user_84,apple_phone_v1.2_20180101,010,1,1534645677171

line -> user_21,apple_pad_v1.2_20180101,021,2,1534645678171

line -> user_58,apple_mac_v1.2_20180101,020,1,1534645679172

line -> user_55,android_v1.2_20180110,0755,1,1534645680174

line -> user_45,apple_pad_v1.2_20180101,021,2,1534645681175

line -> user_70,apple_mac_v1.2_20180101,020,1,1534645682176

line -> user_93,apple_pad_v1.2_20180101,021,1,1534645683177

line -> user_68,windows_v1_20171201,010,2,1534645684178

line -> user_34,apple_mac_v1.2_20180101,020,1,1534645685179

line -> user_87,apple_pad_v1.2_20180101,0755,1,1534645686180

line -> user_43,android_v1.2_20180110,0755,2,1534645687182

line -> user_54,apple_phone_v1.2_20180101,020,1,1534645688183

line -> user_61,android_v1.2_20180110,021,1,1534645689184

line -> user_97,android_v1.2_20180110,021,2,1534645690185

line -> user_18,apple_phone_v1.2_20180101,020,1,1534645691185

line -> user_77,android_pad_v1.2_20180110,021,1,1534645692186

line -> user_67,android_v1.2_20180110,0755,2,1534645693187

line -> user_82,apple_mac_v1.2_20180101,020,1,1534645694188

line -> user_2,windows_v1_20171201,020,1,1534645695189

line -> user_83,android_pad_v1.2_20180110,0755,2,1534645696191

line -> user_91,android_v1.2_20180110,0755,1,1534645697191

line -> user_9,apple_pad_v1.2_20180101,021,1,1534645698193

Check the generated files:

cd ~/apps/userlog/

On node02 the collector picks the new data up as soon as it is produced.

On node01, build the data warehouse in Hive:

create database ods;

use ods;

create external table ods.userlog_external_pt(

user_id string,

client_version string,

area_id string,

user_behavior int,

time string

)

partitioned by(dt string)

row format delimited

fields terminated by ','

lines terminated by '\n'

stored as textfile

location '/data/userlog';
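The external table is partitioned by dt, so each day's directory has to be registered as a partition before it shows up in queries. A sketch, assuming the dt=YYYYMMDD directory layout used in the collector sketch above and an example date:

```bash
# Register one day's raw data with the ODS external table
# (the dt layout and the example date are assumptions)
hive -e "ALTER TABLE ods.userlog_external_pt ADD IF NOT EXISTS PARTITION (dt='20180819') LOCATION '/data/userlog/dt=20180819';"
```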

create database pdw;

use pdw;

## 1. Create the user-log detail table

//user id, client version, client type, area id, area name, timestamp, exposure (pv), click

create table pdw.userlog_detail_pt(

user_id string,

client_version string,

client_type string,

area_id string,

area_name string,

time string,

pv_cnt int,

click_cnt int

)

partitioned by(dt string)

row format delimited

fields terminated by '\t'

lines terminated by '\n'

stored as RCFILE;

## 2. Create the new-user detail table (for computing daily new users and cumulative users)

//new user id, timestamp

create table pdw.new_user_detail_pt(

user_id string,

time string

)

partitioned by(dt string)

row format delimited

fields terminated by '\t'

lines terminated by '\n'

stored as RCFILE;

//Import the dimension data (create the directory first with mkdir -p /home/hadoop/apps/hive_test/data/, then upload area_data.txt and client_version_type_data.txt with rz):

load data local inpath '/home/hadoop/apps/hive_test/data/area_data.txt' into table area_info;

load data local inpath '/home/hadoop/apps/hive_test/data/client_version_type_data.txt' into table rel.client_version_type_info;
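The two load statements above assume the dimension tables area_info and client_version_type_info already exist; their DDL is not part of these notes. A minimal sketch with the schemas inferred from the columns of pdw.userlog_detail_pt (the rel database name, field order, and comma delimiter are assumptions):

```bash
# Hypothetical DDL for the dimension tables loaded above
hive -e "
create database if not exists rel;
use rel;

create table if not exists area_info (
  area_id   string,
  area_name string
)
row format delimited fields terminated by ',' lines terminated by '\n'
stored as textfile;

create table if not exists client_version_type_info (
  client_version string,
  client_type    string
)
row format delimited fields terminated by ',' lines terminated by '\n'
stored as textfile;
"
```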

Create the report table in MySQL:

CREATE TABLE `realtime_pv_click_report_daily` (

`pv_cnt` int(11) NOT NULL,

`click_cnt` int(11) NOT NULL,

`date_time` varchar(512) DEFAULT NULL

) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
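To check that this table and the bi_mysql data source configured in CBoard line up, a throwaway row can be inserted and then queried from a CBoard dataset; the values below are arbitrary test data.

```bash
# Insert a dummy row so the table shows up with data in CBoard (values are arbitrary)
mysql -h192.168.183.101 -uhive -phive123 bi \
  -e "INSERT INTO realtime_pv_click_report_daily (pv_cnt, click_cnt, date_time) VALUES (100, 8, '2018-08-19 10:00:00');"
```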

Run the user-detail script:

sh ods_user_log_2_rcfile_daily.sh 0
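The script itself is not reproduced in these notes. A sketch of what ods_user_log_2_rcfile_daily.sh plausibly does, assuming the argument is a day offset (0 = today), that user_behavior 1 marks an exposure and 2 a click, and that the dimension tables live in the rel database:

```bash
#!/bin/bash
# Hypothetical sketch of ods_user_log_2_rcfile_daily.sh; all names, the
# behavior-code mapping, and the partition layout are assumptions.
offset=$1
dt=$(date -d "-${offset} day" +%Y%m%d)

hive -e "
-- make the day's raw data visible to the ODS external table
alter table ods.userlog_external_pt add if not exists
  partition (dt='${dt}') location '/data/userlog/dt=${dt}';

-- convert the raw text log into the RCFile detail table, joining the dimensions
insert overwrite table pdw.userlog_detail_pt partition (dt='${dt}')
select
  t.user_id,
  t.client_version,
  c.client_type,
  t.area_id,
  a.area_name,
  t.time,
  case when t.user_behavior = 1 then 1 else 0 end as pv_cnt,
  case when t.user_behavior = 2 then 1 else 0 end as click_cnt
from ods.userlog_external_pt t
left join rel.area_info a on t.area_id = a.area_id
left join rel.client_version_type_info c on t.client_version = c.client_version
where t.dt = '${dt}';
"
```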

In Hive:

use pdw;

Run the new-user script:
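The new-user script is not shown either. A sketch, assuming a day's new users are the user_ids that appear in that day's detail partition but in no earlier one (the script name and logic are assumptions):

```bash
#!/bin/bash
# Hypothetical sketch of a daily new-user script (e.g. pdw_new_user_daily.sh).
offset=$1
dt=$(date -d "-${offset} day" +%Y%m%d)

hive -e "
insert overwrite table pdw.new_user_detail_pt partition (dt='${dt}')
select d.user_id, min(d.time) as time
from pdw.userlog_detail_pt d
left join (
  -- everyone already seen before this day
  select distinct user_id
  from pdw.userlog_detail_pt
  where dt < '${dt}'
) old on d.user_id = old.user_id
where d.dt = '${dt}' and old.user_id is null
group by d.user_id;
"
```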
