数仓实战06:数仓搭建-DWT层
2020-08-06 本文已影响0人
勇于自信
1.设备主题宽表
1)建表语句
hive (gmall) > DROP TABLE
IF EXISTS dwt_uv_topic;
CREATE external TABLE dwt_uv_topic (
`mid_id` string COMMENT '设备唯一标识',
`user_id` string COMMENT '用户标识',
`version_code` string COMMENT '程序版本号',
`version_name` string COMMENT '程序版本名',
`lang` string COMMENT '系统语言',
`source` string COMMENT '渠道号',
`os` string COMMENT '安卓系统版本',
`area` string COMMENT '区域',
`model` string COMMENT '手机型号',
`brand` string COMMENT '手机品牌',
`sdk_version` string COMMENT 'sdkVersion',
`gmail` string COMMENT 'gmail',
`height_width` string COMMENT '屏幕宽高',
`app_time` string COMMENT '客户端日志产生时的时间',
`network` string COMMENT '网络模式',
`lng` string COMMENT '经度',
`lat` string COMMENT '纬度',
`login_date_first` string COMMENT '首次活跃时间',
`login_date_last` string COMMENT '末次活跃时间',
`login_day_count` BIGINT COMMENT '当日活跃次数',
`login_count` BIGINT COMMENT '累积活跃天数'
) stored AS parquet location '/warehouse/gmall/dwt/dwt_uv_topic';
2)数据装载
hive (gmall) > INSERT overwrite TABLE dwt_uv_topic SELECT
nvl (new.mid_id, old.mid_id),
nvl (new.user_id, old.user_id),
nvl (
new.version_code,
old.version_code
),
nvl (
new.version_name,
old.version_name
),
nvl (new.lang, old.lang),
nvl (new.source, old.source),
nvl (new.os, old.os),
nvl (new.area, old.area),
nvl (new.model, old.model),
nvl (new.brand, old.brand),
nvl (
new.sdk_version,
old.sdk_version
),
nvl (new.gmail, old.gmail),
nvl (
new.height_width,
old.height_width
),
nvl (new.app_time, old.app_time),
nvl (new.network, old.network),
nvl (new.lng, old.lng),
nvl (new.lat, old.lat),
IF (
old.mid_id IS NULL,
'2020-03-10',
old.login_date_first
),
IF (
new.mid_id IS NOT NULL,
'2020-03-10',
old.login_date_last
),
IF (
new.mid_id IS NOT NULL,
new.login_count,
0
),
nvl (old.login_count, 0) +
IF (new.login_count > 0, 1, 0)
FROM
(SELECT * FROM dwt_uv_topic) old
FULL OUTER JOIN (
SELECT
*
FROM
dws_uv_detail_daycount
WHERE
dt = '2020-03-10'
) new ON old.mid_id = new.mid_id;
3)查询加载结果
hive (gmall)> select * from dwt_uv_topic limit 5;
2.会员主题宽表
宽表字段怎么来?维度关联的事实表度量值+开头、结尾+累积+累积一个时间段。
1)建表语句
hive (gmall) > DROP TABLE
IF EXISTS dwt_user_topic;
CREATE external TABLE dwt_user_topic (
user_id string COMMENT '用户 id',
login_date_first string COMMENT '首次登录时间',
login_date_last string COMMENT '末次登录时间',
login_count BIGINT COMMENT '累积登录天数',
login_last_30d_count BIGINT COMMENT '最近 30 日登录天数',
order_date_first string COMMENT '首次下单时间',
order_date_last string COMMENT '末次下单时间',
order_count BIGINT COMMENT '累积下单次数',
order_amount DECIMAL (16, 2) COMMENT '累积下单金额',
order_last_30d_count BIGINT COMMENT '最近 30 日下单次数',
order_last_30d_amount BIGINT COMMENT '最近 30 日下单金额',
payment_date_first string COMMENT '首次支付时间',
payment_date_last string COMMENT '末次支付时间',
payment_count DECIMAL (16, 2) COMMENT '累积支付次数',
payment_amount DECIMAL (16, 2) COMMENT '累积支付金额',
payment_last_30d_count DECIMAL (16, 2) COMMENT '最近 30 日支付次数',
payment_last_30d_amount DECIMAL (16, 2) COMMENT '最近 30 日支付金额'
) COMMENT '用户主题宽表' stored AS parquet location '/warehouse/gmall/dwt/dwt_user_topic/' tblproperties (
"parquet.compression" = "lzo"
);
2)数据装载
hive (gmall) > INSERT overwrite TABLE dwt_user_topic SELECT
nvl (new.user_id, old.user_id),
IF (
old.login_date_first IS NULL
AND new.login_count > 0,
'2020-03-10',
old.login_date_first
),
IF (
new.login_count > 0,
'2020-03-10',
old.login_date_last
),
nvl (old.login_count, 0) +
IF (new.login_count > 0, 1, 0),
nvl (new.login_last_30d_count, 0),
IF (
old.order_date_first IS NULL
AND new.order_count > 0,
'2020-03-10',
old.order_date_first
),
IF (
new.order_count > 0,
'2020-03-10',
old.order_date_last
),
nvl (old.order_count, 0) + nvl (new.order_count, 0),
nvl (old.order_amount, 0) + nvl (new.order_amount, 0),
nvl (new.order_last_30d_count, 0),
nvl (
new.order_last_30d_amount,
0
),
IF (
old.payment_date_first IS NULL
AND new.payment_count > 0,
'2020-03-10',
old.payment_date_first
),
IF (
new.payment_count > 0,
'2020-03-10',
old.payment_date_last
),
nvl (old.payment_count, 0) + nvl (new.payment_count, 0),
nvl (old.payment_amount, 0) + nvl (new.payment_amount, 0),
nvl (
new.payment_last_30d_count,
0
),
nvl (
new.payment_last_30d_amount,
0
)
FROM
dwt_user_topic old
FULL OUTER JOIN (
SELECT
user_id,
sum(
IF (
dt = '2020-03-10',
login_count,
0
)
) login_count,
sum(
IF (
dt = '2020-03-10',
order_count,
0
)
) order_count,
sum(
IF (
dt = '2020-03-10',
order_amount,
0
)
) order_amount,
sum(
IF (
dt = '2020-03-10',
payment_count,
0
)
) payment_count,
sum(
IF (
dt = '2020-03-10',
payment_amount,
0
)
) payment_amount,
sum(IF(login_count > 0, 1, 0)) login_last_30d_count,
sum(order_count) order_last_30d_count,
sum(order_amount) order_last_30d_amount,
sum(payment_count) payment_last_30d_count,
sum(payment_amount) payment_last_30d_amount
FROM
dws_user_action_daycount
WHERE
dt >= date_add('2020-03-10' ,- 30)
GROUP BY
user_id
) new ON old.user_id = new.user_id;
3)查询加载结果
hive (gmall)> select * from dwt_user_topic limit 5;
3.商品主题宽表
1)建表语句
hive (gmall) > DROP TABLE
IF EXISTS dwt_sku_topic;
CREATE external TABLE dwt_sku_topic (
sku_id string COMMENT 'sku_id',
spu_id string COMMENT 'spu_id',
order_last_30d_count BIGINT COMMENT '最近 30 日被下单次数',
order_last_30d_num BIGINT COMMENT '最近 30 日被下单件数',
order_last_30d_amount DECIMAL (16, 2) COMMENT '最近 30 日被下单金额',
order_count BIGINT COMMENT '累积被下单次数',
order_num BIGINT COMMENT '累积被下单件数',
order_amount DECIMAL (16, 2) COMMENT '累积被下单金额',
payment_last_30d_count BIGINT COMMENT '最近 30 日被支付次数',
payment_last_30d_num BIGINT COMMENT '最近 30 日被支付件数',
payment_last_30d_amount DECIMAL (16, 2) COMMENT '最近 30 日被支付金额',
payment_count BIGINT COMMENT '累积被支付次数',
payment_num BIGINT COMMENT '累积被支付件数',
payment_amount DECIMAL (16, 2) COMMENT '累积被支付金额',
refund_last_30d_count BIGINT COMMENT '最近三十日退款次数',
refund_last_30d_num BIGINT COMMENT '最近三十日退款件数',
refund_last_30d_amount DECIMAL (10, 2) COMMENT '最近三十日退款金额',
refund_count BIGINT COMMENT '累积退款次数',
refund_num BIGINT COMMENT '累积退款件数',
refund_amount DECIMAL (10, 2) COMMENT '累积退款金额',
cart_last_30d_count BIGINT COMMENT '最近 30 日被加入购物车次数',
cart_last_30d_num BIGINT COMMENT '最近 30 日被加入购物车件数',
cart_count BIGINT COMMENT '累积被加入购物车次数',
cart_num BIGINT COMMENT '累积被加入购物车件数',
favor_last_30d_count BIGINT COMMENT '最近 30 日被收藏次数',
favor_count BIGINT COMMENT '累积被收藏次数',
appraise_last_30d_good_count BIGINT COMMENT '最近 30 日好评数',
appraise_last_30d_mid_count BIGINT COMMENT '最近 30 日中评数',
appraise_last_30d_bad_count BIGINT COMMENT '最近 30 日差评数',
appraise_last_30d_default_count BIGINT COMMENT '最近 30 日默认评价数',
appraise_good_count BIGINT COMMENT '累积好评数',
appraise_mid_count BIGINT COMMENT '累积中评数',
appraise_bad_count BIGINT COMMENT '累积差评数',
appraise_default_count BIGINT COMMENT '累积默认评价数'
) COMMENT '商品主题宽表' stored AS parquet location '/warehouse/gmall/dwt/dwt_sku_topic/' tblproperties (
"parquet.compression" = "lzo"
);
2)数据装载
hive (gmall) > INSERT overwrite TABLE dwt_sku_topic SELECT
nvl (new.sku_id, old.sku_id),
sku_info.spu_id,
nvl (new.order_count30, 0),
nvl (new.order_num30, 0),
nvl (new.order_amount30, 0),
nvl (old.order_count, 0) + nvl (new.order_count, 0),
nvl (old.order_num, 0) + nvl (new.order_num, 0),
nvl (old.order_amount, 0) + nvl (new.order_amount, 0),
nvl (new.payment_count30, 0),
nvl (new.payment_num30, 0),
nvl (new.payment_amount30, 0),
nvl (old.payment_count, 0) + nvl (new.payment_count, 0),
nvl (old.payment_num, 0) + nvl (new.payment_count, 0),
nvl (old.payment_amount, 0) + nvl (new.payment_count, 0),
nvl (new.refund_count30, 0),
nvl (new.refund_num30, 0),
nvl (new.refund_amount30, 0),
nvl (old.refund_count, 0) + nvl (new.refund_count, 0),
nvl (old.refund_num, 0) + nvl (new.refund_num, 0),
nvl (old.refund_amount, 0) + nvl (new.refund_amount, 0),
nvl (new.cart_count30, 0),
nvl (new.cart_num30, 0),
nvl (old.cart_count, 0) + nvl (new.cart_count, 0),
nvl (old.cart_num, 0) + nvl (new.cart_num, 0),
nvl (new.favor_count30, 0),
nvl (old.favor_count, 0) + nvl (new.favor_count, 0),
nvl (
new.appraise_good_count30,
0
),
nvl (new.appraise_mid_count30, 0),
nvl (new.appraise_bad_count30, 0),
nvl (
new.appraise_default_count30,
0
),
nvl (old.appraise_good_count, 0) + nvl (new.appraise_good_count, 0),
nvl (old.appraise_mid_count, 0) + nvl (new.appraise_mid_count, 0),
nvl (old.appraise_bad_count, 0) + nvl (new.appraise_bad_count, 0),
nvl (
old.appraise_default_count,
0
) + nvl (
new.appraise_default_count,
0
)
FROM
(
SELECT
sku_id,
spu_id,
order_last_30d_count,
order_last_30d_num,
order_last_30d_amount,
order_count,
order_num,
order_amount,
payment_last_30d_count,
payment_last_30d_num,
payment_last_30d_amount,
payment_count,
payment_num,
payment_amount,
refund_last_30d_count,
refund_last_30d_num,
refund_last_30d_amount,
refund_count,
refund_num,
refund_amount,
cart_last_30d_count,
cart_last_30d_num,
cart_count,
cart_num,
favor_last_30d_count,
favor_count,
appraise_last_30d_good_count,
appraise_last_30d_mid_count,
appraise_last_30d_bad_count,
appraise_last_30d_default_count,
appraise_good_count,
appraise_mid_count,
appraise_bad_count,
appraise_default_count
FROM
dwt_sku_topic
) old
FULL OUTER JOIN (
SELECT
sku_id,
sum(
IF (
dt = '2020-03-10',
order_count,
0
)
) order_count,
sum(
IF (
dt = '2020-03-10',
order_num,
0
)
) order_num,
sum(
IF (
dt = '2020-03-10',
order_amount,
0
)
) order_amount,
sum(
IF (
dt = '2020-03-10',
payment_count,
0
)
) payment_count,
sum(
IF (
dt = '2020-03-10',
payment_num,
0
)
) payment_num,
sum(
IF (
dt = '2020-03-10',
payment_amount,
0
)
) payment_amount,
sum(
IF (
dt = '2020-03-10',
refund_count,
0
)
) refund_count,
sum(
IF (
dt = '2020-03-10',
refund_num,
0
)
) refund_num,
sum(
IF (
dt = '2020-03-10',
refund_amount,
0
)
) refund_amount,
sum(
IF (
dt = '2020-03-10',
cart_count,
0
)
) cart_count,
sum(
IF (dt = '2020-03-10', cart_num, 0)
) cart_num,
sum(
IF (
dt = '2020-03-10',
favor_count,
0
)
) favor_count,
sum(
IF (
dt = '2020-03-10',
appraise_good_count,
0
)
) appraise_good_count,
sum(
IF (
dt = '2020-03-10',
appraise_mid_count,
0
)
) appraise_mid_count,
sum(
IF (
dt = '2020-03-10',
appraise_bad_count,
0
)
) appraise_bad_count,
sum(
IF (
dt = '2020-03-10',
appraise_default_count,
0
)
) appraise_default_count,
sum(order_count) order_count30,
sum(order_num) order_num30,
sum(order_amount) order_amount30,
sum(payment_count) payment_count30,
sum(payment_num) payment_num30,
sum(payment_amount) payment_amount30,
sum(refund_count) refund_count30,
sum(refund_num) refund_num30,
sum(refund_amount) refund_amount30,
sum(cart_count) cart_count30,
sum(cart_num) cart_num30,
sum(favor_count) favor_count30,
sum(appraise_good_count) appraise_good_count30,
sum(appraise_mid_count) appraise_mid_count30,
sum(appraise_bad_count) appraise_bad_count30,
sum(appraise_default_count) appraise_default_count30
FROM
dws_sku_action_daycount
WHERE
dt >= date_add('2020-03-10', - 30)
GROUP BY
sku_id
) new ON new.sku_id = old.sku_id
LEFT JOIN (
SELECT
*
FROM
dwd_dim_sku_info
WHERE
dt = '2020-03-10'
) sku_info ON nvl (new.sku_id, old.sku_id) = sku_info.id;
3)查询加载结果
hive (gmall)> select * from dwt_sku_topic limit 5;
4.优惠券主题宽表
1)建表语句
hive (gmall) > DROP TABLE
IF EXISTS dwt_coupon_topic;
CREATE external TABLE dwt_coupon_topic (
`coupon_id` string COMMENT '优惠券 ID',
`get_day_count` BIGINT COMMENT '当日领用次数',
`using_day_count` BIGINT COMMENT '当日使用(下单)次数',
`used_day_count` BIGINT COMMENT '当日使用(支付)次数',
`get_count` BIGINT COMMENT '累积领用次数',
`using_count` BIGINT COMMENT '累积使用(下单)次数',
`used_count` BIGINT COMMENT '累积使用(支付)次数'
) COMMENT '购物券主题宽表' stored AS parquetlocation '/warehouse/gmall/dwt/dwt_coupon_topic/' tblproperties (
"parquet.compression" = "lzo"
);
2)数据装载
hive (gmall)> insert overwrite table dwt_coupon_topic select nvl(new.coupon_id,old.coupon_id), nvl(new.get_count,0), nvl(new.using_count,0), nvl(new.used_count,0), nvl(old.get_count,0)+nvl(new.get_count,0), nvl(old.using_count,0)+nvl(new.using_count,0), nvl(old.used_count,0)+nvl(new.used_count,0) from ( select * from dwt_coupon_topic )old full outer join ( select coupon_id, get_count, using_count, used_count from dws_coupon_use_daycount where dt='2020-03-10' )new on old.coupon_id=new.coupon_id;
3)查询加载结果
hive (gmall)> select * from dwt_coupon_topic limit 5;
5.活动主题宽表
1)建表语句
hive (gmall)> drop table if exists dwt_activity_topic;
create external table dwt_activity_topic( `id` string COMMENT '活动 id', `activity_name` string COMMENT '活动名称', `order_day_count` bigint COMMENT '当日日下单次数', `payment_day_count` bigint COMMENT '当日支付次数', `order_count` bigint COMMENT '累积下单次数', `payment_count` bigint COMMENT '累积支付次数' ) COMMENT '活动主题宽表' row format delimited fields terminated by '\t' location '/warehouse/gmall/dwt/dwt_activity_topic/' tblproperties ("parquet.compression"="lzo");
2)数据装载
hive (gmall)> insert overwrite table dwt_activity_topic select nvl(new.id,old.id), nvl(new.activity_name,old.activity_name), nvl(new.order_count,0), nvl(new.payment_count,0), nvl(old.order_count,0)+nvl(new.order_count,0), nvl(old.payment_count,0)+nvl(new.payment_count,0) from ( select * from dwt_activity_topic )old full outer join ( select id, activity_name, order_count, payment_count from dws_activity_info_daycount where dt='2020-03-10' )new on old.id=new.id;
3)查询加载结果
hive (gmall)> select * from dwt_activity_topic limit 5;
6.DWT层数据导入脚本
1)在/home/atguigu/bin 目录下创建脚本 dws_to_dwt.sh
[atguigu@hadoop102 bin]$ vim dws_to_dwt.sh
在脚本中填写如下内容
#!/bin/bash
APP=gmall
hive=/opt/module/hive/bin/hive
# 如果是输入的日期按照取输入日期;如果没输入日期取当前时间的前一天
if [ -n "$1" ] ;
then
do
_date=$1
else
do
_date=`date -d "-1 day" +%F`
fi
sql="
INSERT overwrite TABLE $ { APP }.dwt_uv_topic SELECT
nvl (new.mid_id, old.mid_id),
nvl (new.user_id, old.user_id),
nvl (
new.version_code,
old.version_code
),
nvl (
new.version_name,
old.version_name
),
nvl (new.lang, old.lang),
nvl (new.source, old.source),
nvl (new.os, old.os),
nvl (new.area, old.area),
nvl (new.model, old.model),
nvl (new.brand, old.brand),
nvl (
new.sdk_version,
old.sdk_version
),
nvl (new.gmail, old.gmail),
nvl (
new.height_width,
old.height_width
),
nvl (new.app_time, old.app_time),
nvl (new.network, old.network),
nvl (new.lng, old.lng),
nvl (new.lat, old.lat),
nvl (
old.login_date_first,
'$do_date'
),
IF (
new.login_count > 0,
'$do_date',
old.login_date_last
),
nvl (new.login_count, 0),
nvl (new.login_count, 0) + nvl (old.login_count, 0)
FROM
(
SELECT
*
FROM
$ { APP }.dwt_uv_topic
) old
FULL OUTER JOIN (
SELECT
*
FROM
$ { APP }.dws_uv_detail_daycount
WHERE
dt = '$do_date'
) new ON old.mid_id = new.mid_id;
INSERT overwrite TABLE $ { APP }.dwt_user_topic SELECT
nvl (new.user_id, old.user_id),
IF (
old.login_date_first IS NULL
AND new.login_count > 0,
'$do_date',
old.login_date_first
),
IF (
new.login_count > 0,
'$do_date',
old.login_date_last
),
nvl (old.login_count, 0) +
IF (new.login_count > 0, 1, 0),
nvl (new.login_last_30d_count, 0),
IF (
old.order_date_first IS NULL
AND new.order_count > 0,
'$do_date',
old.order_date_first
),
IF (
new.order_count > 0,
'$do_date',
old.order_date_last
),
nvl (old.order_count, 0) + nvl (new.order_count, 0),
nvl (old.order_amount, 0) + nvl (new.order_amount, 0),
nvl (new.order_last_30d_count, 0),
nvl (
new.order_last_30d_amount,
0
),
IF (
old.payment_date_first IS NULL
AND new.payment_count > 0,
'$do_date',
old.payment_date_first
),
IF (
new.payment_count > 0,
'$do_date',
old.payment_date_last
),
nvl (old.payment_count, 0) + nvl (new.payment_count, 0),
nvl (old.payment_amount, 0) + nvl (new.payment_amount, 0),
nvl (
new.payment_last_30d_count,
0
),
nvl (
new.payment_last_30d_amount,
0
)
FROM
(
SELECT
*
FROM
$ { APP }.dwt_user_topic
) old
FULL OUTER JOIN (
SELECT
user_id,
sum(
IF (
dt = '$do_date',
login_count,
0
)
) login_count,
sum(
IF (
dt = '$do_date',
order_count,
0
)
) order_count,
sum(
IF (
dt = '$do_date',
order_amount,
0
)
) order_amount,
sum(
IF (
dt = '$do_date',
payment_count,
0
)
) payment_count,
sum(
IF (
dt = '$do_date',
payment_amount,
0
)
) payment_amount,
sum(IF(order_count > 0, 1, 0)) login_last_30d_count,
sum(order_count) order_last_30d_count,
sum(order_amount) order_last_30d_amount,
sum(payment_count) payment_last_30d_count,
sum(payment_amount) payment_last_30d_amount
FROM
$ { APP }.dws_user_action_daycount
WHERE
dt >= date_add('$do_date' ,- 30)
GROUP BY
user_id
) new ON old.user_id = new.user_id;
WITH sku_act AS (
SELECT
sku_id,
sum(
IF (
dt = '$do_date',
order_count,
0
)
) order_count,
sum(
IF (dt = '$do_date', order_num, 0)
) order_num,
sum(
IF (
dt = '$do_date',
order_amount,
0
)
) order_amount,
sum(
IF (
dt = '$do_date',
payment_count,
0
)
) payment_count,
sum(
IF (
dt = '$do_date',
payment_num,
0
)
) payment_num,
sum(
IF (
dt = '$do_date',
payment_amount,
0
)
) payment_amount,
sum(
IF (
dt = '$do_date',
refund_count,
0
)
) refund_count,
sum(
IF (dt = '$do_date', refund_num, 0)
) refund_num,
sum(
IF (
dt = '$do_date',
refund_amount,
0
)
) refund_amount,
sum(
IF (dt = '$do_date', cart_count, 0)
) cart_count,
sum(
IF (dt = '$do_date', cart_num, 0)
) cart_num,
sum(
IF (
dt = '$do_date',
favor_count,
0
)
) favor_count,
sum(
IF (
dt = '$do_date',
appraise_good_count,
0
)
) appraise_good_count,
sum(
IF (
dt = '$do_date',
appraise_mid_count,
0
)
) appraise_mid_count,
sum(
IF (
dt = '$do_date',
appraise_bad_count,
0
)
) appraise_bad_count,
sum(
IF (
dt = '$do_date',
appraise_default_count,
0
)
) appraise_default_count,
sum(order_count) order_count30,
sum(order_num) order_num30,
sum(order_amount) order_amount30,
sum(payment_count) payment_count30,
sum(payment_num) payment_num30,
sum(payment_amount) payment_amount30,
sum(refund_count) refund_count30,
sum(refund_num) refund_num30,
sum(refund_amount) refund_amount30,
sum(cart_count) cart_count30,
sum(cart_num) cart_num30,
sum(favor_count) favor_count30,
sum(appraise_good_count) appraise_good_count30,
sum(appraise_mid_count) appraise_mid_count30,
sum(appraise_bad_count) appraise_bad_count30,
sum(appraise_default_count) appraise_default_count30
FROM
$ { APP }.dws_sku_action_daycount
WHERE
dt >= date_add('$do_date' ,- 30)
GROUP BY
sku_id
),
sku_topic AS (
SELECT
sku_id,
spu_id,
order_last_30d_count,
order_last_30d_num,
order_last_30d_amount,
order_count,
order_num,
order_amount,
payment_last_30d_count,
payment_last_30d_num,
payment_last_30d_amount,
payment_count,
payment_num,
payment_amount,
refund_last_30d_count,
refund_last_30d_num,
refund_last_30d_amount,
refund_count,
refund_num,
refund_amount,
cart_last_30d_count,
cart_last_30d_num,
cart_count,
cart_num,
favor_last_30d_count,
favor_count,
appraise_last_30d_good_count,
appraise_last_30d_mid_count,
appraise_last_30d_bad_count,
appraise_last_30d_default_count,
appraise_good_count,
appraise_mid_count,
appraise_bad_count,
appraise_default_count
FROM
$ { APP }.dwt_sku_topic
) INSERT overwrite TABLE $ { APP }.dwt_sku_topic SELECT
nvl (
sku_act.sku_id,
sku_topic.sku_id
),
sku_info.spu_id,
nvl (sku_act.order_count30, 0),
nvl (sku_act.order_num30, 0),
nvl (sku_act.order_amount30, 0),
nvl (sku_topic.order_count, 0) + nvl (sku_act.order_count, 0),
nvl (sku_topic.order_num, 0) + nvl (sku_act.order_num, 0),
nvl (sku_topic.order_amount, 0) + nvl (sku_act.order_amount, 0),
nvl (sku_act.payment_count30, 0),
nvl (sku_act.payment_num30, 0),
nvl (sku_act.payment_amount30, 0),
nvl (sku_topic.payment_count, 0) + nvl (sku_act.payment_count, 0),
nvl (sku_topic.payment_num, 0) + nvl (sku_act.payment_count, 0),
nvl (sku_topic.payment_amount, 0) + nvl (sku_act.payment_count, 0),
nvl (refund_count30, 0),
nvl (sku_act.refund_num30, 0),
nvl (sku_act.refund_amount30, 0),
nvl (sku_topic.refund_count, 0) + nvl (sku_act.refund_count, 0),
nvl (sku_topic.refund_num, 0) + nvl (sku_act.refund_num, 0),
nvl (sku_topic.refund_amount, 0) + nvl (sku_act.refund_amount, 0),
nvl (sku_act.cart_count30, 0),
nvl (sku_act.cart_num30, 0),
nvl (sku_topic.cart_count, 0) + nvl (sku_act.cart_count, 0),
nvl (sku_topic.cart_num, 0) + nvl (sku_act.cart_num, 0),
nvl (sku_act.favor_count30, 0),
nvl (sku_topic.favor_count, 0) + nvl (sku_act.favor_count, 0),
nvl (
sku_act.appraise_good_count30,
0
),
nvl (
sku_act.appraise_mid_count30,
0
),
nvl (
sku_act.appraise_bad_count30,
0
),
nvl (
sku_act.appraise_default_count30,
0
),
nvl (
sku_topic.appraise_good_count,
0
) + nvl (
sku_act.appraise_good_count,
0
),
nvl (
sku_topic.appraise_mid_count,
0
) + nvl (
sku_act.appraise_mid_count,
0
),
nvl (
sku_topic.appraise_bad_count,
0
) + nvl (
sku_act.appraise_bad_count,
0
),
nvl (
sku_topic.appraise_default_count,
0
) + nvl (
sku_act.appraise_default_count,
0
)
FROM
sku_act
FULL OUTER JOIN sku_topic ON sku_act.sku_id = sku_topic.sku_id
LEFT JOIN (
SELECT
*
FROM
$ { APP }.dwd_dim_sku_info
WHERE
dt = '$do_date'
) sku_info ON nvl (
sku_topic.sku_id,
sku_act.sku_id
) = sku_info.id;
INSERT overwrite TABLE $ { APP }.dwt_coupon_topic SELECT
nvl (
new.coupon_id,
old.coupon_id
),
nvl (new.get_count, 0),
nvl (new.using_count, 0),
nvl (new.used_count, 0),
nvl (old.get_count, 0) + nvl (new.get_count, 0),
nvl (old.using_count, 0) + nvl (new.using_count, 0),
nvl (old.used_count, 0) + nvl (new.used_count, 0)
FROM
(
SELECT
*
FROM
$ { APP }.dwt_coupon_topic
) old
FULL OUTER JOIN (
SELECT
coupon_id,
get_count,
using_count,
used_count
FROM
$ { APP }.dws_coupon_use_daycount
WHERE
dt = '$do_date'
) new ON old.coupon_id = new.coupon_id;
INSERT overwrite TABLE $ { APP }.dwt_activity_topic SELECT
nvl (new.id, old.id),
nvl (
new.activity_name,
old.activity_name
),
nvl (new.order_count, 0),
nvl (new.payment_count, 0),
nvl (old.order_count, 0) + nvl (new.order_count, 0),
nvl (old.payment_count, 0) + nvl (new.payment_count, 0)
FROM
(
SELECT
*
FROM
$ { APP }.dwt_activity_topic
) old
FULL OUTER JOIN (
SELECT
id,
activity_name,
order_count,
payment_count
FROM
$ { APP }.dws_activity_info_daycount
WHERE
dt = '$do_date'
) new ON old.id = new.id;
"
$hive -e "$sql"
2)增加脚本执行权限
[atguigu@hadoop102 bin]$ chmod 777 dws_to_dwt.sh
3)执行脚本导入数据
[atguigu@hadoop102 bin]$ dws_to_dwt.sh 2020-03-11
4)查看导入数据
hive (gmall)>
select * from dwt_uv_topic limit 5;
select * from dwt_user_topic limit 5;
select * from dwt_sku_topic limit 5;
select * from dwt_coupon_topic limit 5;
select * from dwt_activity_topic limit 5;