数仓实战2(美团架构)

数仓实战06:数仓搭建-DWT层

2020-08-06  本文已影响0人  勇于自信
1.设备主题宽表

1)建表语句

hive (gmall) > DROP TABLE
IF EXISTS dwt_uv_topic;

CREATE external TABLE dwt_uv_topic (
    `mid_id` string COMMENT '设备唯一标识',
    `user_id` string COMMENT '用户标识',
    `version_code` string COMMENT '程序版本号',
    `version_name` string COMMENT '程序版本名',
    `lang` string COMMENT '系统语言',
    `source` string COMMENT '渠道号',
    `os` string COMMENT '安卓系统版本',
    `area` string COMMENT '区域',
    `model` string COMMENT '手机型号',
    `brand` string COMMENT '手机品牌',
    `sdk_version` string COMMENT 'sdkVersion',
    `gmail` string COMMENT 'gmail',
    `height_width` string COMMENT '屏幕宽高',
    `app_time` string COMMENT '客户端日志产生时的时间',
    `network` string COMMENT '网络模式',
    `lng` string COMMENT '经度',
    `lat` string COMMENT '纬度',
    `login_date_first` string COMMENT '首次活跃时间',
    `login_date_last` string COMMENT '末次活跃时间',
    `login_day_count` BIGINT COMMENT '当日活跃次数',
    `login_count` BIGINT COMMENT '累积活跃天数'
) stored AS parquet location '/warehouse/gmall/dwt/dwt_uv_topic';

2)数据装载

hive (gmall) > INSERT overwrite TABLE dwt_uv_topic SELECT
    nvl (new.mid_id, old.mid_id),
    nvl (new.user_id, old.user_id),
    nvl (
        new.version_code,
        old.version_code
    ),
    nvl (
        new.version_name,
        old.version_name
    ),
    nvl (new.lang, old.lang),
    nvl (new.source, old.source),
    nvl (new.os, old.os),
    nvl (new.area, old.area),
    nvl (new.model, old.model),
    nvl (new.brand, old.brand),
    nvl (
        new.sdk_version,
        old.sdk_version
    ),
    nvl (new.gmail, old.gmail),
    nvl (
        new.height_width,
        old.height_width
    ),
    nvl (new.app_time, old.app_time),
    nvl (new.network, old.network),
    nvl (new.lng, old.lng),
    nvl (new.lat, old.lat),

IF (
    old.mid_id IS NULL,
    '2020-03-10',
    old.login_date_first
),

IF (
    new.mid_id IS NOT NULL,
    '2020-03-10',
    old.login_date_last
),

IF (
    new.mid_id IS NOT NULL,
    new.login_count,
    0
),
 nvl (old.login_count, 0) +
IF (new.login_count > 0, 1, 0)
FROM
    (SELECT * FROM dwt_uv_topic) old
FULL OUTER JOIN (
    SELECT
        *
    FROM
        dws_uv_detail_daycount
    WHERE
        dt = '2020-03-10'
) new ON old.mid_id = new.mid_id;

3)查询加载结果
hive (gmall)> select * from dwt_uv_topic limit 5;

2.会员主题宽表

宽表字段怎么来?维度关联的事实表度量值+开头、结尾+累积+累积一个时间段。


1)建表语句

hive (gmall) > DROP TABLE
IF EXISTS dwt_user_topic;

CREATE external TABLE dwt_user_topic (
    user_id string COMMENT '用户 id',
    login_date_first string COMMENT '首次登录时间',
    login_date_last string COMMENT '末次登录时间',
    login_count BIGINT COMMENT '累积登录天数',
    login_last_30d_count BIGINT COMMENT '最近 30 日登录天数',
    order_date_first string COMMENT '首次下单时间',
    order_date_last string COMMENT '末次下单时间',
    order_count BIGINT COMMENT '累积下单次数',
    order_amount DECIMAL (16, 2) COMMENT '累积下单金额',
    order_last_30d_count BIGINT COMMENT '最近 30 日下单次数',
    order_last_30d_amount BIGINT COMMENT '最近 30 日下单金额',
    payment_date_first string COMMENT '首次支付时间',
    payment_date_last string COMMENT '末次支付时间',
    payment_count DECIMAL (16, 2) COMMENT '累积支付次数',
    payment_amount DECIMAL (16, 2) COMMENT '累积支付金额',
    payment_last_30d_count DECIMAL (16, 2) COMMENT '最近 30 日支付次数',
    payment_last_30d_amount DECIMAL (16, 2) COMMENT '最近 30 日支付金额'
) COMMENT '用户主题宽表' stored AS parquet location '/warehouse/gmall/dwt/dwt_user_topic/' tblproperties (
    "parquet.compression" = "lzo"
);

2)数据装载

hive (gmall) > INSERT overwrite TABLE dwt_user_topic SELECT
    nvl (new.user_id, old.user_id),

IF (
    old.login_date_first IS NULL
    AND new.login_count > 0,
    '2020-03-10',
    old.login_date_first
),

IF (
    new.login_count > 0,
    '2020-03-10',
    old.login_date_last
),
 nvl (old.login_count, 0) +
IF (new.login_count > 0, 1, 0),
 nvl (new.login_last_30d_count, 0),

IF (
    old.order_date_first IS NULL
    AND new.order_count > 0,
    '2020-03-10',
    old.order_date_first
),

IF (
    new.order_count > 0,
    '2020-03-10',
    old.order_date_last
),
 nvl (old.order_count, 0) + nvl (new.order_count, 0),
 nvl (old.order_amount, 0) + nvl (new.order_amount, 0),
 nvl (new.order_last_30d_count, 0),
 nvl (
    new.order_last_30d_amount,
    0
),

IF (
    old.payment_date_first IS NULL
    AND new.payment_count > 0,
    '2020-03-10',
    old.payment_date_first
),

IF (
    new.payment_count > 0,
    '2020-03-10',
    old.payment_date_last
),
 nvl (old.payment_count, 0) + nvl (new.payment_count, 0),
 nvl (old.payment_amount, 0) + nvl (new.payment_amount, 0),
 nvl (
    new.payment_last_30d_count,
    0
),
 nvl (
    new.payment_last_30d_amount,
    0
)
FROM
    dwt_user_topic old
FULL OUTER JOIN (
    SELECT
        user_id,
        sum(

            IF (
                dt = '2020-03-10',
                login_count,
                0
            )
        ) login_count,
        sum(

            IF (
                dt = '2020-03-10',
                order_count,
                0
            )
        ) order_count,
        sum(

            IF (
                dt = '2020-03-10',
                order_amount,
                0
            )
        ) order_amount,
        sum(

            IF (
                dt = '2020-03-10',
                payment_count,
                0
            )
        ) payment_count,
        sum(

            IF (
                dt = '2020-03-10',
                payment_amount,
                0
            )
        ) payment_amount,
        sum(IF(login_count > 0, 1, 0)) login_last_30d_count,
        sum(order_count) order_last_30d_count,
        sum(order_amount) order_last_30d_amount,
        sum(payment_count) payment_last_30d_count,
        sum(payment_amount) payment_last_30d_amount
    FROM
        dws_user_action_daycount
    WHERE
        dt >= date_add('2020-03-10' ,- 30)
    GROUP BY
        user_id
) new ON old.user_id = new.user_id;

3)查询加载结果
hive (gmall)> select * from dwt_user_topic limit 5;

3.商品主题宽表

1)建表语句

hive (gmall) > DROP TABLE
IF EXISTS dwt_sku_topic;

CREATE external TABLE dwt_sku_topic (
    sku_id string COMMENT 'sku_id',
    spu_id string COMMENT 'spu_id',
    order_last_30d_count BIGINT COMMENT '最近 30 日被下单次数',
    order_last_30d_num BIGINT COMMENT '最近 30 日被下单件数',
    order_last_30d_amount DECIMAL (16, 2) COMMENT '最近 30 日被下单金额',
    order_count BIGINT COMMENT '累积被下单次数',
    order_num BIGINT COMMENT '累积被下单件数',
    order_amount DECIMAL (16, 2) COMMENT '累积被下单金额',
    payment_last_30d_count BIGINT COMMENT '最近 30 日被支付次数',
    payment_last_30d_num BIGINT COMMENT '最近 30 日被支付件数',
    payment_last_30d_amount DECIMAL (16, 2) COMMENT '最近 30 日被支付金额',
    payment_count BIGINT COMMENT '累积被支付次数',
    payment_num BIGINT COMMENT '累积被支付件数',
    payment_amount DECIMAL (16, 2) COMMENT '累积被支付金额',
    refund_last_30d_count BIGINT COMMENT '最近三十日退款次数',
    refund_last_30d_num BIGINT COMMENT '最近三十日退款件数',
    refund_last_30d_amount DECIMAL (10, 2) COMMENT '最近三十日退款金额',
    refund_count BIGINT COMMENT '累积退款次数',
    refund_num BIGINT COMMENT '累积退款件数',
    refund_amount DECIMAL (10, 2) COMMENT '累积退款金额',
    cart_last_30d_count BIGINT COMMENT '最近 30 日被加入购物车次数',
    cart_last_30d_num BIGINT COMMENT '最近 30 日被加入购物车件数',
    cart_count BIGINT COMMENT '累积被加入购物车次数',
    cart_num BIGINT COMMENT '累积被加入购物车件数',
    favor_last_30d_count BIGINT COMMENT '最近 30 日被收藏次数',
    favor_count BIGINT COMMENT '累积被收藏次数',
    appraise_last_30d_good_count BIGINT COMMENT '最近 30 日好评数',
    appraise_last_30d_mid_count BIGINT COMMENT '最近 30 日中评数',
    appraise_last_30d_bad_count BIGINT COMMENT '最近 30 日差评数',
    appraise_last_30d_default_count BIGINT COMMENT '最近 30 日默认评价数',
    appraise_good_count BIGINT COMMENT '累积好评数',
    appraise_mid_count BIGINT COMMENT '累积中评数',
    appraise_bad_count BIGINT COMMENT '累积差评数',
    appraise_default_count BIGINT COMMENT '累积默认评价数'
) COMMENT '商品主题宽表' stored AS parquet location '/warehouse/gmall/dwt/dwt_sku_topic/' tblproperties (
    "parquet.compression" = "lzo"
);

2)数据装载

hive (gmall) > INSERT overwrite TABLE dwt_sku_topic SELECT
    nvl (new.sku_id, old.sku_id),
    sku_info.spu_id,
    nvl (new.order_count30, 0),
    nvl (new.order_num30, 0),
    nvl (new.order_amount30, 0),
    nvl (old.order_count, 0) + nvl (new.order_count, 0),
    nvl (old.order_num, 0) + nvl (new.order_num, 0),
    nvl (old.order_amount, 0) + nvl (new.order_amount, 0),
    nvl (new.payment_count30, 0),
    nvl (new.payment_num30, 0),
    nvl (new.payment_amount30, 0),
    nvl (old.payment_count, 0) + nvl (new.payment_count, 0),
    nvl (old.payment_num, 0) + nvl (new.payment_count, 0),
    nvl (old.payment_amount, 0) + nvl (new.payment_count, 0),
    nvl (new.refund_count30, 0),
    nvl (new.refund_num30, 0),
    nvl (new.refund_amount30, 0),
    nvl (old.refund_count, 0) + nvl (new.refund_count, 0),
    nvl (old.refund_num, 0) + nvl (new.refund_num, 0),
    nvl (old.refund_amount, 0) + nvl (new.refund_amount, 0),
    nvl (new.cart_count30, 0),
    nvl (new.cart_num30, 0),
    nvl (old.cart_count, 0) + nvl (new.cart_count, 0),
    nvl (old.cart_num, 0) + nvl (new.cart_num, 0),
    nvl (new.favor_count30, 0),
    nvl (old.favor_count, 0) + nvl (new.favor_count, 0),
    nvl (
        new.appraise_good_count30,
        0
    ),
    nvl (new.appraise_mid_count30, 0),
    nvl (new.appraise_bad_count30, 0),
    nvl (
        new.appraise_default_count30,
        0
    ),
    nvl (old.appraise_good_count, 0) + nvl (new.appraise_good_count, 0),
    nvl (old.appraise_mid_count, 0) + nvl (new.appraise_mid_count, 0),
    nvl (old.appraise_bad_count, 0) + nvl (new.appraise_bad_count, 0),
    nvl (
        old.appraise_default_count,
        0
    ) + nvl (
        new.appraise_default_count,
        0
    )
FROM
    (
        SELECT
            sku_id,
            spu_id,
            order_last_30d_count,
            order_last_30d_num,
            order_last_30d_amount,
            order_count,
            order_num,
            order_amount,
            payment_last_30d_count,
            payment_last_30d_num,
            payment_last_30d_amount,
            payment_count,
            payment_num,
            payment_amount,
            refund_last_30d_count,
            refund_last_30d_num,
            refund_last_30d_amount,
            refund_count,
            refund_num,
            refund_amount,
            cart_last_30d_count,
            cart_last_30d_num,
            cart_count,
            cart_num,
            favor_last_30d_count,
            favor_count,
            appraise_last_30d_good_count,
            appraise_last_30d_mid_count,
            appraise_last_30d_bad_count,
            appraise_last_30d_default_count,
            appraise_good_count,
            appraise_mid_count,
            appraise_bad_count,
            appraise_default_count
        FROM
            dwt_sku_topic
    ) old
FULL OUTER JOIN (
    SELECT
        sku_id,
        sum(

            IF (
                dt = '2020-03-10',
                order_count,
                0
            )
        ) order_count,
        sum(

            IF (
                dt = '2020-03-10',
                order_num,
                0
            )
        ) order_num,
        sum(

            IF (
                dt = '2020-03-10',
                order_amount,
                0
            )
        ) order_amount,
        sum(

            IF (
                dt = '2020-03-10',
                payment_count,
                0
            )
        ) payment_count,
        sum(

            IF (
                dt = '2020-03-10',
                payment_num,
                0
            )
        ) payment_num,
        sum(

            IF (
                dt = '2020-03-10',
                payment_amount,
                0
            )
        ) payment_amount,
        sum(

            IF (
                dt = '2020-03-10',
                refund_count,
                0
            )
        ) refund_count,
        sum(

            IF (
                dt = '2020-03-10',
                refund_num,
                0
            )
        ) refund_num,
        sum(

            IF (
                dt = '2020-03-10',
                refund_amount,
                0
            )
        ) refund_amount,
        sum(

            IF (
                dt = '2020-03-10',
                cart_count,
                0
            )
        ) cart_count,
        sum(

            IF (dt = '2020-03-10', cart_num, 0)
        ) cart_num,
        sum(

            IF (
                dt = '2020-03-10',
                favor_count,
                0
            )
        ) favor_count,
        sum(

            IF (
                dt = '2020-03-10',
                appraise_good_count,
                0
            )
        ) appraise_good_count,
        sum(

            IF (
                dt = '2020-03-10',
                appraise_mid_count,
                0
            )
        ) appraise_mid_count,
        sum(

            IF (
                dt = '2020-03-10',
                appraise_bad_count,
                0
            )
        ) appraise_bad_count,
        sum(

            IF (
                dt = '2020-03-10',
                appraise_default_count,
                0
            )
        ) appraise_default_count,
        sum(order_count) order_count30,
        sum(order_num) order_num30,
        sum(order_amount) order_amount30,
        sum(payment_count) payment_count30,
        sum(payment_num) payment_num30,
        sum(payment_amount) payment_amount30,
        sum(refund_count) refund_count30,
        sum(refund_num) refund_num30,
        sum(refund_amount) refund_amount30,
        sum(cart_count) cart_count30,
        sum(cart_num) cart_num30,
        sum(favor_count) favor_count30,
        sum(appraise_good_count) appraise_good_count30,
        sum(appraise_mid_count) appraise_mid_count30,
        sum(appraise_bad_count) appraise_bad_count30,
        sum(appraise_default_count) appraise_default_count30
    FROM
        dws_sku_action_daycount
    WHERE
        dt >= date_add('2020-03-10', - 30)
    GROUP BY
        sku_id
) new ON new.sku_id = old.sku_id
LEFT JOIN (
    SELECT
        *
    FROM
        dwd_dim_sku_info
    WHERE
        dt = '2020-03-10'
) sku_info ON nvl (new.sku_id, old.sku_id) = sku_info.id;

3)查询加载结果
hive (gmall)> select * from dwt_sku_topic limit 5;

4.优惠券主题宽表

1)建表语句

hive (gmall) > DROP TABLE
IF EXISTS dwt_coupon_topic;

CREATE external TABLE dwt_coupon_topic (
    `coupon_id` string COMMENT '优惠券 ID',
    `get_day_count` BIGINT COMMENT '当日领用次数',
    `using_day_count` BIGINT COMMENT '当日使用(下单)次数',
    `used_day_count` BIGINT COMMENT '当日使用(支付)次数',
    `get_count` BIGINT COMMENT '累积领用次数',
    `using_count` BIGINT COMMENT '累积使用(下单)次数',
    `used_count` BIGINT COMMENT '累积使用(支付)次数'
) COMMENT '购物券主题宽表' stored AS parquetlocation '/warehouse/gmall/dwt/dwt_coupon_topic/' tblproperties (
    "parquet.compression" = "lzo"
);

2)数据装载

hive (gmall)> insert overwrite table dwt_coupon_topic select nvl(new.coupon_id,old.coupon_id), nvl(new.get_count,0), nvl(new.using_count,0), nvl(new.used_count,0), nvl(old.get_count,0)+nvl(new.get_count,0), nvl(old.using_count,0)+nvl(new.using_count,0), nvl(old.used_count,0)+nvl(new.used_count,0) from ( select * from dwt_coupon_topic )old full outer join ( select coupon_id, get_count, using_count, used_count from dws_coupon_use_daycount where dt='2020-03-10' )new on old.coupon_id=new.coupon_id;

3)查询加载结果
hive (gmall)> select * from dwt_coupon_topic limit 5;

5.活动主题宽表

1)建表语句

hive (gmall)> drop table if exists dwt_activity_topic;
create external table dwt_activity_topic( `id` string COMMENT '活动 id', `activity_name` string COMMENT '活动名称', `order_day_count` bigint COMMENT '当日日下单次数', `payment_day_count` bigint COMMENT '当日支付次数', `order_count` bigint COMMENT '累积下单次数', `payment_count` bigint COMMENT '累积支付次数' ) COMMENT '活动主题宽表' row format delimited fields terminated by '\t' location '/warehouse/gmall/dwt/dwt_activity_topic/' tblproperties ("parquet.compression"="lzo");

2)数据装载

hive (gmall)> insert overwrite table dwt_activity_topic select nvl(new.id,old.id), nvl(new.activity_name,old.activity_name), nvl(new.order_count,0), nvl(new.payment_count,0), nvl(old.order_count,0)+nvl(new.order_count,0), nvl(old.payment_count,0)+nvl(new.payment_count,0) from ( select * from dwt_activity_topic )old full outer join ( select id, activity_name, order_count, payment_count from dws_activity_info_daycount where dt='2020-03-10' )new on old.id=new.id;

3)查询加载结果
hive (gmall)> select * from dwt_activity_topic limit 5;

6.DWT层数据导入脚本

1)在/home/atguigu/bin 目录下创建脚本 dws_to_dwt.sh

[atguigu@hadoop102 bin]$ vim dws_to_dwt.sh

在脚本中填写如下内容

#!/bin/bash 
APP=gmall 
hive=/opt/module/hive/bin/hive 
# 如果是输入的日期按照取输入日期;如果没输入日期取当前时间的前一天 
if [ -n "$1" ] ;
then 
do
_date=$1 
else
do
_date=`date -d "-1 day" +%F` 
fi
sql="
INSERT overwrite TABLE $ { APP }.dwt_uv_topic SELECT
    nvl (new.mid_id, old.mid_id),
    nvl (new.user_id, old.user_id),
    nvl (
        new.version_code,
        old.version_code
    ),
    nvl (
        new.version_name,
        old.version_name
    ),
    nvl (new.lang, old.lang),
    nvl (new.source, old.source),
    nvl (new.os, old.os),
    nvl (new.area, old.area),
    nvl (new.model, old.model),
    nvl (new.brand, old.brand),
    nvl (
        new.sdk_version,
        old.sdk_version
    ),
    nvl (new.gmail, old.gmail),
    nvl (
        new.height_width,
        old.height_width
    ),
    nvl (new.app_time, old.app_time),
    nvl (new.network, old.network),
    nvl (new.lng, old.lng),
    nvl (new.lat, old.lat),
    nvl (
        old.login_date_first,
        '$do_date'
    ),

IF (
    new.login_count > 0,
    '$do_date',
    old.login_date_last
),
 nvl (new.login_count, 0),
 nvl (new.login_count, 0) + nvl (old.login_count, 0)
FROM
    (
        SELECT
            *
        FROM
            $ { APP }.dwt_uv_topic
    ) old
FULL OUTER JOIN (
    SELECT
        *
    FROM
        $ { APP }.dws_uv_detail_daycount
    WHERE
        dt = '$do_date'
) new ON old.mid_id = new.mid_id;

INSERT overwrite TABLE $ { APP }.dwt_user_topic SELECT
    nvl (new.user_id, old.user_id),

IF (
    old.login_date_first IS NULL
    AND new.login_count > 0,
    '$do_date',
    old.login_date_first
),

IF (
    new.login_count > 0,
    '$do_date',
    old.login_date_last
),
 nvl (old.login_count, 0) +
IF (new.login_count > 0, 1, 0),
 nvl (new.login_last_30d_count, 0),

IF (
    old.order_date_first IS NULL
    AND new.order_count > 0,
    '$do_date',
    old.order_date_first
),

IF (
    new.order_count > 0,
    '$do_date',
    old.order_date_last
),
 nvl (old.order_count, 0) + nvl (new.order_count, 0),
 nvl (old.order_amount, 0) + nvl (new.order_amount, 0),
 nvl (new.order_last_30d_count, 0),
 nvl (
    new.order_last_30d_amount,
    0
),

IF (
    old.payment_date_first IS NULL
    AND new.payment_count > 0,
    '$do_date',
    old.payment_date_first
),

IF (
    new.payment_count > 0,
    '$do_date',
    old.payment_date_last
),
 nvl (old.payment_count, 0) + nvl (new.payment_count, 0),
 nvl (old.payment_amount, 0) + nvl (new.payment_amount, 0),
 nvl (
    new.payment_last_30d_count,
    0
),
 nvl (
    new.payment_last_30d_amount,
    0
)
FROM
    (
        SELECT
            *
        FROM
            $ { APP }.dwt_user_topic
    ) old
FULL OUTER JOIN (
    SELECT
        user_id,
        sum(

            IF (
                dt = '$do_date',
                login_count,
                0
            )
        ) login_count,
        sum(

            IF (
                dt = '$do_date',
                order_count,
                0
            )
        ) order_count,
        sum(

            IF (
                dt = '$do_date',
                order_amount,
                0
            )
        ) order_amount,
        sum(

            IF (
                dt = '$do_date',
                payment_count,
                0
            )
        ) payment_count,
        sum(

            IF (
                dt = '$do_date',
                payment_amount,
                0
            )
        ) payment_amount,
        sum(IF(order_count > 0, 1, 0)) login_last_30d_count,
        sum(order_count) order_last_30d_count,
        sum(order_amount) order_last_30d_amount,
        sum(payment_count) payment_last_30d_count,
        sum(payment_amount) payment_last_30d_amount
    FROM
        $ { APP }.dws_user_action_daycount
    WHERE
        dt >= date_add('$do_date' ,- 30)
    GROUP BY
        user_id
) new ON old.user_id = new.user_id;

WITH sku_act AS (
    SELECT
        sku_id,
        sum(

            IF (
                dt = '$do_date',
                order_count,
                0
            )
        ) order_count,
        sum(

            IF (dt = '$do_date', order_num, 0)
        ) order_num,
        sum(

            IF (
                dt = '$do_date',
                order_amount,
                0
            )
        ) order_amount,
        sum(

            IF (
                dt = '$do_date',
                payment_count,
                0
            )
        ) payment_count,
        sum(

            IF (
                dt = '$do_date',
                payment_num,
                0
            )
        ) payment_num,
        sum(

            IF (
                dt = '$do_date',
                payment_amount,
                0
            )
        ) payment_amount,
        sum(

            IF (
                dt = '$do_date',
                refund_count,
                0
            )
        ) refund_count,
        sum(

            IF (dt = '$do_date', refund_num, 0)
        ) refund_num,
        sum(

            IF (
                dt = '$do_date',
                refund_amount,
                0
            )
        ) refund_amount,
        sum(

            IF (dt = '$do_date', cart_count, 0)
        ) cart_count,
        sum(

            IF (dt = '$do_date', cart_num, 0)
        ) cart_num,
        sum(

            IF (
                dt = '$do_date',
                favor_count,
                0
            )
        ) favor_count,
        sum(

            IF (
                dt = '$do_date',
                appraise_good_count,
                0
            )
        ) appraise_good_count,
        sum(

            IF (
                dt = '$do_date',
                appraise_mid_count,
                0
            )
        ) appraise_mid_count,
        sum(

            IF (
                dt = '$do_date',
                appraise_bad_count,
                0
            )
        ) appraise_bad_count,
        sum(

            IF (
                dt = '$do_date',
                appraise_default_count,
                0
            )
        ) appraise_default_count,
        sum(order_count) order_count30,
        sum(order_num) order_num30,
        sum(order_amount) order_amount30,
        sum(payment_count) payment_count30,
        sum(payment_num) payment_num30,
        sum(payment_amount) payment_amount30,
        sum(refund_count) refund_count30,
        sum(refund_num) refund_num30,
        sum(refund_amount) refund_amount30,
        sum(cart_count) cart_count30,
        sum(cart_num) cart_num30,
        sum(favor_count) favor_count30,
        sum(appraise_good_count) appraise_good_count30,
        sum(appraise_mid_count) appraise_mid_count30,
        sum(appraise_bad_count) appraise_bad_count30,
        sum(appraise_default_count) appraise_default_count30
    FROM
        $ { APP }.dws_sku_action_daycount
    WHERE
        dt >= date_add('$do_date' ,- 30)
    GROUP BY
        sku_id
),
 sku_topic AS (
    SELECT
        sku_id,
        spu_id,
        order_last_30d_count,
        order_last_30d_num,
        order_last_30d_amount,
        order_count,
        order_num,
        order_amount,
        payment_last_30d_count,
        payment_last_30d_num,
        payment_last_30d_amount,
        payment_count,
        payment_num,
        payment_amount,
        refund_last_30d_count,
        refund_last_30d_num,
        refund_last_30d_amount,
        refund_count,
        refund_num,
        refund_amount,
        cart_last_30d_count,
        cart_last_30d_num,
        cart_count,
        cart_num,
        favor_last_30d_count,
        favor_count,
        appraise_last_30d_good_count,
        appraise_last_30d_mid_count,
        appraise_last_30d_bad_count,
        appraise_last_30d_default_count,
        appraise_good_count,
        appraise_mid_count,
        appraise_bad_count,
        appraise_default_count
    FROM
        $ { APP }.dwt_sku_topic
) INSERT overwrite TABLE $ { APP }.dwt_sku_topic SELECT
    nvl (
        sku_act.sku_id,
        sku_topic.sku_id
    ),
    sku_info.spu_id,
    nvl (sku_act.order_count30, 0),
    nvl (sku_act.order_num30, 0),
    nvl (sku_act.order_amount30, 0),
    nvl (sku_topic.order_count, 0) + nvl (sku_act.order_count, 0),
    nvl (sku_topic.order_num, 0) + nvl (sku_act.order_num, 0),
    nvl (sku_topic.order_amount, 0) + nvl (sku_act.order_amount, 0),
    nvl (sku_act.payment_count30, 0),
    nvl (sku_act.payment_num30, 0),
    nvl (sku_act.payment_amount30, 0),
    nvl (sku_topic.payment_count, 0) + nvl (sku_act.payment_count, 0),
    nvl (sku_topic.payment_num, 0) + nvl (sku_act.payment_count, 0),
    nvl (sku_topic.payment_amount, 0) + nvl (sku_act.payment_count, 0),
    nvl (refund_count30, 0),
    nvl (sku_act.refund_num30, 0),
    nvl (sku_act.refund_amount30, 0),
    nvl (sku_topic.refund_count, 0) + nvl (sku_act.refund_count, 0),
    nvl (sku_topic.refund_num, 0) + nvl (sku_act.refund_num, 0),
    nvl (sku_topic.refund_amount, 0) + nvl (sku_act.refund_amount, 0),
    nvl (sku_act.cart_count30, 0),
    nvl (sku_act.cart_num30, 0),
    nvl (sku_topic.cart_count, 0) + nvl (sku_act.cart_count, 0),
    nvl (sku_topic.cart_num, 0) + nvl (sku_act.cart_num, 0),
    nvl (sku_act.favor_count30, 0),
    nvl (sku_topic.favor_count, 0) + nvl (sku_act.favor_count, 0),
    nvl (
        sku_act.appraise_good_count30,
        0
    ),
    nvl (
        sku_act.appraise_mid_count30,
        0
    ),
    nvl (
        sku_act.appraise_bad_count30,
        0
    ),
    nvl (
        sku_act.appraise_default_count30,
        0
    ),
    nvl (
        sku_topic.appraise_good_count,
        0
    ) + nvl (
        sku_act.appraise_good_count,
        0
    ),
    nvl (
        sku_topic.appraise_mid_count,
        0
    ) + nvl (
        sku_act.appraise_mid_count,
        0
    ),
    nvl (
        sku_topic.appraise_bad_count,
        0
    ) + nvl (
        sku_act.appraise_bad_count,
        0
    ),
    nvl (
        sku_topic.appraise_default_count,
        0
    ) + nvl (
        sku_act.appraise_default_count,
        0
    )
FROM
    sku_act
FULL OUTER JOIN sku_topic ON sku_act.sku_id = sku_topic.sku_id
LEFT JOIN (
    SELECT
        *
    FROM
        $ { APP }.dwd_dim_sku_info
    WHERE
        dt = '$do_date'
) sku_info ON nvl (
    sku_topic.sku_id,
    sku_act.sku_id
) = sku_info.id;

INSERT overwrite TABLE $ { APP }.dwt_coupon_topic SELECT
    nvl (
        new.coupon_id,
        old.coupon_id
    ),
    nvl (new.get_count, 0),
    nvl (new.using_count, 0),
    nvl (new.used_count, 0),
    nvl (old.get_count, 0) + nvl (new.get_count, 0),
    nvl (old.using_count, 0) + nvl (new.using_count, 0),
    nvl (old.used_count, 0) + nvl (new.used_count, 0)
FROM
    (
        SELECT
            *
        FROM
            $ { APP }.dwt_coupon_topic
    ) old
FULL OUTER JOIN (
    SELECT
        coupon_id,
        get_count,
        using_count,
        used_count
    FROM
        $ { APP }.dws_coupon_use_daycount
    WHERE
        dt = '$do_date'
) new ON old.coupon_id = new.coupon_id;

INSERT overwrite TABLE $ { APP }.dwt_activity_topic SELECT
    nvl (new.id, old.id),
    nvl (
        new.activity_name,
        old.activity_name
    ),
    nvl (new.order_count, 0),
    nvl (new.payment_count, 0),
    nvl (old.order_count, 0) + nvl (new.order_count, 0),
    nvl (old.payment_count, 0) + nvl (new.payment_count, 0)
FROM
    (
        SELECT
            *
        FROM
            $ { APP }.dwt_activity_topic
    ) old
FULL OUTER JOIN (
    SELECT
        id,
        activity_name,
        order_count,
        payment_count
    FROM
        $ { APP }.dws_activity_info_daycount
    WHERE
        dt = '$do_date'
) new ON old.id = new.id;
"
$hive -e "$sql"

2)增加脚本执行权限

[atguigu@hadoop102 bin]$ chmod 777 dws_to_dwt.sh

3)执行脚本导入数据

[atguigu@hadoop102 bin]$ dws_to_dwt.sh 2020-03-11

4)查看导入数据

hive (gmall)> 
select * from dwt_uv_topic limit 5; 
select * from dwt_user_topic limit 5; 
select * from dwt_sku_topic limit 5; 
select * from dwt_coupon_topic limit 5; 
select * from dwt_activity_topic limit 5;
上一篇下一篇

猜你喜欢

热点阅读