Hadoop大数据

数仓--DW--Hadoop数仓实践Case-16-迟到的事实

2019-04-17  本文已影响63人  李小李的路

迟到的事实概述

修改数据仓库模式

use dw;
-- 在事务事实表中添加登记日期代理键列
alter table
    sales_order_fact rename to sales_order_fact_old;
-- 创建表
create table
    sales_order_fact
(
order_number int comment'order number',
customer_sk int comment'customer SK',
customer_zip_code_sk int comment'customer zip code SK',
shipping_zip_code_sk int comment'shipping zip code SK',
product_sk int comment'product SK',
sales_order_attribute_sk int comment'sales order attribute SK',
order_date_sk int comment'order date SK',
entry_date_sk int comment 'entry date SK',
allocate_date_sk int comment'allocate date SK',
allocate_quantity int comment'allocate quantity',
packing_date_sk int comment'packing date SK',
packing_quantity int comment'packing quantity',
ship_date_sk int comment'ship date SK',
ship_quantity int comment'ship quantity',
receive_date_sk int comment'receive date SK',
receive_quantity int comment'receive quantity',
request_delivery_date_sk int comment'request delivery date SK',
order_amount decimal(10,2) comment'order amount',
order_quantity int comment'order quantity'
) 
clustered by(order_number) into8 buckets
stored as
orc tblproperties ('transactional'='true');
-- 将数据插入到表中
insert into
sales_order_fact
select
order_number,
customer_sk,
customer_zip_code_sk,
shipping_zip_code_sk,
product_sk,
sales_order_attribute_sk,
order_date_sk,
null,
allocate_date_sk,
allocate_quantity,
packing_date_sk,
packing_quantity,
ship_date_sk,
ship_quantity,
receive_date_sk,
receive_quantity,
request_delivery_date_sk,
order_amount,
order_quantity
fromsales_order_fact_old;
drop table
sales_order_fact_old;
-- 建立登记日期维度视图
create view
entry_date_dim
(entry_date_sk, entry_date, month_name, month
, quarter, year
)
as select
date_sk, date, month_name, month, quarter, year
from
date_dim
;

修改定期装载脚本

-- 修改定期装载脚本
insert into
    sales_order_fact
select
    a.order_number,
    c.customer_sk,
    i.customer_zip_code_sk,
    j.shipping_zip_code_sk,
    d.product_sk,
    g.sales_order_attribute_sk,
    e.order_date_sk,
    h.entry_date_sk,
    null, null, null, null, null, null, null, null,
    f.request_delivery_date_sk,
    order_amount,
    quantity
from
    rds.sales_order a,
    customer_dim c,
    product_dim d,
    order_date_dim e,
    request_delivery_date_dim f,
    sales_order_attribute_dim g,
    customer_zip_code_dim i,
    shipping_zip_code_dim j,
    entry_date_dim h,
    rds.customer k,
    rds.cdc_time l
where
    a.order_status = 'N'
and
    a.customer_number = c.customer_number
and
    a.status_date >= c.effective_date
and 
    a.status_date < c.expiry_date
and
    a.customer_number = k.customer_number
and
    k.customer_zip_code = i.customer_zip_code
and
    a.status_date >= i.effective_date
and
    a.status_date <= i.expiry_date
and
    k.shipping_zip_code = j.shipping_zip_code
and
    a.status_date >= j.effective_date
and
    a.status_date <= j.expiry_date
and
    a.product_code = d.product_code
and
    a.status_date >= d.effective_date
and
    a.status_date < d.expiry_date
and to_date
    (a.status_date) = e.order_date
and 
    to_date(a.entry_date) = h.entry_date
and 
    to_date(a.request_delivery_date) = f.request_delivery_date
and
    a.verification_ind = g.verification_ind
and
    a.credit_check_flag = g.credit_check_flag
and
    a.new_customer_ind = g.new_customer_ind
and
    a.web_order_flag = g.web_order_flag
and
    a.entry_date >= l.last_load 
and a.entry_date < l.current_load ;
-- 更新分配库房、 打包、 配送、 收货4种订单状态的时间代理键和度量,
-- 也要加上entry_date_sk
status_date >= customer_dim.effective_date
and
status_date < customer_dim.expiry_date

修改装载月度周期快照事实表

drop table if exists tmp;
create table tmp as
select a.order_month_sk order_month_sk,
a.product_sk product_sk,
a.month_order_amount + b.order_amount month_order_amount,
a.month_order_quantity + b.order_quantity month_order_quantity
from month_end_sales_order_fact a,
(select d.month_sk month_sk,
a.product_sk product_sk,
sum(order_amount) order_amount,
sum(order_quantity) order_quantity
from sales_order_fact a,
order_date_dim b,
entry_date_dim c,
month_dim d
where a.order_date_sk = b.order_date_sk
and a.entry_date_sk = c.entry_date_sk
and c.month = month(${hivevar:pre_month_date})
and c.year = year(${hivevar:pre_month_date})
and b.month = d.month
and b.year = d.year
and b.order_date <> c.entry_date
group by d.month_sk , a.product_sk) b
where a.product_sk = b.product_sk
and a.order_month_sk = b.month_sk;
delete from month_end_sales_order_fact
where exists
(select 1
from tmp t2
where month_end_sales_order_fact.order_month_sk = t2.order_month_sk
and month_end_sales_order_fact.product_sk = t2.product_sk);
insert into month_end_sales_order_fact select * from tmp;
insert into
    month_end_sales_order_fact
select
    d.month_sk, a.product_sk, sum(order_amount), sum(order_quantity)
from
    sales_order_fact a,
    order_date_dim b,
    entry_date_dim c,month_dim d
where
    a.order_date_sk = b.order_date_sk
and
    a.entry_date_sk = c.entry_date_sk
and
    c.month= month(${hivevar:pre_month_date})
and
    c.year= year(${hivevar:pre_month_date})
and
    b.month= d.month
and
    b.year= d.year
and 
    not exists
    (select
        1
    from
        month_end_sales_order_fact p
    where
        p.order_month_sk = d.month_sk
    and
    p.product_sk = a.product_sk)
group by
    d.month_sk , a.product_sk
;

迟到的事实总结

上一篇 下一篇

猜你喜欢

热点阅读