四、Hive使用

2020-07-01  本文已影响0人  人间桑

一、每个用户截止到每月为止的最大交易金额和累计到该月的总交易金额

over(partition by a order by b) 最后处理的窗口函数,partition by分组,order by排序,然后前面加聚合函数,如有排序,则聚合函数只处理b之前的项 row_number() over()顺序

select

    customer_key,

    substr(create_date,0,7) umonth,

    count(distinct sales_order_key) ucount,

    sum(unit_price) as usum

from ods_sales_orders

group by customer_key,substr(create_date,0,7)

limit 10;

select

    customer_key,umonth,ucount,max(usum) over(partition by customer_key order by umonth) current_max,

    sum(usum) over(partition by customer_key order by umonth) current_sum

from(select

customer_key,

 substr(create_date,0,7) umonth,

count(distinct sales_order_key) ucount,

    sum(unit_price) as usum

from ods_sales_orders

group by customer_key,substr(create_date,0,7)) a

limit 10;

二、计算用户的回购率和复购率

复购率:

select

    customer_key,

    substr(create_date,0,7) umonth,

    count(distinct sales_order_key) ucount

from ods_sales_orders

group by customer_key,substr(create_date,0,7)

limit 10;

select

    umonth,

    count(distinct customer_key) customer_num,

    sum(if(ucount>1,1,0)) Rep_num,

    sum(if(ucount>1,1,0))/count(distinct customer_key) rep_rate

from(select

    customer_key,

    substr(create_date,0,7) umonth,

    count(distinct sales_order_key) ucount

from ods_sales_orders

group by customer_key,substr(create_date,0,7)) a

group by umonth;

回购率:

select

    a_umonth,

    count(customer_key) as ct,

    count(b_umonth) as reb_num, 

    concat(round(count(b_umonth)/count(distinct customer_key)*100,2),'%') as reb_rate

from(select

    a.customer_key,a.umonth a_umonth,b.umonth b_umonth

from(select

    customer_key,

    substr(create_date,0,7) umonth

from ods_sales_orders

group by customer_key,substr(create_date,0,7)) a

left join (select

    customer_key,

    substr(create_date,0,7) umonth

from ods_sales_orders

group by customer_key,substr(create_date,0,7)) b

on a.customer_key=b.customer_key and

((substr(a.umonth,6,2)=substr(b.umonth,6,2)-1 and substr(b.umonth,1,4)=substr(a.umonth,1,4)) or (substr(a.umonth,6,2)='12' and substr(b.umonth,6,2)='01' and substr(b.umonth,1,4)=substr(a.umonth,1,4)+1))) c

group by a_umonth;

左连接连上月 hive不能用中文

三、求用户号对应不同的产品

用户好-产品1-产品2 按购买时间排序,只输出两个

select

concat(customer_key,'-',cpzl_zw,'-',if(isnull(cpzl_zw1),'null',cpzl_zw1))

from(select

*,

row_number() over(partition by customer_key order by create_date asc) as rn

from(select

    *

from(select

    customer_key,cpzl_zw,create_date,

    lead(cpzl_zw) over(partition by customer_key order by create_date asc) cpzl_zw1

from ods_sales_orders) a

where cpzl_zw!=cpzl_zw1 or cpzl_zw1 is null) b) c

where rn=1;

四、统计各个省份所属城市下最受欢迎的Top 3产品和其销量(不能出现有null)

select

    *

from(select

    chinese_city,english_product_name, ucount,

    row_number() over(partition by chinese_city order by ucount desc) rn

from(select

    chinese_city,english_product_name,count(*) ucount

from ods_sales_orders a

left join ods_customer b on a.customer_key=b.customer_key

group by chinese_city,english_product_name) c) d

where rn<=3 and chinese_city!='null';

五、商品的销售数量top10,排名需考虑并列排名的情况

select

    *

from(select

    english_product_name,sale_amount,

    dense_rank() over(order by sale_amount desc) rank

from (select

    english_product_name,count(*) sale_amount

from ods_sales_orders

group by english_product_name) a) b

where rank<=10;

六、计算累计和(统计2019年1-12月的累积销量,即1月为1月份的值,2月为1、2月份值的和,3月为1、2、3月份的和,12月为1-12月份值的和)

select

    *,

    sum(sale_amount) over(order by umonth asc) sum_amount

from(select

    substr(create_date,0,7) umonth,count(*) sale_amount

from ods_sales_orders

where substr(create_date,0,4)='2019'

group by substr(create_date,0,7)) a;

七、计算客户平均购买一次商品的间隔时间

select

    customer_key,avg(date_diff) avg_date_diff

from(select

    customer_key,create_date date0,

    lead(create_date,1,null) over(partition by customer_key order by create_date asc) date1,

    datediff(lead(create_date,1,null) over(partition by customer_key order by create_date asc),create_date) date_diff

from ods_sales_orders) a

where date_diff is not null

group by customer_key;

八、查询最近前20%时间的订单信息

select

    *

from(select 

    *,

    ntile(5) over(sort by create_date desc) sorted

from ods_sales_orders) a

where sorted=1;

上一篇下一篇

猜你喜欢

热点阅读