四、Hive使用
一、每个用户截止到每月为止的最大交易金额和累计到该月的总交易金额
over(partition by a order by b) 最后处理的窗口函数,partition by分组,order by排序,然后前面加聚合函数,如有排序,则聚合函数只处理b之前的项 row_number() over()顺序
select
customer_key,
substr(create_date,0,7) umonth,
count(distinct sales_order_key) ucount,
sum(unit_price) as usum
from ods_sales_orders
group by customer_key,substr(create_date,0,7)
limit 10;
select
customer_key,umonth,ucount,max(usum) over(partition by customer_key order by umonth) current_max,
sum(usum) over(partition by customer_key order by umonth) current_sum
from(select
customer_key,
substr(create_date,0,7) umonth,
count(distinct sales_order_key) ucount,
sum(unit_price) as usum
from ods_sales_orders
group by customer_key,substr(create_date,0,7)) a
limit 10;
二、计算用户的回购率和复购率
复购率:
select
customer_key,
substr(create_date,0,7) umonth,
count(distinct sales_order_key) ucount
from ods_sales_orders
group by customer_key,substr(create_date,0,7)
limit 10;
select
umonth,
count(distinct customer_key) customer_num,
sum(if(ucount>1,1,0)) Rep_num,
sum(if(ucount>1,1,0))/count(distinct customer_key) rep_rate
from(select
customer_key,
substr(create_date,0,7) umonth,
count(distinct sales_order_key) ucount
from ods_sales_orders
group by customer_key,substr(create_date,0,7)) a
group by umonth;
回购率:
select
a_umonth,
count(customer_key) as ct,
count(b_umonth) as reb_num,
concat(round(count(b_umonth)/count(distinct customer_key)*100,2),'%') as reb_rate
from(select
a.customer_key,a.umonth a_umonth,b.umonth b_umonth
from(select
customer_key,
substr(create_date,0,7) umonth
from ods_sales_orders
group by customer_key,substr(create_date,0,7)) a
left join (select
customer_key,
substr(create_date,0,7) umonth
from ods_sales_orders
group by customer_key,substr(create_date,0,7)) b
on a.customer_key=b.customer_key and
((substr(a.umonth,6,2)=substr(b.umonth,6,2)-1 and substr(b.umonth,1,4)=substr(a.umonth,1,4)) or (substr(a.umonth,6,2)='12' and substr(b.umonth,6,2)='01' and substr(b.umonth,1,4)=substr(a.umonth,1,4)+1))) c
group by a_umonth;
左连接连上月 hive不能用中文
三、求用户号对应不同的产品
用户好-产品1-产品2 按购买时间排序,只输出两个
select
concat(customer_key,'-',cpzl_zw,'-',if(isnull(cpzl_zw1),'null',cpzl_zw1))
from(select
*,
row_number() over(partition by customer_key order by create_date asc) as rn
from(select
*
from(select
customer_key,cpzl_zw,create_date,
lead(cpzl_zw) over(partition by customer_key order by create_date asc) cpzl_zw1
from ods_sales_orders) a
where cpzl_zw!=cpzl_zw1 or cpzl_zw1 is null) b) c
where rn=1;
四、统计各个省份所属城市下最受欢迎的Top 3产品和其销量(不能出现有null)
select
*
from(select
chinese_city,english_product_name, ucount,
row_number() over(partition by chinese_city order by ucount desc) rn
from(select
chinese_city,english_product_name,count(*) ucount
from ods_sales_orders a
left join ods_customer b on a.customer_key=b.customer_key
group by chinese_city,english_product_name) c) d
where rn<=3 and chinese_city!='null';
五、商品的销售数量top10,排名需考虑并列排名的情况
select
*
from(select
english_product_name,sale_amount,
dense_rank() over(order by sale_amount desc) rank
from (select
english_product_name,count(*) sale_amount
from ods_sales_orders
group by english_product_name) a) b
where rank<=10;
六、计算累计和(统计2019年1-12月的累积销量,即1月为1月份的值,2月为1、2月份值的和,3月为1、2、3月份的和,12月为1-12月份值的和)
select
*,
sum(sale_amount) over(order by umonth asc) sum_amount
from(select
substr(create_date,0,7) umonth,count(*) sale_amount
from ods_sales_orders
where substr(create_date,0,4)='2019'
group by substr(create_date,0,7)) a;
七、计算客户平均购买一次商品的间隔时间
select
customer_key,avg(date_diff) avg_date_diff
from(select
customer_key,create_date date0,
lead(create_date,1,null) over(partition by customer_key order by create_date asc) date1,
datediff(lead(create_date,1,null) over(partition by customer_key order by create_date asc),create_date) date_diff
from ods_sales_orders) a
where date_diff is not null
group by customer_key;
八、查询最近前20%时间的订单信息
select
*
from(select
*,
ntile(5) over(sort by create_date desc) sorted
from ods_sales_orders) a
where sorted=1;