sql案例用户消费行为分析
一、数据导入
1、创建数据库和表:
create table ordercreate table order_info(
order_id int primary key ,
user_id int,
is_paid varchar(10),
price float,
paid_time varchar(30));
create table user(
user_id int primary key,
sex varchar(10),
birth date);
2、导入csv数据表
命令行导入:
load data local infile '文件路径' into table 表名 fields terminated by ',';
注意:
*路径不要有中文,是左斜杆/,而不是
*mysql 8.0 登陆使用 mysql --local-infile -uroot -p
*要有fields terminated by ',' 是因为csv 文件是以逗号为分割符的
导入错误有
1、没有先选择数据库(use 数据库名)
2、sql编辑器中输入 SHOW VARIABLES LIKE '%local%')
查到 local_infile OFF 即该变量未开启
解决:输入SET GLOBAL local_infile=1(其中global是全局变量的意思)
这种导入容易出问题,一般不选
另选导入方法navicat :选择表→导入向导→注意,如果导入中文乱码可以选用10008 (MAC - Simplified Chinese GB 2312)这个编码规则导入

二、数据清洗:
先把时间格式标准化成1993-02-27 这样的
update 表名 set 字段名=replace(字段名, '/', '-') where 字段名 is not null;
然后更新字符串为日期格式,然后才能使用日期函数进行操作
update 表名 set 字段名=str_to_date(字段名, '%Y-%m-%d %H:%i') where 字段名 is not null;
三、
SELECT paidtime,date(paidtime),DATE_FORMAT(paidtime,"%Y-%m") as date from orderinfo;
#1、统计不同月份的下单人数
select date_format(paidtime,"%Y-%m") as month,count(distinct userid) from
orderinfo where ispaid !="未支付" GROUP BY month --按年分月
select month(paidtime) as month ,count(distinct userid)
from orderinfo where ispaid !="未支付" GROUP BY month; --分月
2、统计用户三月份的回购率(这个月买了,下个月又买了)和复购率(买的次数超过一次)
-- 回购率:
-- a先得到每个用户消费的月份
select userid, date_format(paidtime, '%Y-%m') from orderinfo
where ispaid = '已支付'
group by userid
-- b.将a得到的表与自己进行左链接
select * from(
select userid, date_format(paidtime, '%Y-%m-01') from orderinfo
where ispaid = '已支付'group by userid )
as t1
left join(
select userid, date_format(paidtime, '%Y-%m-01') from orderinfo
where ispaid = '已支付'
group by userid) as t2
on t1.userid=t2.userid
-- c.将t2付款时间-t1的付款时间 > 1,就是回购的用户
select t1.m, count(t1.m), count(t2.m) from(
select * from(
select userid, date_format(paidtime, '%Y-%m-01') as m from orderinfo
where ispaid = '已支付'group by userid )
as t1
left join(
select userid, date_format(paidtime, '%Y-%m-01') as m from orderinfo
where ispaid = '已支付'
group by userid) as t2
on t1.userid = t2.userid and t1.m = date_sub(t2.m, interval 1 month)
group by t1.m;
-- 复购率
-- a.先得到3月份每个用户购买的次数
select userid, count(userid) from orderinfo
where ispaid = '已支付' and month(paidtime)=3
group by userid
-- b.复购率=当月购买大于1次的用户数/当月购买的用户数
select count(userid) as 购买用户数, count(if(buynum> 1, 1, null)) as "复购用户数",
count(if(buynum> 1, 1, null))/count(userid)
from (
select userid, count(userid) as buynum from orderinfo
where ispaid = '已支付' and month(paidtime)=3
group by userid ) t
3、统计男女用户的消费频次是否有差异
select sex,avg(num) from(
select t1.userid,t2.sex,count(*) as num from orderinfo t1
inner join(
select t2.userid,t2.sex from user where sex is not null) as t2
on t1.userid = t2.userid
GROUP BY t1.userid,t2.sex) as t3
GROUP BY sex;
4、统计多次消费的用户,第一次和最后一次消费间隔是多少
select userid,min(paidtime),max(paidtime),
DATEDIFF(max(paidtime)-,min(paidtime)) as "间隔天数"
from orderinfo
where ispaid="已支付"
GROUP BY userid
having count(*)>1;
5、统计不同年龄段,用户消费金额是否有差异????
-- 答案
select age, avg(consume) from (
select orderinfo.userid, age, sum(price) as consume from orderinfo
inner join (
select userid, ceil((year(now)-year(birth))/10) as age from userinfo
where birth > date('1901-00-00')) t
on orderinfo.userid = userinfo.userid
where userinfo.ispaid='已支付'
group by orderinfo.userid, age) t1
group by age
--
关于case when
case
when 年龄>0 and 年龄<18 then "0-18岁"
when 年龄>=18 and 年龄<30 then "18-30岁"
when 年龄>=30 and 年龄<50 then "30-50岁"
else "50岁以上" end (
from (select userid,(year(now())-year(birth)) as 年龄 from user) as t2 where 年龄>0) as t3
INNER JOIN (select * from orderinfo where ispaid="已支付" ) as t1
on t1.userid=t3.userid
6、统计消费的二八法则,消费的top20%用户,贡献了多少额度
select sum(sumprice) "top20%总消费"
from (
select *,dense_rank() over(order by sumprice desc) as "排序" from
(
select userid,sum(price) as sumprice from orderinfo
where ispaid="已支付" GROUP BY userid) as t1) as t2
where 排序<(select count(distinct userid)*0.2 from orderinfo where ispaid="已支付");
利用窗口函数求得排序