MysSQL——用户消费行为分析
2020-05-19 本文已影响0人
Mandygoon
所需数据:表1:orderinfo.csv,表2:userinfo.csv
数据来源:网络
分析维度:下单人数,回购率, 复购率, 消费频次, 二八法则等
一、将数据导入数据库
二、用户消费行为分析
- 统计不同月份的下单人数
2.统计用户三月份的回购率和复购率
3.统计男女的消费频次是否有差异
4.统计多次消费的用户,第一次和最后一次消费时间的间隔
5.统计不同年龄段的用户消费金额是否有差异
6.统计消费的二八法则,消费的top20%用户,贡献了多少额度
一、将数据导入数据库
两份csv的数据都是10万起的数据,我使用office已经打不开了,所以对于这类大量数据文件,使用SQL或者python来处理比较合适
(一)建立订单明细表
CREATE TABLE ORDERINFO (
ORDERID varchar(10) NULL, --订单ID
USERID varchar(10) NULL, --用户ID,可以和用户表关联
ISPAID varchar(10) NULL, --是否支付
PRICE varchar(10) NULL, --订单价格
PAIDTIME varchar(100) NULL --订单支付价格
)
(二)建立用户表
CREATE TABLE YWUSER.USERINFO (
USERID varchar(10) NULL,--用户ID
SEX varchar(10) NULL,--性别
BIRTH date NULL --出生日期
)
(三)导入数据
我使用的是cmd导入的方式
mysql->load data local infile 'D:/MYSQL/mysql_sj/用户行为分析/orderinfo.csv' into table mydata.orderinfo
-> fields terminnated by ',';
二、使用MySQL分析用户消费行为
(一)统计不同月份的下单人数
SELECT
concat(
substring( paidtime, 1, 4 ),
'-',
substring( paidtime, 6, 1 )) AS month_,
count( DISTINCT userid ) AS num
FROM
orderinfo
WHERE
ispaid = '已支付'
GROUP BY
month_
image.png
(二)统计用户三月份的回购率和复购率
1.复购率
--复购率:重复购买(消费两次)的客户占总消费客户的比重
SELECT
count( t1.userid ) 三月份购买的总用户数,
count( t1.num1 ) 三月份多次购买的用户数,
round( count( t1.num1 )/ count( t1.userid ), 2 ) as 复购率
FROM
(SELECT t.userid userid,
CASE WHEN t.num > 1 THEN
1 ELSE NULL
END AS num1
FROM
(SELECT userid, count( userid ) AS num
FROM
orderinfo
WHERE
ispaid = '已支付'
AND substring( paidtime, 6, 1 ) = 3
GROUP BY
userid
) AS t
) AS t1
image.png
2.回购率
--回购率——三月购买的客户四月份依然购买
SELECT
count( t.userid ) AS 三月回购客户数,
( SELECT count( DISTINCT userid ) FROM orderinfo WHERE ispaid = '已支付' AND substring( paidtime, 6, 1 ) = 4 ) AS 四月消费客户数,
round(
count( t.userid )/(SELECT count( DISTINCT userid )
FROM orderinfo
WHERE ispaid = '已支付'
AND substring( paidtime, 6, 1 ) = 4),2) AS 回购率
FROM
(SELECT DISTINCT
userid,
count( userid ) AS num
FROM orderinfo
WHERE ispaid = '已支付'
AND substring( paidtime, 6, 1 ) = 4
AND userid IN ( SELECT userid FROM orderinfo WHERE ispaid = '已支付' AND substring( paidtime, 6, 1 ) = 3 GROUP BY userid )
GROUP BY userid
) AS t
image.png
(三)统计男女的消费频次是否有差
--消费频次指一定时间内,平均一个消费者发生消费的频次。本题以一个月为单位计算。
SELECT t.month_, t.sex,
round( avg( num ), 2 ) AS 消费频数
FROM
(SELECT u.userid, u.sex, count( o.orderid ) AS num,
concat(
substring( paidtime, 1, 4 ),
'-',
substring( paidtime, 6, 1 )) AS month_
FROM
orderinfo o
INNER JOIN userinfo u ON u.userid = o.userid
WHERE
o.ispaid = '已支付'
GROUP BY u.sex, u.userid, month_
) AS t
GROUP BY t.sex, t.month_
image.png
(四)统计多次消费的用户,第一次和最后一次消费时间的间隔
----先统计消费2次以上的客户使用group by,再根据消费时间查看间隔,日期相加减使用datediff函数
SELECT
t.month_,
t.userid,
t.num,
t.paidtime_max,
t.paidtime_min,
datediff( t.paidtime_max, t.paidtime_min ) AS 间隔
FROM
(
SELECT concat(
substring( paidtime, 1, 4 ),
'-',
substring( paidtime, 6, 1 )) AS month_,
userid,
count( orderid ) AS num,
date_format( substring( REPLACE ( max( paidtime ), '/', '-' ), 1, 8 ), '%Y-%m-%d' ) AS paidtime_max,
date_format( substring( REPLACE ( min( paidtime ), '/', '-' ), 1, 8 ), '%Y-%m-%d' ) AS paidtime_min
FROM orderinfo
WHERE ispaid = '已支付'
GROUP BY userid,month_
HAVING num > 1
) AS t
(五)统计不同年龄段的用户消费金额是否有差异
--这里排除了没有年龄的数据
select q.NLD,
round(sum(q.price_f),2) as 消费总金额,
round(avg(q.price_f),2) as 平均消费
from(
select t.orderid, t.userid,
convert(price, float) as price_f,
(case when t.age between 0 and 10 then '0-10岁'
when t.age between 11 and 20 then '11-20岁'
when t.age between 21 and 30 then '21-30岁'
when t.age between 31 and 40 then '31-40岁'
when t.age between 41 and 50 then '41-50岁'
when t.age between 51 and 60 then '51-60岁'
when t.age between 61 and 70 then '61-70岁' else
'70以上' end) as NLD
from (
select o.orderid,o.userid,
o.ispaid,o.price,a.age
from orderinfo o
inner join (select userid,
(date_format(now(), '%Y')-date_format(birth, '%Y')) as age
from userinfo where birth not like '0000%') as a
on o.userid = a.userid
where o.ispaid = '已支付') as t) as q
group by q.NLD
order by q.NLD asc
image.png
(六)统计消费的二八法则,消费的top20%用户,贡献了多少额度
--按userid统计消费情况,然后根据排序查看贡献的额度
SELECT
sum( num )
FROM
(SELECT
a.userid,
a.num,
@rank := @rank + 1 AS rank_r
FROM
(SELECT
userid,
round( sum( CONVERT ( price, FLOAT )), 2 ) AS num
FROM orderinfo,
( SELECT @rank := 0 ) i
WHERE
ispaid = '已支付'
GROUP BY userid
ORDER BY num DESC
) a
) b
WHERE
rank_r < (
SELECT
count( DISTINCT userid ) * 0.2
FROM orderinfo
WHERE ispaid = '已支付')
image.png
第一次分享数据分析的的内容,请各位大佬多多指教。