A Round-Up of 9 SQL Interview Question Sets for Data Analysis
Golden September, Silver October: it's job-hunting season again. Recently quite a few members of the 数据蛙 data-analysis community have been asking about SQL questions they met in data-analyst interviews, so we have compiled a set of classic problems drawn from those interviews and from real work scenarios. Special thanks to members 001 and 002 for helping to organize the material.
Interview Question 1
1. The question is shown below
2. Create the table and insert the data
create table datafrog_test1
(userid varchar(20),
changjing varchar(20),
inttime varchar(20)
);
insert into datafrog_test1 values
(1,1001,1400),
(2,1002,1401),
(1,1002,1402),
(1,1001,1402),
(2,1003,1403),
(2,1004,1404),
(3,1003,1400),
(4,1004,1402),
(4,1003,1403),
(4,1001,1403),
(4,1002,1404),
(5,1002,1402),
(5,1002,1403),
(5,1001,1404),
(5,1003,1405);
3. Approach: rank each user's records, then join them with concat
select concat(t.userid, '-', group_concat(t.changjing order by t.inttime separator '-')) as result
from (
select userid, changjing, inttime,
if(@tmp = userid, @rank := @rank + 1, @rank := 1) as new_rank, -- running rank of scenes per user
@tmp := userid as tmp
from (select userid, changjing, min(inttime) inttime
      from datafrog_test1
      group by userid, changjing) temp, -- earliest time per user/scene
     (select @tmp := null, @rank := 0) vars -- initialize the user variables
order by userid, inttime
) t
where t.new_rank <= 2
group by t.userid;
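If you are on MySQL 8.0 or later, the same result can be computed without user variables at all; a minimal sketch using row_number(), assuming the same datafrog_test1 table:

select concat(userid, '-', group_concat(changjing order by inttime separator '-')) as result
from (
    select userid, changjing, inttime,
           row_number() over (partition by userid order by inttime) as new_rank -- rank scenes per user by time
    from (select userid, changjing, min(inttime) as inttime
          from datafrog_test1
          group by userid, changjing) t
) ranked
where new_rank <= 2
group by userid;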
4. Output:
5. Notes:
You may see an error when running the code above, usually caused by ONLY_FULL_GROUP_BY; the workaround is to run set sql_mode='' first. MySQL is perfectly fine as a query engine, but it is a bit underpowered for analysis: things like ranking and medians are awkward, so in practice analysis is usually done with pandas, SQL Server, Oracle, Hive, or Spark. That is not to say MySQL is useless; on the contrary, it is very useful, easy to pick up, and the foundation for the rest.
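If you would rather not blank out sql_mode entirely, a gentler option is to drop only the offending flag for the current session; a small sketch:

-- remove just ONLY_FULL_GROUP_BY from the session's sql_mode, leaving the other flags intact
set session sql_mode = (select replace(@@sql_mode, 'ONLY_FULL_GROUP_BY', ''));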
Interview Question 2
1. The question is shown below
2. Create the table and load the data
create database xiangji;
use xiangji;
create table userinfo(
uid varchar(10),
app_name varchar(20),
duration int(10),
times int(10),
dayno varchar(30)
);
load data infile 'D:/d.csv'
into table userinfo
fields terminated by ','
ignore 1 lines;
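If load data infile is rejected with a secure-file-priv error, the server is restricting which directory it may read files from; a quick diagnostic (not part of the original answer):

-- '' means no restriction, NULL means file import is disabled, a path means files must live there
show variables like 'secure_file_priv';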
Our 数据蛙 members have prepared a sample dataset for you; see the end of the article.
3. First, compute daily active users
select dayno, count(distinct uid) as 活跃度
from userinfo
where app_name='相机'
group by dayno;
4. Next-day retention
Self-join the table and use case when to find uid pairs whose dates differ by exactly one day; counting them gives the number of next-day retained users, with distinct to avoid double counting.
select
a.day1,count(distinct case when datediff(b.day2,a.day1)=1 then a.uid end) 次留
from
(select uid,date(dayno) as day1 from userinfo where app_name='相机') a
# date() turns the text dayno into a real date, so datediff() counts days correctly across month boundaries
left join
(select uid,date(dayno) as day2 from userinfo where app_name='相机') b
on a.uid=b.uid
group by a.day1;
5. Next-day, 3-day, and 7-day retention
select
day1,count(distinct a.uid) 活跃,
count(distinct case when datediff(day2,day1)=1 then a.uid end) 次留,
count(distinct case when datediff(day2,day1)=3 then a.uid end) 三留,
count(distinct case when datediff(day2,day1)=7 then a.uid end) 七留,
concat(count(distinct case when datediff(day2,day1)=1 then a.uid end)/count(distinct a.uid)*100,'%') 次日留存率,
concat(count(distinct case when datediff(day2,day1)=3 then a.uid end)/count(distinct a.uid)*100,'%') 三日留存率,
concat(count(distinct case when datediff(day2,day1)=7 then a.uid end)/count(distinct a.uid)*100,'%') 七日留存率
from (select uid,date(dayno) day1 from userinfo where app_name = '相机') a
left join (select uid,date(dayno) day2 from userinfo where app_name = '相机') b
on a.uid=b.uid
group by day1;
Key points: converting text to dates with date()/datediff(), count()/sum() combined with case when, and appending a percent sign with concat().
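A note on the datediff() calls above: this answer is often written by subtracting yyyymmdd values numerically, which silently breaks across month boundaries; a quick check of the failure mode:

-- numeric subtraction gives 70 here rather than 1, so a March 31 -> April 1 user would be missed
select 20190401 - 20190331 as numeric_diff,
       datediff('2019-04-01', '2019-03-31') as real_diff;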
Interview Question 3
1. Rows to columns (left side of the figure becomes the right side)
-- create the course table
create table course (
id varchar(20),
teacher_id varchar(20),
week_day varchar(20),
has_course varchar(20)
);
insert into course values
(1,1,2,"Yes"),
(2,1,3,"Yes"),
(3,2,1,"Yes"),
(4,3,2,"Yes"),
(5,1,2,"Yes")
;
select * from course;
-- rows to columns
select id,teacher_id,
(case when week_day = 1 then "Yes" else " " end) "mon",
(case when week_day = 2 then "Yes" else " " end) "tue",
(case when week_day = 3 then "Yes" else " " end) "wed",
(case when week_day = 4 then "Yes" else " " end) "thu",
(case when week_day = 5 then "Yes" else " " end) "fri"
from course;
Method 2:
select id,teacher_id,
(case week_day when 1 then "Yes" else " " end) "mon",
(case week_day when 2 then "Yes" else " " end) "tue",
(case week_day when 3 then "Yes" else " " end) "wed",
(case week_day when 4 then "Yes" else " " end) "thu",
(case week_day when 5 then "Yes" else " " end) "fri"
from course;
Key point: how to use case when.
4. The results:
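Both queries above keep one output row per course record. To collapse the result to one row per teacher, as in the figure, wrap the case expressions in an aggregate; a minimal sketch, assuming at most one lesson per teacher per weekday:

select teacher_id,
       max(case when week_day = 1 then "Yes" else " " end) "mon",
       max(case when week_day = 2 then "Yes" else " " end) "tue",
       max(case when week_day = 3 then "Yes" else " " end) "wed",
       max(case when week_day = 4 then "Yes" else " " end) "thu",
       max(case when week_day = 5 then "Yes" else " " end) "fri"
from course
group by teacher_id;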
Interview Question 4
1. The question
2. Create the table and insert the data
create table a1 (
name varchar(20),
english int,
maths int,
music int);
insert into a1 values
("Jim",90,88,99);
3. Solution
SELECT name, "english" AS subject, english AS score
FROM a1
UNION
SELECT name, "maths" AS subject, maths AS score
FROM a1
UNION
SELECT name, "music" AS subject, music AS score
FROM a1
ORDER BY name;
There are other ways to solve this one; give it some thought. It mainly tests transposing rows and columns, and one alternative is sketched below.
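For instance, instead of one union branch per column, you can cross join the row against a small derived table of subject names; a sketch of that idea:

select a1.name,
       s.subject,
       case s.subject
           when 'english' then a1.english
           when 'maths'   then a1.maths
           when 'music'   then a1.music
       end as score
from a1
cross join (
    select 'english' as subject
    union all select 'maths'
    union all select 'music'
) s
order by a1.name;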
Interview Question 5
1. The question
2. Create the table and insert the data
create table A2 (
FDATE datetime,
value int
);
insert into a2 values
("2018/11/23",10),
("2018/12/31",3),
("2019/2/9",53),
("2019/3/31",23),
("2019/7/8",11),
("2019/7/31",10);
# delete from a2 where value = 10;
select * from a2;
3. Solution to part 1
-- add an index on FDATE
create index id_FDATE on a2(FDATE);
show index from a2;
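To confirm the optimizer can actually use the new index, you can inspect the plan of a date-range query; the range below is only an illustration:

explain select * from a2
where FDATE >= '2019-01-01' and FDATE < '2019-08-01';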
4. Solution to part 2
SELECT FYEAR, FMONTH, VALU AS VALUE, YSUM, SUM
FROM (
SELECT b1.*
, @YSUM := if(@year = fyear, @YSUM + valu, valu) AS YSUM -- running total within the year
, @sum := @sum + valu AS SUM -- running total across all months
, @year := fyear
FROM (
SELECT year(FDATE) AS FYEAR, month(FDATE) AS FMONTH
, SUM(VALUE) AS valu
FROM A2
GROUP BY FYEAR, FMONTH -- group by year as well, or the same month from different years gets merged
ORDER BY FYEAR, FMONTH
) b1, (
SELECT @sum := 0, @YSUM := 0
, @YEAR := NULL
) b2
) B;
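On MySQL 8.0 or later, both running totals fall out of window functions directly, with no user variables and no evaluation-order pitfalls; a sketch over the same A2 table:

select year(FDATE)  as fyear,
       month(FDATE) as fmonth,
       sum(value)   as value,
       -- year-to-date running total, restarting each year
       sum(sum(value)) over (partition by year(FDATE) order by month(FDATE)) as ysum,
       -- grand running total across all months
       sum(sum(value)) over (order by year(FDATE), month(FDATE)) as total
from A2
group by year(FDATE), month(FDATE)
order by fyear, fmonth;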
Interview Question 6
1. The question
2. Create the table and insert the data
create table userlog
(
id int ,
name varchar(10),
EmailAddress varchar(50),
lastlogon varchar(50)
);
insert into userlog values(100,'test4','test4@yahoo.cn','2007-11-25 16:31:26');
insert into userlog values(13,'test1','test4@yahoo.cn','2007-3-22 16:27:07');
insert into userlog values(19,'test1','test4@yahoo.cn','2007-10-25 14:13:46');
insert into userlog values(42,'test1','test4@yahoo.cn','2007-10-25 14:20:10');
insert into userlog values(45,'test2','test4@yahoo.cn','2007-4-25 14:17:39');
insert into userlog values(49,'test2','test4@yahoo.cn','2007-5-25 14:22:36');
3.1 Solution to part 1, method 1
select user2.name, user2.EmailAddress, count(user2.day1), max(user2.time2)
from
(select *, max(time1) time2 from
(select
name,EmailAddress,
date_format(lastlogon, '%Y-%m-%d %H:%i:%s') as time1,
date_format(substring_index(lastlogon,' ',1),'%Y-%m-%d') as day1
from userlog order by name asc, time1 asc
)
as user1
group by user1.name, user1.day1) user2 -- one row per user per login day, with that day's latest login
group by user2.name;
3.2 Solution to part 1, method 2
select t1.name, max(t1.lastlogon), max(t1.days), count(distinct days)
from
(
select
id, name,
emailAddress,
date_format(lastlogon,'%Y-%m-%d %H:%i:%s') as lastlogon,
date_format(lastlogon,'%Y-%m-%d') as days
from userlog as u
) as t1
group by t1.name;
4. Solution to part 2
DROP TABLE IF EXISTS tmp_table;
CREATE TEMPORARY TABLE tmp_table
select
user2.name as Name ,user2.time1 as lastlogon ,user2.rank1 as num_logontime,user2.rank2 as num_logonday
from
(select * ,
@rank:=if(@nam=user1.name,@rank+1,1) rank1, -- running login count for this user
@rank1:=if(@nam=user1.name,if(@da=user1.day1,@rank1,@rank1+1),1) rank2, -- running count of distinct login days
@nam:=user1.name,
@da:=user1.day1
from
(select
name,EmailAddress,
date_format(lastlogon, '%Y-%m-%d %H:%i:%s') as time1,
date_format(substring_index(lastlogon,' ',1),'%Y-%m-%d') as day1
from userlog order by name asc ,time1 asc
)
as user1 ,(select @rank:=0,@rank1:=0,@nam:=null,@da:=null) temp
order by user1.name,user1.time1) as user2
;
select * from tmp_table
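On MySQL 8.0 or later, the two running counters can be expressed as window functions, which is easier to get right than chained user variables; a minimal sketch:

select name,
       cast(lastlogon as datetime) as logon_time,
       -- running count of this user's logins, ordered by time
       row_number() over (partition by name order by cast(lastlogon as datetime)) as num_logontime,
       -- running count of this user's distinct login days
       dense_rank() over (partition by name order by date(cast(lastlogon as datetime))) as num_logonday
from userlog
order by name, logon_time;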
Interview Question 7
1. The question
2. Create the table and insert the data
create table tableA (qq int(20),
game varchar(20));
insert into tableA values
(10000,"a"),
(10000,"b"),
(10000,"c"),
(20000,"c"),
(20000,"d");
3. Solution to part 1
drop table if exists tableB;
create TEMPORARY table tableB
(
select qq,group_concat(game separator"-") as game
from tableA group by qq);
select * from tableB;
4.1 Solution to part 2: how substring_index works
select qq,game,
substring_index(game,"-",0) a0,
substring_index(game,"-",1) a1,
substring_index(game,"-",2) a2,
substring_index(game,"-",3) a3,
substring_index(game,"-",-1) "a-1",
substring_index(substring_index(game,"-",2),"-",-1) "a(a-1)"
from tableB;
4.2 Solution to part 2: help_topic_id in mysql.help_topic increments from 0
select * from mysql.help_topic;
4.3 Solution to part 2: approach
Left join the built-in table mysql.help_topic to get the auto-incrementing help_topic_id, and use length() arithmetic to bound the number of '-' separators.
select a.*,
help_topic_id as id,
substring_index(game,"-",help_topic_id+1) as "sbi(id+1)",
substring_index(substring_index(game,"-",help_topic_id+1),"-",-1) as "sbi(sbi(id+1),-1)",
length(game) as 全长,
length(replace(game,"-","")) as 除去符号长度,
length(game)-length(replace(game,"-",""))+1 as "分隔符个数+1"
from tableB a
left join mysql.help_topic as b
on help_topic_id < (length(game)-length(replace(game,"-",""))+1);
4.4 Solution to part 2: putting it together
select qq,
substring_index(substring_index(game,"-",help_topic_id+1),"-",-1) as game
from tableB a
left join mysql.help_topic as b
on help_topic_id < (length(game)-length(replace(game,"-",""))+1);
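If you would rather not depend on mysql.help_topic (access to the mysql schema varies by environment), a recursive CTE on MySQL 8.0+ can generate the same counter; a sketch assuming at most 10 games per qq:

with recursive nums as (
    select 1 as n
    union all
    select n + 1 from nums where n < 10 -- assumed upper bound on games per qq
)
select qq,
       substring_index(substring_index(game, '-', n), '-', -1) as game
from tableB
join nums
  on n <= length(game) - length(replace(game, '-', '')) + 1;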
Interview Question 8
1. The question
1.1 In case the screenshot is hard to read, here are the sub-questions:
1. From June 1, 2019 onward, compute daily DAU (active users, i.e. users who logged in).
2. From 20190601 onward, compute the daily count of new users and of old users claiming red packets, plus the per-user average amount and claim count.
3. For each month from March 2019, split users by number of red-packet claim days (1, 2, 3 ... 30, 31) and compute the monthly claiming-user count, per-user average amount, and per-user claim count.
4. For each month from March 2019, count users who claimed and who never claimed red packets, and their average monthly active days (i.e. how many days they were active that month).
5. For each month from March 2019 onward, list each active user's registration date; for users registered before March 1, 2019 the date may be left blank.
6. For each day from March 2019 onward, compute the next-day retention rate overall, for users who claimed red packets, and for users who did not.
7. From June 1, 2019 onward, compute the amount of the first red packet claimed by each day's new users.
8. From March 1, 2019 onward, compute the time gap between each new user's first and second red packets (counting only users who claimed on their registration day; new users are those with is_new = 1 in the DAU table on or after that day).
2. Create the tables and load the data
2.1 User activity table
create table tmp_liujg_dau_based(
imp_date varchar(20) not null comment 'date',
qimei varchar(20) not null comment 'unique user id',
is_new varchar(10) comment 'new-user flag: 1 = new user, 0 = existing user',
primary key(imp_date,qimei));
ALTER TABLE tmp_liujg_dau_based COMMENT 'user activity table';
2.2 Red-packet claim table
create table tmp_liujg_packed_based
(
imp_date varchar(20) comment 'date',
report_time varchar(20) comment 'claim timestamp',
qimei varchar(20) not null comment 'unique user id',
add_money varchar(20) not null comment 'amount claimed, in cents');
ALTER TABLE tmp_liujg_packed_based COMMENT 'red-packet claim table';
2.3 Load the user activity table
insert into tmp_liujg_dau_based values('20190301','001','0')
;insert into tmp_liujg_dau_based values('20190301','002','0')
;insert into tmp_liujg_dau_based values('20190301','003','1')
;insert into tmp_liujg_dau_based values('20190301','004','1')
;insert into tmp_liujg_dau_based values('20190301','005','1')
;insert into tmp_liujg_dau_based values('20190301','006','1')
;insert into tmp_liujg_dau_based values('20190302','001','0')
;insert into tmp_liujg_dau_based values('20190302','002','0')
;insert into tmp_liujg_dau_based values('20190302','003','0')
;insert into tmp_liujg_dau_based values('20190302','005','0')
;insert into tmp_liujg_dau_based values('20190302','006','0')
;insert into tmp_liujg_dau_based values('20190302','007','1')
;insert into tmp_liujg_dau_based values('20190303','005','0')
;insert into tmp_liujg_dau_based values('20190303','006','0')
;insert into tmp_liujg_dau_based values('20190303','007','0')
;insert into tmp_liujg_dau_based values('20190303','008','1')
;insert into tmp_liujg_dau_based values('20190303','009','1')
;insert into tmp_liujg_dau_based values('20190303','010','1')
;insert into tmp_liujg_dau_based values('20190401','008','0')
;insert into tmp_liujg_dau_based values('20190401','009','0')
;insert into tmp_liujg_dau_based values('20190401','010','0')
;insert into tmp_liujg_dau_based values('20190401','011','1')
;insert into tmp_liujg_dau_based values('20190401','012','1')
;insert into tmp_liujg_dau_based values('20190402','009','0')
;insert into tmp_liujg_dau_based values('20190402','010','0')
;insert into tmp_liujg_dau_based values('20190402','011','0')
;insert into tmp_liujg_dau_based values('20190402','012','0')
;insert into tmp_liujg_dau_based values('20190402','013','1')
;insert into tmp_liujg_dau_based values('20190402','014','1')
;insert into tmp_liujg_dau_based values('20190501','001','0')
;insert into tmp_liujg_dau_based values('20190501','002','0')
;insert into tmp_liujg_dau_based values('20190501','008','0')
;insert into tmp_liujg_dau_based values('20190501','007','0')
;insert into tmp_liujg_dau_based values('20190501','015','1')
;insert into tmp_liujg_dau_based values('20190501','016','1')
;insert into tmp_liujg_dau_based values('20190501','017','1')
;insert into tmp_liujg_dau_based values('20190501','018','1')
;insert into tmp_liujg_dau_based values('20190601','008','0')
;insert into tmp_liujg_dau_based values('20190601','017','0')
;insert into tmp_liujg_dau_based values('20190601','018','0')
;insert into tmp_liujg_dau_based values('20190601','019','1')
;insert into tmp_liujg_dau_based values('20190601','020','1')
;insert into tmp_liujg_dau_based values('20190601','021','1')
;insert into tmp_liujg_dau_based values('20190601','022','1')
;insert into tmp_liujg_dau_based values('20190603','021','0')
;insert into tmp_liujg_dau_based values('20190603','022','0')
;insert into tmp_liujg_dau_based values('20190603','011','0')
;insert into tmp_liujg_dau_based values('20190603','012','0')
;insert into tmp_liujg_dau_based values('20190603','023','1')
;insert into tmp_liujg_dau_based values('20190701','023','0')
;insert into tmp_liujg_dau_based values('20190701','008','0')
;insert into tmp_liujg_dau_based values('20190701','011','0')
;insert into tmp_liujg_dau_based values('20190701','022','0')
;insert into tmp_liujg_dau_based values('20190701','012','0')
;insert into tmp_liujg_dau_based values('20190701','024','1')
;insert into tmp_liujg_dau_based values('20190701','025','1')
;insert into tmp_liujg_dau_based values('20190701','026','1')
;insert into tmp_liujg_dau_based values('20190701','027','1')
;insert into tmp_liujg_dau_based values('20190705','026','0')
;insert into tmp_liujg_dau_based values('20190705','027','0')
;insert into tmp_liujg_dau_based values('20190705','009','0')
;insert into tmp_liujg_dau_based values('20190705','010','0')
;insert into tmp_liujg_dau_based values('20190705','028','1')
;insert into tmp_liujg_dau_based values('20190705','029','1')
;
2.4 Load the red-packet claim table
insert into tmp_liujg_packed_based values('20190301','2019/03/01 10:15:01','001','1.05')
;insert into tmp_liujg_packed_based values('20190301','2019/03/01 13:15:01','001','2.30')
;insert into tmp_liujg_packed_based values('20190301','2019/03/01 11:15:01','002','0.80')
;insert into tmp_liujg_packed_based values('20190301','2019/03/01 17:15:01','002','0.89')
;insert into tmp_liujg_packed_based values('20190301','2019/03/01 14:15:01','003','2.12')
;insert into tmp_liujg_packed_based values('20190301','2019/03/01 18:15:01','003','1.12')
;insert into tmp_liujg_packed_based values('20190301','2019/03/01 17:15:01','005','1.12')
;insert into tmp_liujg_packed_based values('20190301','2019/03/01 19:15:01','005','0.12')
;insert into tmp_liujg_packed_based values('20190301','2019/03/01 09:15:01','006','0.98')
;insert into tmp_liujg_packed_based values('20190301','2019/03/01 11:15:01','006','1.45')
;insert into tmp_liujg_packed_based values('20190302','2019/03/02 09:30:01','001','0.78')
;insert into tmp_liujg_packed_based values('20190302','2019/03/02 19:30:01','001','0.88')
;insert into tmp_liujg_packed_based values('20190302','2019/03/02 10:30:01','003','0.68')
;insert into tmp_liujg_packed_based values('20190302','2019/03/02 09:30:01','005','1.01')
;insert into tmp_liujg_packed_based values('20190302','2019/03/02 14:30:01','005','1.88')
;insert into tmp_liujg_packed_based values('20190302','2019/03/02 14:30:01','006','1.88')
;insert into tmp_liujg_packed_based values('20190302','2019/03/02 10:30:01','006','0.68')
;insert into tmp_liujg_packed_based values('20190302','2019/03/02 15:30:01','007','0.68')
;insert into tmp_liujg_packed_based values('20190302','2019/03/02 16:30:01','007','1.78')
;insert into tmp_liujg_packed_based values('20190303','2019/03/03 16:30:01','005','0.68')
;insert into tmp_liujg_packed_based values('20190303','2019/03/03 08:50:01','006','0.32')
;insert into tmp_liujg_packed_based values('20190303','2019/03/03 10:50:01','006','1.78')
;insert into tmp_liujg_packed_based values('20190303','2019/03/03 10:50:01','007','0.32')
;insert into tmp_liujg_packed_based values('20190303','2019/03/03 15:50:01','008','1.01')
;insert into tmp_liujg_packed_based values('20190303','2019/03/03 17:50:01','008','1.68')
;insert into tmp_liujg_packed_based values('20190303','2019/03/03 10:30:01','010','1.88')
;insert into tmp_liujg_packed_based values('20190303','2019/03/03 15:50:01','010','0.32')
;insert into tmp_liujg_packed_based values('20190401','2019/04/01 09:50:00','008','0.18')
;insert into tmp_liujg_packed_based values('20190401','2019/04/01 11:50:00','009','0.88')
;insert into tmp_liujg_packed_based values('20190401','2019/04/01 12:35:00','009','0.32')
;insert into tmp_liujg_packed_based values('20190401','2019/04/01 17:50:00','010','1.01')
;insert into tmp_liujg_packed_based values('20190401','2019/04/01 12:35:00','011','1.68')
;insert into tmp_liujg_packed_based values('20190401','2019/04/01 12:50:00','011','0.88')
;insert into tmp_liujg_packed_based values('20190401','2019/04/01 15:50:00','012','0.32')
;insert into tmp_liujg_packed_based values('20190401','2019/04/01 17:50:00','012','1.68')
;insert into tmp_liujg_packed_based values('20190401','2019/04/01 12:35:00','012','1.88')
;insert into tmp_liujg_packed_based values('20190402','2019/04/02 09:50:00','009','0.18')
;insert into tmp_liujg_packed_based values('20190402','2019/04/02 19:50:00','009','1.18')
;insert into tmp_liujg_packed_based values('20190402','2019/04/02 17:50:00','010','0.88')
;insert into tmp_liujg_packed_based values('20190402','2019/04/02 16:50:00','010','0.32')
;insert into tmp_liujg_packed_based values('20190402','2019/04/02 19:50:00','010','0.32')
;insert into tmp_liujg_packed_based values('20190402','2019/04/02 16:50:00','013','0.88')
;insert into tmp_liujg_packed_based values('20190402','2019/04/02 19:55:00','013','0.88')
;insert into tmp_liujg_packed_based values('20190402','2019/04/02 20:55:00','013','1.01')
;insert into tmp_liujg_packed_based values('20190402','2019/04/02 19:55:00','014','0.32')
;insert into tmp_liujg_packed_based values('20190402','2019/04/02 20:55:00','014','1.01')
;insert into tmp_liujg_packed_based values('20190501','2019/05/01 09:50:00','001','1.18')
;insert into tmp_liujg_packed_based values('20190501','2019/05/01 09:55:00','002','0.32')
;insert into tmp_liujg_packed_based values('20190501','2019/05/01 10:55:00','002','0.32')
;insert into tmp_liujg_packed_based values('20190501','2019/05/01 10:00:00','007','0.88')
;insert into tmp_liujg_packed_based values('20190501','2019/05/01 10:55:00','015','0.88')
;insert into tmp_liujg_packed_based values('20190501','2019/05/01 10:00:00','015','0.32')
;insert into tmp_liujg_packed_based values('20190501','2019/05/01 14:00:00','017','1.01')
;insert into tmp_liujg_packed_based values('20190501','2019/05/01 15:00:00','017','1.01')
;insert into tmp_liujg_packed_based values('20190501','2019/05/01 15:30:00','018','0.88')
;insert into tmp_liujg_packed_based values('20190501','2019/05/01 16:30:00','018','0.68')
;insert into tmp_liujg_packed_based values('20190601','2019/06/01 09:50:00','008','1.38')
;insert into tmp_liujg_packed_based values('20190601','2019/06/01 14:50:00','017','0.88')
;insert into tmp_liujg_packed_based values('20190601','2019/06/01 11:50:00','019','1.01')
;insert into tmp_liujg_packed_based values('20190601','2019/06/01 13:50:00','019','0.88')
;insert into tmp_liujg_packed_based values('20190601','2019/06/01 14:50:00','019','0.68')
;insert into tmp_liujg_packed_based values('20190602','2019/06/02 09:50:00','021','0.38')
;insert into tmp_liujg_packed_based values('20190602','2019/06/02 13:50:00','012','0.88')
;insert into tmp_liujg_packed_based values('20190602','2019/06/02 15:50:00','012','1.12')
;insert into tmp_liujg_packed_based values('20190602','2019/06/02 13:59:00','023','0.88')
;insert into tmp_liujg_packed_based values('20190602','2019/06/02 15:50:00','023','1.01')
;insert into tmp_liujg_packed_based values('20190701','2019/07/01 09:50:00','023','0.38')
;insert into tmp_liujg_packed_based values('20190701','2019/07/01 13:50:00','023','0.78')
;insert into tmp_liujg_packed_based values('20190701','2019/07/01 14:50:00','008','0.68')
;insert into tmp_liujg_packed_based values('20190701','2019/07/01 14:50:00','024','0.68')
;insert into tmp_liujg_packed_based values('20190701','2019/07/01 15:50:00','024','1.68')
;insert into tmp_liujg_packed_based values('20190701','2019/07/01 14:50:00','026','0.68')
;insert into tmp_liujg_packed_based values('20190701','2019/07/01 18:50:00','026','1.68')
;insert into tmp_liujg_packed_based values('20190701','2019/07/01 14:50:00','027','0.68')
;insert into tmp_liujg_packed_based values('20190701','2019/07/01 19:35:00','027','1.11')
;insert into tmp_liujg_packed_based values('20190702','2019/07/02 09:50:00','026','0.78')
;insert into tmp_liujg_packed_based values('20190702','2019/07/02 11:50:00','026','0.78')
;insert into tmp_liujg_packed_based values('20190702','2019/07/02 13:50:00','028','1.01')
;insert into tmp_liujg_packed_based values('20190702','2019/07/02 14:35:00','028','0.88')
;insert into tmp_liujg_packed_based values('20190702','2019/07/02 15:35:00','028','0.33')
3. Solution to part 1
From June 1, 2019 onward, compute daily DAU (active users, i.e. users who logged in).
Select imp_date,count(qimei) dau from tmp_liujg_dau_based
where imp_date >= '20190601'
Group by imp_date
4. Solution to part 2
From 20190601 onward, compute the daily count of new and old users claiming red packets, plus the per-user average amount and claim count.
Select
aa.imp_date,
aa.is_new,count(distinct aa.qimei) 领取红包人数,
sum(aa.add_money)/count(distinct aa.qimei) 人均领取金额,
count(aa.report_time)/count(distinct aa.qimei) 人均领取次数
from
(Select a.imp_date,a.qimei,a.add_money,a.report_time,
Case when b.is_new = 1 then '新用户' when b.is_new = 0 then '老用户' else '领取红包但未登陆' end is_new
from
tmp_liujg_packed_based a
Left join tmp_liujg_dau_based b on a.imp_date = b.imp_date and a.qimei = b.qimei where a.imp_date >= '20190601')aa
Group by aa.imp_date,aa.is_new
5. Solution to part 3
For each month from March 2019, split by the number of red-packet claim days (1, 2, 3 ... 30, 31) and compute the monthly claiming-user count, per-user average amount, and per-user claim count.
Select
left(imp_date,6) 月份,
count(distinct imp_date) 每月领取天数,
count(distinct qimei) 每月用户数,
sum(add_money)/count(distinct qimei) 每月人均领取金额,
count(report_time)/count(distinct qimei) 每月人均领取次数
from
tmp_liujg_packed_based
where imp_date >= '20190301'
Group by left(imp_date,6)
6. Solution to part 4
For each month from March 2019, count users who claimed and who never claimed red packets, and their average monthly active days (i.e. how many days they were active that month).
Select
left(cc.imp_date,6) 月份,
cc.is_packet_user 红包用户,
Count(distinct cc.qimei) 用户数量,
Count(is_packet_user)/Count(distinct cc.qimei) 月活跃天
from
(Select
a.imp_date, a.qimei,b.qimei hb_qimei,
Case when b.qimei is not null then '红包用户' else '非红包用户' end is_packet_user,
Case when b.qimei is not null then b.qimei else a.qimei end is_qimei
from tmp_liujg_dau_based a
Left join
(select distinct left(imp_date,6) imp_date ,qimei from tmp_liujg_packed_based where imp_date >= '20190301' )b
On left(a.imp_date,6) = b.imp_date and a.qimei = b.qimei)cc
Group by left(cc.imp_date,6),cc.is_packet_user
7. Solution to part 5
For each month from March 2019 onward, list each active user's registration date; for users registered before March 1, 2019 the date may be left blank.
Select left(a.imp_date,6) 月份, a.qimei, b.用户注册日期
From tmp_liujg_dau_based a
Left join
(Select qimei,min(imp_date) 用户注册日期 from
tmp_liujg_dau_based
where is_new = 1 and imp_date >= '20190301' group by qimei ) b on a.qimei = b.qimei
Where a.imp_date >='20190301'
8. Solution to part 6
For each day from March 2019 onward, compute the next-day retention rate overall, for users who claimed red packets, and for users who did not.
Next-day retention rate: of the users active on a given day, the number who log in again the next day, divided by that day's total.
Note: the activity and red-packet tables only contain data for Mar 1, Mar 2, Mar 3, Apr 1, Apr 2, May 1, Jun 1, Jun 3, Jul 1, and Jul 5.
Select
cc.imp_date 日期,
count(distinct cc.qimei) 当日用户数,
count(distinct cc.Hb_qimei)/count(distinct cc.qimei) 当日领取红包用户占比,
count(distinct cc.qimei_lc)/count(distinct cc.qimei) 次日留存率,
count(distinct cc.hb_qimei_lc)/count(distinct cc.hb_qimei) 当日领取红包用户留存率
from
(Select aa.imp_date,
aa.qimei, -- users active that day
aa.Hb_qimei, -- users who claimed a red packet that day
bb.qimei qimei_lc, -- users still active the next day
Case when aa.Hb_qimei = bb.qimei then aa.Hb_qimei else null end hb_qimei_lc -- claimed a packet and logged in again the next day
-- hb_qimei_lc: day-1 red-packet users matched against day-2 active users (the join already enforces adjacency)
from
(Select a.imp_date, a.qimei, b.qimei as Hb_qimei
from tmp_liujg_dau_based a
Left join tmp_liujg_packed_based b on a.imp_date = b.imp_date and a.qimei = b.qimei
Where a.imp_date>='20190301'
)aa
Left join
(Select a.imp_date,a.qimei,b.qimei as Hb_qimei
from tmp_liujg_dau_based a
Left join tmp_liujg_packed_based b on a.imp_date = b.imp_date and a.qimei = b.qimei
Where a.imp_date>= '20190301'
)bb
-- datediff on real dates avoids the month-boundary bug of subtracting yyyymmdd numbers
On datediff(str_to_date(bb.imp_date,'%Y%m%d'), str_to_date(aa.imp_date,'%Y%m%d')) = 1 and aa.qimei = bb.qimei) cc
Group by cc.imp_date
9. Solution to part 7
From June 1, 2019 onward, compute the amount of the first red packet claimed by each day's new users.
Note: since the dataset is small, data from March 1 onward is used here instead.
Select
a.imp_date 日期,
a.qimei 用户ID,
b.report_time 第一次领取红包时间,
b.add_money 领取红包金额
from
(select * from tmp_liujg_dau_based where is_new = '1' and Imp_date >='20190301') a
Left join
(select * from tmp_liujg_packed_based where concat_ws('-',imp_date,qimei,report_time) in
(select concat_ws('-',imp_date,qimei,min(report_time)) from tmp_liujg_packed_based group by imp_date,qimei))b
on a.imp_date = b.imp_date and a.qimei = b.qimei
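On MySQL 8.0 or later, the concat_ws key trick for picking each day's first claim can be replaced with row_number(); a sketch of the same part-7 logic:

Select a.imp_date 日期, a.qimei 用户ID, b.report_time 第一次领取红包时间, b.add_money 领取红包金额
from (select * from tmp_liujg_dau_based where is_new = '1' and imp_date >= '20190301') a
Left join (
    select imp_date, qimei, report_time, add_money,
           row_number() over (partition by imp_date, qimei order by report_time) as rn -- 1 = first claim that day
    from tmp_liujg_packed_based
) b on a.imp_date = b.imp_date and a.qimei = b.qimei and b.rn = 1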
10. Solution to part 8
From March 1, 2019 onward, compute the time gap between each new user's first and second red packets (counting only users who claimed on their registration day; new users are those flagged is_new = 1 in the DAU table on or after that day).
Select
aaa.imp_date 注册日期,
aaa.qimei,
aaa.report_time 新用户第一次领取红包的时间,
bbb.第二次领红包时间,
TIMESTAMPDIFF(Minute,aaa.report_time,bbb.第二次领红包时间) 第一次第二次红包时间间隔_分钟
from
(
Select a.imp_date,a.qimei,b.report_time from
(select * from tmp_liujg_dau_based where is_new = '1' and Imp_date >='20190301') a
inner join
(select * from tmp_liujg_packed_based where concat_ws('-',imp_date,qimei,report_time) in
(select concat_ws('-',imp_date,qimei,min(report_time)) from tmp_liujg_packed_based group by imp_date,qimei))b
on a.imp_date = b.imp_date and a.qimei = b.qimei
)aaa
Left join
(Select min(bb.imp_date) imp_date, -- registration date
bb.qimei,
min(report_time) 第二次领红包时间
from
(Select imp_date,qimei,report_time from tmp_liujg_packed_based where Imp_date >='20190301'
-- keep only red-packet records of new users registered on or after March 1
And qimei in
(Select distinct qimei from tmp_liujg_dau_based where is_new = '1' and Imp_date >='20190301')
-- exclude each new user's first claim on the registration day
And concat_ws('-',imp_date,qimei,report_time)
Not in
(Select concat_ws('-',a.imp_date,a.qimei,b.report_time) from
(select * from tmp_liujg_dau_based where is_new = '1' and Imp_date >='20190301') a
inner join
(select * from tmp_liujg_packed_based where concat_ws('-',imp_date,qimei,report_time) in
(select concat_ws('-',imp_date,qimei,min(report_time)) from tmp_liujg_packed_based group by imp_date,qimei))b
on a.imp_date = b.imp_date and a.qimei = b.qimei))bb
group by bb.qimei)bbb
On aaa.qimei = bbb.qimei and aaa.imp_date = bbb.imp_date
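For reference, on MySQL 8.0 or later part 8 collapses to ranking each new user's claims from March 1 onward and pairing rank 1 with rank 2; a sketch that assumes, per the question, the first claim falls on the registration day:

select d.imp_date 注册日期,
       d.qimei,
       min(case when p.rn = 1 then p.report_time end) as 第一次领取时间,
       min(case when p.rn = 2 then p.report_time end) as 第二次领取时间,
       timestampdiff(minute,
                     min(case when p.rn = 1 then p.report_time end),
                     min(case when p.rn = 2 then p.report_time end)) as 间隔_分钟
from tmp_liujg_dau_based d
join (
    select imp_date, qimei, report_time,
           row_number() over (partition by qimei order by report_time) as rn -- rank each user's claims by time
    from tmp_liujg_packed_based
    where imp_date >= '20190301'
) p on d.qimei = p.qimei
where d.is_new = '1' and d.imp_date >= '20190301'
group by d.imp_date, d.qimei
-- keep only users whose first claim happened on the registration day
having min(case when p.rn = 1 then p.imp_date end) = d.imp_date;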