sql面试题(二)

2023-05-04  本文已影响0人  Yobhel
image.png
id      dt              lowcarbon
1001    2021-12-12      123
1002    2021-12-12      45
1001    2021-12-13      43
1001    2021-12-13      45
1001    2021-12-13      23
1002    2021-12-14      45
1001    2021-12-14      230
1002    2021-12-15      45
1001    2021-12-15      23
找出连续3天及以上减少碳排放量在100以上的用户

1)按照用户ID及时间字段分组,计算每个用户单日减少的碳排放量
select
    id,
    dt,
    sum(lowcarbon) lowcarbon
from test1
group by id,dt
having lowcarbon>100;t1
1001    2021-12-12      123
1001    2021-12-13      111
1001    2021-12-14      230

等差数列法:两个等差数列如果等差相同,则相同位置的数据相减等到的结果相同
2)按照用户分组,同时按照时间排序,计算每条数据的Rank值
select
    id,
    dt,
    lowcarbon,
    rank() over(partition by id order by dt) rk
from t1;t2

3)将每行数据中的日期减去Rank值
select
    id,
    dt,
    lowcarbon,
    date_sub(dt,rk) flag
from t2;t3

4)按照用户及Flag分组,求每个组有多少条数据,并找出大于等于3条的数据
select
    id,
    flag,
    count(*) ct
from t3
group by id,flag
having ct>=3;

5)最终HQL
select
    id,
    flag,
    count(*) ct
from 
(select
    id,
    dt,
    lowcarbon,
    date_sub(dt,rk) flag
from 
(select
    id,
    dt,
    lowcarbon,
    rank() over(partition by id order by dt) rk
from 
(select
    id,
    dt,
    sum(lowcarbon) lowcarbon
from test1
group by id,dt
having lowcarbon>100)t1)t2)t3
group by id,flag
having ct>=3;
image.png
id      ts
1001    17523641234
1001    17523641256
1002    17523641278
1001    17523641334
1002    17523641434
1001    17523641534
1001    17523641544
1002    17523641634
1001    17523641638
1001    17523641654
时间间隔小于60秒,则分为同一个组
1001    17523641234     1
1001    17523641256     1
1001    17523641334     2
1001    17523641534     3
1001    17523641544     3
1001    17523641638     4
1001    17523641654     4
1002    17523641278     1
1002    17523641434     2
1002    17523641634     3

1)将上一行时间数据下移
lead:领导
lag:延迟
select
    id,
    ts,
    lag(ts,1,0) over(partition by id order by ts) lagts
from
    test2;t1
1001    17523641234 0
1001    17523641256 17523641234
1001    17523641334 17523641256
1001    17523641534 17523641334
1001    17523641544 17523641534
1001    17523641638 17523641544
1001    17523641654 17523641638
1002    17523641278 0
1002    17523641434 17523641278
1002    17523641634 17523641434

2)将当前行时间数据减去上一行时间数据
select
    id,
    ts,
    ts-lagts tsdiff
from
    t1;t2

select
    id,
    ts,
    ts-lagts tsdiff
from
    (select
    id,
    ts,
    lag(ts,1,0) over(partition by id order by ts) lagts
from
    test2)t1;t2
1001    17523641234 17523641234
1001    17523641256 22
1001    17523641334 78
1001    17523641534 200
1001    17523641544 10
1001    17523641638 94
1001    17523641654 16
1002    17523641278 17523641278
1002    17523641434 156
1002    17523641634 200

3)计算每个用户范围内从第一行到当前行tsdiff大于等于60的总个数(分组号)
select
    id,
    ts,
    sum(if(tsdiff>=60,1,0)) over(partition by id order by ts) groupid
from
    t2;

4)最终HQL
select
    id,
    ts,
    sum(if(tsdiff>=60,1,0)) over(partition by id order by ts) groupid
from
    (select
    id,
    ts,
    ts-lagts tsdiff
from
    (select
    id,
    ts,
    lag(ts,1,0) over(partition by id order by ts) lagts
from
    test2)t1)t2;
image.png
id      dt
1001    2021-12-12
1001    2021-12-13
1001    2021-12-14
1001    2021-12-16
1001    2021-12-19
1001    2021-12-20

1002    2021-12-12
1002    2021-12-16
1002    2021-12-17

结果
1001    5
1002    2

思路一:等差数列
1001    2021-12-12  1
1001    2021-12-13  2
1001    2021-12-14  3
1001    2021-12-16  4
1001    2021-12-19  5
1001    2021-12-20  6

1001    2021-12-12  1   2021-12-11
1001    2021-12-13  2   2021-12-11
1001    2021-12-14  3   2021-12-11
1001    2021-12-16  4   2021-12-12
1001    2021-12-19  5   2021-12-14
1001    2021-12-20  6   2021-12-14

1001    2021-12-11  3
1001    2021-12-12  1
1001    2021-12-14  1

1001    2021-12-11  3   1
1001    2021-12-12  1   2
1001    2021-12-14  1   3

1001    2021-12-11  3   1   2021-12-10
1001    2021-12-12  1   2   2021-12-10
1001    2021-12-14  1   3   2021-12-11

思路二:分组
1001    2021-12-12
1001    2021-12-13
1001    2021-12-14
1001    2021-12-16
1001    2021-12-19
1001    2021-12-20

1)将上一行时间数据下移
1001    2021-12-12  1970-01-01
1001    2021-12-13  2021-12-12
1001    2021-12-14  2021-12-13
1001    2021-12-16  2021-12-14
1001    2021-12-19  2021-12-16
1001    2021-12-20  2021-12-19
select
    id,
    dt,
    lag(dt,1,'1970-01-01') over(partition by id order by dt) lagdt
from
    test3;t1

2)将当前行时间减去上一行时间数据(datediff(dt1,dt2))
1001    2021-12-12  564564
1001    2021-12-13  1
1001    2021-12-14  1
1001    2021-12-16  2
1001    2021-12-19  3
1001    2021-12-20  1
select
    id,
    dt,
    datediff(dt,lagdt) flag
from
    t1;t2

3)按照用户分组,同时按照时间排序,计算从第一行到当前行大于2的数据的总条数(sum(if(flag>2,1,0)))
1001    2021-12-12  1
1001    2021-12-13  1
1001    2021-12-14  1
1001    2021-12-16  1
1001    2021-12-19  2
1001    2021-12-20  2
select
    id,
    dt,
    sum(if(flag>2,1,0)) over(partition by id order by dt) flag
from
    t2;t3

4)按照用户和flag分组,求最大时间减去最小时间并加上1
select
    id,
    flag,
    datediff(max(dt),min(dt)) days
from
    t3
group by id,flag;t4

5)取连续登录天数的最大值
select
    id,
    max(days)+1
from
    t4
group by id;

6)最终HQL
select
    id,
    max(days)+1
from
    (select
    id,
    flag,
    datediff(max(dt),min(dt)) days
from
    (select
    id,
    dt,
    sum(if(flag>2,1,0)) over(partition by id order by dt) flag
from
    (select
    id,
    dt,
    datediff(dt,lagdt) flag
from
    (select
    id,
    dt,
    lag(dt,1,'1970-01-01') over(partition by id order by dt) lagdt
from
    test3)t1)t2)t3
group by id,flag)t4
group by id;
image.png
id      stt         edt
oppo    2021-06-05  2021-06-09
oppo    2021-06-11  2021-06-21

vivo    2021-06-05  2021-06-15
vivo    2021-06-09  2021-06-21

redmi   2021-06-05  2021-06-21
redmi   2021-06-09  2021-06-15
redmi   2021-06-17  2021-06-26

huawei  2021-06-05  2021-06-26
huawei  2021-06-09  2021-06-15
huawei  2021-06-17  2021-06-21

1)将当前行以前的数据中最大的edt放置当前行
select
    id,
    stt,
    edt,
    max(edt) over(partition by id order by stt rows between UNBOUNDED PRECEDING and 1 PRECEDING) maxEdt
from test4;t1
redmi   2021-06-05  2021-06-21  null
redmi   2021-06-09  2021-06-15  2021-06-21
redmi   2021-06-17  2021-06-26  2021-06-21

2)比较开始时间与移动下来的数据,如果开始时间大,则不需要操作,
反之则需要将移动下来的数据加一替换当前行的开始时间
如果是第一行数据,maxEDT为null,则不需要操作
select
    id,
    if(maxEdt is null,stt,if(stt>maxEdt,stt,date_add(maxEdt,1))) stt,
    edt
from t1;t2
redmi   2021-06-05  2021-06-21
redmi   2021-06-22  2021-06-15
redmi   2021-06-22  2021-06-26

3)将每行数据中的结束日期减去开始日期
select
    id,
    datediff(edt,stt) days
from
    t2;t3
redmi   16
redmi   -4
redmi   4

4)按照品牌分组,计算每条数据加一的总和
select
    id,
    sum(if(days>=0,days+1,0)) days
from
    t3
group by id;
redmi   22

5)最终HQL
select
    id,
    sum(if(days>=0,days+1,0)) days
from
    (select
    id,
    datediff(edt,stt) days
from
    (select
    id,
    if(maxEdt is null,stt,if(stt>maxEdt,stt,date_add(maxEdt,1))) stt,
    edt
from 
    (select
    id,
    stt,
    edt,
    max(edt) over(partition by id order by stt rows between UNBOUNDED PRECEDING and 1 PRECEDING) maxEdt
from test4)t1)t2)t3
group by id;
image.png
image.png
上一篇 下一篇

猜你喜欢

热点阅读