MySQL 定义连续值范围的开始点和结束点
备注:测试数据库版本为MySQL 8.0
一.需求
mysql> select * from v;
+---------+------------+------------+
| proj_id | proj_start | proj_end |
+---------+------------+------------+
| 1 | 2020-10-01 | 2020-10-02 |
| 2 | 2020-10-02 | 2020-10-03 |
| 3 | 2020-10-03 | 2020-10-04 |
| 4 | 2020-10-04 | 2020-10-05 |
| 5 | 2020-10-06 | 2020-10-07 |
| 6 | 2020-10-16 | 2020-10-17 |
| 7 | 2020-10-17 | 2020-10-18 |
| 8 | 2020-10-18 | 2020-10-19 |
| 9 | 2020-10-19 | 2020-10-20 |
| 10 | 2020-10-21 | 2020-10-22 |
| 11 | 2020-10-26 | 2020-10-27 |
| 12 | 2020-10-27 | 2020-10-28 |
| 13 | 2020-10-28 | 2020-10-29 |
| 14 | 2020-10-29 | 2020-10-30 |
+---------+------------+------------+
希望得到如下结果集
+----------+------------+------------+
| proj_grp | proj_start | proj_end |
+----------+------------+------------+
| 1 | 2020-10-01 | 2020-10-05 |
| 2 | 2020-10-06 | 2020-10-07 |
| 3 | 2020-10-16 | 2020-10-20 |
| 4 | 2020-10-21 | 2020-10-22 |
| 5 | 2020-10-26 | 2020-10-30 |
+----------+------------+------------+
二.解决方案
首先,必须识别它们的范围。proj_start和proj_end的值定义了一行的范围,要把某行看做“连续的”或者属于一个组,
则它的PROJ_START值一定要等于它前一行的PROJ_END值。
如果某个行的PROJ_START值不等于前一行的PROJ_END值,而且它的PROJ_END值也不等于下一行的PROJ_START值,则它就是一个单独的组。
确定了范围之后,就需要将属于同意范围的行划成同一组,并且只返回组开始和结束的点。
create view v2
as
select a.*,
case
when (
select b.proj_id
from v b
where a.proj_start = b.proj_end
)
is not null then 0 else 1
end as flag
from v a;
select * from v2;
select proj_grp,
min(proj_start) as proj_start,
max(proj_end) as proj_end
from (
select a.proj_id,a.proj_start,a.proj_end,
(select sum(b.flag)
from v2 b
where b.proj_id <= a.proj_id) as proj_grp
from v2 a
) x
group by proj_grp;
测试记录
mysql>
mysql> create view v2
-> as
-> select a.*,
-> case
-> when (
-> select b.proj_id
-> from v b
-> where a.proj_start = b.proj_end
-> )
-> is not null then 0 else 1
-> end as flag
-> from v a;
Query OK, 0 rows affected (0.01 sec)
mysql>
mysql> select * from v2;
+---------+------------+------------+------+
| proj_id | proj_start | proj_end | flag |
+---------+------------+------------+------+
| 1 | 2020-10-01 | 2020-10-02 | 1 |
| 2 | 2020-10-02 | 2020-10-03 | 0 |
| 3 | 2020-10-03 | 2020-10-04 | 0 |
| 4 | 2020-10-04 | 2020-10-05 | 0 |
| 5 | 2020-10-06 | 2020-10-07 | 1 |
| 6 | 2020-10-16 | 2020-10-17 | 1 |
| 7 | 2020-10-17 | 2020-10-18 | 0 |
| 8 | 2020-10-18 | 2020-10-19 | 0 |
| 9 | 2020-10-19 | 2020-10-20 | 0 |
| 10 | 2020-10-21 | 2020-10-22 | 1 |
| 11 | 2020-10-26 | 2020-10-27 | 1 |
| 12 | 2020-10-27 | 2020-10-28 | 0 |
| 13 | 2020-10-28 | 2020-10-29 | 0 |
| 14 | 2020-10-29 | 2020-10-30 | 0 |
+---------+------------+------------+------+
14 rows in set (0.00 sec)
mysql>
mysql> select proj_grp,
-> min(proj_start) as proj_start,
-> max(proj_end) as proj_end
-> from (
-> select a.proj_id,a.proj_start,a.proj_end,
-> (select sum(b.flag)
-> from v2 b
-> where b.proj_id <= a.proj_id) as proj_grp
-> from v2 a
-> ) x
-> group by proj_grp;
+----------+------------+------------+
| proj_grp | proj_start | proj_end |
+----------+------------+------------+
| 1 | 2020-10-01 | 2020-10-05 |
| 2 | 2020-10-06 | 2020-10-07 |
| 3 | 2020-10-16 | 2020-10-20 |
| 4 | 2020-10-21 | 2020-10-22 |
| 5 | 2020-10-26 | 2020-10-30 |
+----------+------------+------------+
5 rows in set (0.00 sec)
mysql>