【SQL】抽样

2020-11-24  本文已影响0人  7ccc099f4608

随机采样


--获取10%的随机样本数据
SELECT t.*
FROM <t> t
WHERE RAND() < 0.1

分层采样

hash 版

select 
    bins, score, rank1, st_count
from 
(
    select 
        round(score, 3) as bins, 
        score,
        count(*) over (partition by hash(level)  ) as st_count, 
        rank() over (partition by hash(level) order by rand()) as rank1
    FROM 
        t_table
) A 
where rank1 <= 0.1 * st_count; 

非hash 版

select 
    bins, score, rank1, st_count
from 
(
    select 
        round(score, 3) as bins, 
        score,
        count(*) over (partition by level  ) as st_count, 
        rank() over (partition by level order by rand()) as rank1
    FROM 
        t_table
) A 
where rank1 <= 0.1 * st_count; 
上一篇 下一篇

猜你喜欢

热点阅读