nb sql
-- ## 模型 计算 p(x) p(c)
drop table ym_consult_drug_new_data_nb_pc;
create table ym_consult_drug_new_data_nb_pc
as
select name_cns,count(*) as pc_count
from ym_consult_drug_new_data_train
group by name_cns;
drop table ym_consult_drug_new_data_nb_px;
create table ym_consult_drug_new_data_nb_px
as
select gender,dept_name,keyword,name_cns,count(*) as px_count
from ym_consult_drug_new_data_train
group by gender,dept_name,keyword,name_cns;
drop table ym_consult_drug_new_data_nb_pc1;
create table ym_consult_drug_new_data_nb_pc1
as
select gender,dept_name,name_cns,count(*) as cn_group_count
from ym_consult_drug_new_data_train
group by gender,dept_name,name_cns;
drop table ym_consult_drug_new_data_nb_pc2;
create table ym_consult_drug_new_data_nb_pc2
as
select gender,dept_name,keyword,count(*) as tag_group_count
from ym_consult_drug_new_data_train
group by gender,dept_name,keyword;
drop table ym_consult_drug_new_data_nb_pxc_t1;
create table ym_consult_drug_new_data_nb_pxc_t1
as
select a.*,b.pc_count,b1.cn_group_count,b2.tag_group_count
from ym_consult_drug_new_data_nb_px a
left outer join
ym_consult_drug_new_data_nb_pc b
on a.name_cns=b.name_cns
left outer join
ym_consult_drug_new_data_nb_pc1 b1
on a.name_cns=b1.name_cns
and a.gender=b1.gender
and a.dept_name=b1.dept_name
left outer join
ym_consult_drug_new_data_nb_pc2 b2
on a.keyword=b2.keyword
and a.gender=b2.gender
and a.dept_name=b2.dept_name;
drop table ym_consult_drug_new_data_nb_pxc;
create table ym_consult_drug_new_data_nb_pxc
as
select a.*,
coalesce(round(px_count/pc_count,4),0) as p,
coalesce(round(px_count*10/cn_group_count,4),0) as p1,
coalesce(round(px_count/tag_group_count,4),0) as p2,
coalesce(round(px_count*px_count*100/tag_group_count/cn_group_count,4),0) as pr,
coalesce(round((px_count+1)*1000/(pc_count+448886),4),0) as p1_l,
coalesce(round((px_count+1)*1000/(pc_count+869),4),0) as p2_l,
coalesce(round((px_count+1)*1000/(cn_group_count+448886),4),0) as p3_l,
coalesce(round((px_count+1)*1000/(cn_group_count+869),4),0) as p4_l,
coalesce(round((px_count+1)*1000/(tag_group_count+448886),4),0) as p5_l,
coalesce(round((px_count+1)*1000/(tag_group_count+869),4),0) as p6_l,
coalesce(round((px_count+1)*(px_count+1)*10000/(tag_group_count+869)/(cn_group_count+869),4),0) as pr2
from ym_consult_drug_new_data_nb_pxc_t1 a;
-- 每个关键词只保留最大的10个
drop table ym_consult_drug_new_data__nb_pxc_sort;
create table ym_consult_drug_new_data__nb_pxc_sort
as
select *
from
(select *,ROW_NUMBER() OVER(PARTITION BY gender,dept_name,keyword ORDER BY pr desc) AS rn
from ym_consult_drug_new_data_nb_pxc ) a
where rn<=10;
-- 输出模型文件
drop table mid_consult_drug_recommend;
create table mid_consult_drug_recommend
as
select gender,"all" as age,dept_name,keyword,name_cns as name_cn,pr as prob
from ym_consult_drug_new_data__nb_pxc_sort;