nb sql

2018-08-10  本文已影响0人  e237262360d2

-- ## 模型 计算 p(x) p(c)

drop table ym_consult_drug_new_data_nb_pc;

create table ym_consult_drug_new_data_nb_pc

as

select name_cns,count(*) as pc_count

from ym_consult_drug_new_data_train

group by name_cns;

drop table ym_consult_drug_new_data_nb_px;

create table ym_consult_drug_new_data_nb_px

as

select gender,dept_name,keyword,name_cns,count(*) as px_count

from ym_consult_drug_new_data_train

group by gender,dept_name,keyword,name_cns;

drop table ym_consult_drug_new_data_nb_pc1;

create table ym_consult_drug_new_data_nb_pc1

as

select gender,dept_name,name_cns,count(*) as cn_group_count

from ym_consult_drug_new_data_train

group by gender,dept_name,name_cns;

drop table ym_consult_drug_new_data_nb_pc2;

create table ym_consult_drug_new_data_nb_pc2

as

select gender,dept_name,keyword,count(*) as tag_group_count

from ym_consult_drug_new_data_train

group by gender,dept_name,keyword;

drop table ym_consult_drug_new_data_nb_pxc_t1;

create table ym_consult_drug_new_data_nb_pxc_t1

as

select a.*,b.pc_count,b1.cn_group_count,b2.tag_group_count

from ym_consult_drug_new_data_nb_px a

left outer join

ym_consult_drug_new_data_nb_pc b

on a.name_cns=b.name_cns

left outer join

ym_consult_drug_new_data_nb_pc1 b1

on a.name_cns=b1.name_cns

and a.gender=b1.gender

and a.dept_name=b1.dept_name

left outer join

ym_consult_drug_new_data_nb_pc2 b2

on a.keyword=b2.keyword

and a.gender=b2.gender

and a.dept_name=b2.dept_name;

drop table ym_consult_drug_new_data_nb_pxc;

create table ym_consult_drug_new_data_nb_pxc

as

select a.*,

coalesce(round(px_count/pc_count,4),0) as p,

coalesce(round(px_count*10/cn_group_count,4),0) as p1,

coalesce(round(px_count/tag_group_count,4),0) as p2,

coalesce(round(px_count*px_count*100/tag_group_count/cn_group_count,4),0) as pr,

coalesce(round((px_count+1)*1000/(pc_count+448886),4),0) as p1_l,

coalesce(round((px_count+1)*1000/(pc_count+869),4),0) as p2_l,

coalesce(round((px_count+1)*1000/(cn_group_count+448886),4),0) as p3_l,

coalesce(round((px_count+1)*1000/(cn_group_count+869),4),0) as p4_l,

coalesce(round((px_count+1)*1000/(tag_group_count+448886),4),0) as p5_l,

coalesce(round((px_count+1)*1000/(tag_group_count+869),4),0) as p6_l,

coalesce(round((px_count+1)*(px_count+1)*10000/(tag_group_count+869)/(cn_group_count+869),4),0) as pr2

from ym_consult_drug_new_data_nb_pxc_t1 a;

-- 每个关键词只保留最大的10个

drop table ym_consult_drug_new_data__nb_pxc_sort;

create table ym_consult_drug_new_data__nb_pxc_sort

as

select *

from

(select *,ROW_NUMBER() OVER(PARTITION BY gender,dept_name,keyword ORDER BY pr desc) AS rn

from ym_consult_drug_new_data_nb_pxc ) a

where rn<=10;

-- 输出模型文件

drop table mid_consult_drug_recommend;

create table mid_consult_drug_recommend

as

select gender,"all" as age,dept_name,keyword,name_cns as name_cn,pr as prob

from ym_consult_drug_new_data__nb_pxc_sort;

上一篇 下一篇

猜你喜欢

热点阅读