20190803工作进展
- 测试新得到的embedding的效果
hs_tmp_156 : | query_id | title_id | query | title |
hs_tmp_160 (query) : | id | emb |
hs_tmp_161 (title) : | id | emb |
-- Attach the query embedding and the title embedding to every row of hs_tmp_156.
-- Output columns: all columns of hs_tmp_156, then query_emb (from hs_tmp_160)
-- and title_emb (from hs_tmp_161).
-- Both joins are LEFT joins, so rows of hs_tmp_156 whose id has no match are
-- kept and get NULL in the corresponding embedding column.
create table hs_tmp_163 as
select
    p.*,
    q.emb as query_emb,
    t.emb as title_emb
from hs_tmp_156 p
left join hs_tmp_160 q
    on p.query_id = q.id
left join hs_tmp_161 t
    on p.title_id = t.id;
hs_dssm_train_v2_0 : | query_id | item_id | label |
hs_tmp_124 : | se_keyword_mainse_ws | title_mainse_ws | label |
-- Remove the previous hs_tmp_163 so the name can be reused by the next
-- create-table statement. IF EXISTS keeps a rerun from failing when the table
-- has already been dropped.
-- NOTE(review): the `yes` line is presumably the answer to the console's
-- interactive drop confirmation; confirm it is still needed in your client.
drop table if exists hs_tmp_163;
yes
-- Build the labelled embedding table from hs_dssm_train_v2_0.
-- Output columns:
--   se_keyword_mainse_ws : emb looked up in hs_tmp_160 by query_id
--   title_mainse_ws      : emb looked up in hs_tmp_161 by item_id
--   label                : copied from hs_dssm_train_v2_0
-- LEFT joins keep every labelled row; an id with no embedding yields NULL.
create table hs_tmp_163 as
select
    q.emb as se_keyword_mainse_ws,
    t.emb as title_mainse_ws,
    s.label
from hs_dssm_train_v2_0 s
left join hs_tmp_160 q
    on s.query_id = q.id
left join hs_tmp_161 t
    on s.item_id = t.id;
-- Clear the output tables of the PAI split step so it can write them afresh.
-- IF EXISTS keeps a rerun from failing when a table is already gone.
-- NOTE(review): each `yes` line is presumably the answer to the console's
-- interactive drop confirmation; confirm it is still needed in your client.
drop table if exists hs_train_data_dssm_v2_3;
yes
drop table if exists hs_test_data_dssm_v2_3;
yes
-- Split graph_embedding.hs_tmp_163 into two tables with the PAI `split` component:
--   output1 -> graph_embedding.hs_train_data_dssm_v2_3
--   output2 -> graph_embedding.hs_test_data_dssm_v2_3
-- NOTE(review): fraction=0.8 is presumably the share of rows routed to output1
-- (train); confirm against the PAI split component documentation.
PAI -name split -project algo_public
-DinputTableName=graph_embedding.hs_tmp_163
-Doutput1TableName=graph_embedding.hs_train_data_dssm_v2_3
-Doutput2TableName=graph_embedding.hs_test_data_dssm_v2_3
-Dfraction=0.8
-DmemSizePerCore=4096
-DcoreNum=100
;
- 还是先测试dssm网络吧
-- Launch the TensorFlow job (entry file train_v6.py from the uploaded tarball)
-- on 30 workers and 10 parameter servers, passing the train and test split
-- tables via -Dtables and writing checkpoints under the OSS checkpointDir.
-- NOTE(review): --learning_rate is 3e-4 while the checkpoint is named
-- hs_ugc_video_3e_5.ckpt; confirm which learning rate is intended, or rename
-- the checkpoint so the run is not mislabelled.
-- NOTE(review): the checkpointDir URL embeds an account role ARN; confirm this
-- is acceptable to keep in a shared log.
pai -name tensorflow140 -Dscript="file:///home/hengsong/origin_deep_cluster_odps_8.tar.gz" -DentryFile="train_v6.py" -Dcluster='{"worker":{"count":30, "cpu":200, "memory":4000}, "ps":{"count":10, "cpu":200, "memory":5000}}' -Dtables="odps://graph_embedding/tables/hs_train_data_dssm_v2_3,odps://graph_embedding/tables/hs_test_data_dssm_v2_3" -DcheckpointDir="oss://bucket-automl/hengsong/?role_arn=acs:ram::1293303983251548:role/graph2018&host=cn-hangzhou.oss-internal.aliyun-inc.com" -DuserDefinedParameters="--learning_rate=3e-4 --batch_size=1024 --is_save_model=True --attention_type=1 --num_epochs=10 --ckpt=hs_ugc_video_3e_5.ckpt" -DuseSparseClusterSchema=True;