20190728工作进展

2019-07-28  本文已影响0人  Songger
  1. 现在的结果在hs_tmp_79

# Boolean switches selecting which side to run inference for (query vs. doc).
# The pai commands in these notes set them via --infer_query / --infer_doc;
# presumably these definitions live in inference_v5.py — TODO confirm.
tf.flags.DEFINE_boolean("infer_query", True, "infer query")
tf.flags.DEFINE_boolean("infer_doc", False, "infer doc")

-- Query-side inference: empty the output table, then run inference_v5.py on PAI
-- (tensorflow140, 1 worker + 1 ps) with --infer_query=True --infer_doc=False,
-- reading hs_tmp_dssm_inf_querys and writing hs_dssm_result_query_0,
-- using checkpoint hs_ugc_video.ckpt-3.
truncate table hs_dssm_result_query_0;
pai -name tensorflow140 -Dscript="file:///home/hengsong/origin_deep_cluster_odps_8.tar.gz" -DentryFile="inference_v5.py" -Dcluster='{"worker":{"count":1, "cpu":200, "memory":4000}, "ps":{"count":1, "cpu":200, "memory":5000}}' -Dtables="odps://graph_embedding/tables/hs_tmp_dssm_inf_querys" -Doutputs="odps://graph_embedding/tables/hs_dssm_result_query_0" -DcheckpointDir="oss://bucket-automl/hengsong/?role_arn=acs:ram::1293303983251548:role/graph2018&host=cn-hangzhou.oss-internal.aliyun-inc.com" -DuserDefinedParameters="--learning_rate=3e-4 --batch_size=5000 --is_save_model=True --attention_type=1 --num_epochs=1 --ckpt=hs_ugc_video.ckpt-3 --infer_query=True --infer_doc=False" -DuseSparseClusterSchema=True;

-- Title-side inference: run inference_v5.py on PAI (tensorflow140, 10 workers + 1 ps)
-- with --infer_query=False --infer_doc=True, reading hs_tmp_dssm_inf_titles and
-- writing hs_dssm_result_title_0, using checkpoint hs_ugc_video.ckpt-3.
truncate table hs_dssm_result_title_0;
-- NOTE(review): the next statement empties hs_dssm_result_query_0, which this job
-- does not write (its -Doutputs is hs_dssm_result_title_0) and which the kNN command
-- in these notes reads as -Dquery. Looks like a copy-paste leftover — confirm before re-running.
truncate table hs_dssm_result_query_0;
pai -name tensorflow140 -Dscript="file:///home/hengsong/origin_deep_cluster_odps_8.tar.gz" -DentryFile="inference_v5.py" -Dcluster='{"worker":{"count":10, "cpu":200, "memory":4000}, "ps":{"count":1, "cpu":200, "memory":5000}}' -Dtables="odps://graph_embedding/tables/hs_tmp_dssm_inf_titles" -Doutputs="odps://graph_embedding/tables/hs_dssm_result_title_0" -DcheckpointDir="oss://bucket-automl/hengsong/?role_arn=acs:ram::1293303983251548:role/graph2018&host=cn-hangzhou.oss-internal.aliyun-inc.com" -DuserDefinedParameters="--learning_rate=3e-4 --batch_size=5000 --is_save_model=True --attention_type=1 --num_epochs=1 --ckpt=hs_ugc_video.ckpt-3 --infer_query=False --infer_doc=True" -DuseSparseClusterSchema=True;

  1. 结果
    hs_tmp_dssm_inf_querys
    hs_tmp_dssm_inf_titles
    hs_dssm_result_0

-- hs_tmp_77: every (video_id, query_id, score) row of hs_dssm_result_0 with
-- score > 0.5, with the query's words (query_words) and the video's words
-- (video_words) attached by joining on the id columns of
-- hs_tmp_dssm_inf_querys and hs_tmp_dssm_inf_titles.
-- The "a.*" / "c.*" wildcards had lost their asterisk in the original notes
-- ("select a." / "select c.,"), which does not parse; they are restored here.
create table hs_tmp_77 as
select
    c.*,
    d.words as video_words
from (
    select
        a.*,
        b.words as query_words
    from (
        select video_id, query_id, score
        from hs_dssm_result_0
        where score > 0.5
    ) a
    join hs_tmp_dssm_inf_querys b
        on a.query_id = b.id
) c
join hs_tmp_dssm_inf_titles d
    on c.video_id = d.id;

对 hs_dssm_result_2 做同样的关联(取 score < 0.3 的样本,结果写入 hs_tmp_79):

-- Rebuild hs_tmp_79: every (video_id, query_id, score) row of hs_dssm_result_2
-- with score < 0.3, with query_words and video_words attached by joining on the
-- id columns of hs_tmp_dssm_inf_querys and hs_tmp_dssm_inf_titles.
-- "if exists" keeps the drop from failing when the table is absent.
drop table if exists hs_tmp_79;
-- yes   (console input recorded in the original notes, presumably the answer to
--        the drop confirmation prompt; it is not SQL, so it is kept as a comment)
-- The "a.*" / "c.*" wildcards had lost their asterisk in the original notes
-- ("select a." / "select c.,"), which does not parse; they are restored here.
create table hs_tmp_79 as
select
    c.*,
    d.words as video_words
from (
    select
        a.*,
        b.words as query_words
    from (
        select video_id, query_id, score
        from hs_dssm_result_2
        where score < 0.3
    ) a
    join hs_tmp_dssm_inf_querys b
        on a.query_id = b.id
) c
join hs_tmp_dssm_inf_titles d
    on c.video_id = d.id;

  1. 测试knn

-- Prepare (id, emb) tables for the kNN test from hs_dssm_result_0.

-- hs_tmp_80: distinct (video_id, video_emb) pairs.
create table hs_tmp_80 as
select distinct video_id, video_emb
from hs_dssm_result_0;

-- hs_tmp_81: distinct (query_id, query_emb) pairs; dropped first so it can be rebuilt.
-- "if exists" keeps the drop from failing when the table is absent.
drop table if exists hs_tmp_81;
-- yes   (console input recorded in the original notes, presumably the answer to
--        the drop confirmation prompt; it is not SQL, so it is kept as a comment)
create table hs_tmp_81 as
select distinct query_id, query_emb
from hs_dssm_result_0;

-- hs_tmp_82 / hs_tmp_83: the same rows with the columns renamed to id / emb.
create table hs_tmp_82 as
select video_id as id, video_emb as emb
from hs_tmp_80;

create table hs_tmp_83 as
select query_id as id, query_emb as emb
from hs_tmp_81;

-- kNN test: run the am_vsearch_nearest_neighbor_014 component (project algo_market)
-- with hs_tmp_82 as -Dinput and hs_dssm_result_query_0 as -Dquery, using
-- id_col=id, vector_col=emb, dim=100, topk=50, metric=l2; output goes to hs_tmp_84.
--
-- create table   (incomplete fragment in the original notes, with no table name or
--                 body; kept as a comment because it is not a runnable statement)
-- NOTE(review): if hs_tmp_84 has to exist before the job runs, its schema is not
-- recorded in these notes.
-- NOTE(review): hs_tmp_83 (query id/emb) is prepared in these notes but not used here;
-- confirm hs_dssm_result_query_0 really has "id" and "emb" columns, or switch
-- -Dquery to hs_tmp_83.
-- -Dcluster now uses single outer quotes, like the tensorflow140 commands in these
-- notes; the original nested unescaped double quotes inside a double-quoted value.
PAI -name am_vsearch_nearest_neighbor_014 -project algo_market
-Dcluster='{"worker":{"count":1,"gpu":100}}'
-Ddim=100
-Did_col="id"
-Dvector_col="emb"
-Dinput_slice=1
-Dtopk=50
-Dnprob=1024
-Dmetric="l2"
-Dinput="odps://graph_embedding/tables/hs_tmp_82"
-Dquery="odps://graph_embedding/tables/hs_dssm_result_query_0"
-Doutputs="odps://graph_embedding/tables/hs_tmp_84"
-DenableDynamicCluster=true -DmaxTrainingTimeInHour=60;

得到title的emb

http://logview.odps.aliyun-inc.com:8080/logview/?h=http://service-corp.odps.aliyun-inc.com/api&p=graph_embedding&i=2019072814045070gdmtqtvj2_ae7d7f2c_163b_4f17_9573_6b9171be1556&token=RUZ4QlBtbVNraTNsUkhMczJPN2RpcU1OL2drPSxPRFBTX09CTzoxMjkzMzAzOTgzMjUxNTQ4LDE1NjQ5Mjc0OTIseyJTdGF0ZW1lbnQiOlt7IkFjdGlvbiI6WyJvZHBzOlJlYWQiXSwiRWZmZWN0IjoiQWxsb3ciLCJSZXNvdXJjZSI6WyJhY3M6b2RwczoqOnByb2plY3RzL2dyYXBoX2VtYmVkZGluZy9pbnN0YW5jZXMvMjAxOTA3MjgxNDA0NTA3MGdkbXRxdHZqMl9hZTdkN2YyY18xNjNiXzRmMTdfOTU3M182YjkxNzFiZTE1NTYiXX1dLCJWZXJzaW9uIjoiMSJ9

上一篇 下一篇

猜你喜欢

热点阅读