CapsE: Debugging Notes on the Capsule Network Code for Knowledge Graph Completion

2019-11-13  布口袋_天晴了
Network structure (figure)

GitHub: CapsE
Paper: A Capsule Network-based Embedding Model for Knowledge Graph Completion and Search Personalization

Brief overview of the model: pretrained entity and relation vectors serve as the model's Embedding layer, and the model scores triples so that the scores of true triples are pushed toward 1. The scoring is done by a capsule network: the first layer is a convolution, and the second layer is a fully connected capsule layer.

A capsule network is a refinement of the convolutional neural network that uses a routing algorithm internally.
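
For reference, the scoring function defined in the paper is

f(h, r, t) = || capsnet( g([v_h, v_r, v_t] ∗ Ω) ) ||

where v_h, v_r, v_t are the embeddings of the triple, Ω is the set of convolution filters, g is the convolution nonlinearity, capsnet denotes the two capsule layers, and the length of the final capsule's output vector is the score.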

1. Data preprocessing (CapsE.py)
#Load the training / validation / test sets
#train: training triples (all true), labeled 1
#valid: validation triples (all true), labeled 1
#test: test triples (all true), labeled 1
#words_indexes / indexes_words: mutually inverse maps; entities and relations share one index space
#headTailSelector: head/tail entity selector (used when sampling corrupted triples)
#entity2id / id2entity: mutually inverse maps, the entity <-> id table
#relation2id / id2relation: mutually inverse maps, the relation <-> id table
train, valid, test, words_indexes, indexes_words, headTailSelector, entity2id, id2entity, relation2id, id2relation = build_data(path=args.data, name=args.name)
#Size of the training set
data_size = len(train)
#Mini-batch generator over the training set
train_batch = Batch_Loader(train, words_indexes, indexes_words, headTailSelector, entity2id, id2entity, relation2id, id2relation, batch_size=args.batch_size, neg_ratio=args.neg_ratio)
#Collect the entity ids seen by the batch loader into the array entity_array
entity_array = np.array(list(train_batch.indexes_ents.keys()))

#The validation triples become x_valid and their labels (1) become y_valid; true triples should be scored toward 1
x_valid = np.array(list(valid.keys())).astype(np.int32)
y_valid = np.array(list(valid.values())).astype(np.float32)

#The test triples become x_test and their labels (1) become y_test; true triples should be scored toward 1
x_test = np.array(list(test.keys())).astype(np.int32)
y_test = np.array(list(test.values())).astype(np.float32)
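
As a quick sanity check (a hypothetical snippet, not part of the repo), each row of x_valid and x_test holds one triple of three ids drawn from the shared words_indexes space:

#Hypothetical sanity check, not in the original repo
assert x_valid.shape[1] == 3               #one triple (three ids) per row
assert x_valid.max() < len(words_indexes)  #ids live in the shared entity/relation index space
print(x_valid.shape, y_valid.shape)
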
2. Initializing the entity/relation Embedding (CapsE.py)

initialization merges the pretrained entity vectors and relation vectors into a single Embedding matrix (in the CapsE model, the entity vectors and relation vectors are stored in one shared table).

#Build the mixed entity/relation Embedding matrix, i.e. the initialization matrix
initialization = []
print("Using initialization.")
#Allocate a [len(words_indexes), embedding_dim] matrix (np.empty leaves it uninitialized, not zeroed), where len(words_indexes) = len(entity2id) + len(relation2id)
initialization = np.empty([len(words_indexes), args.embedding_dim]).astype(np.float32)
#Load the pretrained entity and relation vectors
initEnt, initRel = init_norm_Vector(args.data + args.name + '/relation2vec' + str(args.embedding_dim) + '.init',
                                        args.data + args.name + '/entity2vec' + str(args.embedding_dim) + '.init', args.embedding_dim)
#Merge the entity and relation vectors into one Embedding matrix (initialization), following the words_indexes mapping
for _word in words_indexes:
    if _word in relation2id:
        index = relation2id[_word]
        _ind = words_indexes[_word]
        initialization[_ind] = initRel[index]
    elif _word in entity2id:
        index = entity2id[_word]
        _ind = words_indexes[_word]
        initialization[_ind] = initEnt[index]
    else:
        print('*****************Error********************!')
        break
#initialization now holds the merged pretrained entity and relation vectors
initialization = np.array(initialization, dtype=np.float32)
Pretrained entity and relation vectors (figure)
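
init_norm_Vector itself is not shown in the post. A minimal sketch of what it is assumed to do, based on the call above (one whitespace-separated vector per line, relation file passed first, entity vectors returned first):

import numpy as np

def init_norm_Vector(rel_init_path, ent_init_path, embedding_size):
    #A sketch under the stated assumptions, not the repo's exact code
    def load(path):
        with open(path) as f:
            return np.array([[float(v) for v in line.strip().split()] for line in f],
                            dtype=np.float32)
    ent_vectors = load(ent_init_path)
    rel_vectors = load(rel_init_path)
    assert ent_vectors.shape[1] == embedding_size
    return ent_vectors, rel_vectors  #matches the initEnt, initRel unpacking above
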
3. Building the training model with TensorFlow (capsuleNet.py)
import tensorflow as tf
from capsuleLayer import CapsLayer
import math

epsilon = 1e-9

class CapsE(object):
    def __init__(self, sequence_length, embedding_size, num_filters, vocab_size, iter_routing, batch_size=256,
                 num_outputs_secondCaps=1, vec_len_secondCaps=10, initialization=[], filter_size=1, useConstantInit=False):
        # Placeholders for input, output
        # Declare the input placeholders and set the model hyperparameters
        self.input_x = tf.placeholder(tf.int32, [batch_size, sequence_length], name="input_x")  #shape=(256,3)
        self.input_y = tf.placeholder(tf.float32, [batch_size, 1], name="input_y")  #shape=(256,1)
        self.filter_size = filter_size  #1
        self.num_filters = num_filters  #400
        self.sequence_length = sequence_length  #3
        self.embedding_size = embedding_size  #100
        self.iter_routing = iter_routing  #1
        self.num_outputs_secondCaps = num_outputs_secondCaps  #1
        self.vec_len_secondCaps = vec_len_secondCaps  #10
        self.batch_size = batch_size  #256
        self.useConstantInit = useConstantInit  #false

        # Embedding layer
        # Embedding layer: either randomly initialized or loaded from pretrained embeddings
        with tf.name_scope("embedding"):
            if len(initialization) == 0:  #comparing a NumPy array to [] is unreliable, so test the length instead
                self.W = tf.Variable(
                    tf.random_uniform([vocab_size, embedding_size], -math.sqrt(1.0 / embedding_size),
                                      math.sqrt(1.0 / embedding_size), seed=1234), name="W")
            else:
                self.W = tf.get_variable(name="W2", initializer=initialization)  #shape=(40954,100)
        #Embed the input triples: look each id of self.input_x up in W, producing a (batch_size, 3, dim) tensor
        self.embedded_chars = tf.nn.embedding_lookup(self.W, self.input_x)  #shape=(256,3,100)
        #Add a channel dimension, giving (batch_size, 3, dim, 1) so the following Conv2D can extract features
        self.X = tf.expand_dims(self.embedded_chars, -1)  #shape=(256,3,100,1)

        #Build the capsule layers
        self.build_arch()
        #Define the model's loss
        self.loss()
        self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=500)

        tf.logging.info('Setting up the main structure')

    def build_arch(self):
        #The first capsule layer
        with tf.variable_scope('FirstCaps_layer'):
            self.firstCaps = CapsLayer(num_outputs_secondCaps=self.num_outputs_secondCaps, vec_len_secondCaps=self.vec_len_secondCaps,
                                    with_routing=False, layer_type='CONV', embedding_size=self.embedding_size,
                                    batch_size=self.batch_size, iter_routing=self.iter_routing,
                                    useConstantInit=self.useConstantInit, filter_size=self.filter_size,
                                    num_filters=self.num_filters, sequence_length=self.sequence_length)

            self.caps1 = self.firstCaps(self.X, kernel_size=1, stride=1)  #shape=(256,100,400,1)
        #The second capsule layer
        with tf.variable_scope('SecondCaps_layer'):
            self.secondCaps = CapsLayer(num_outputs_secondCaps=self.num_outputs_secondCaps, vec_len_secondCaps=self.vec_len_secondCaps,
                                    with_routing=True, layer_type='FC',
                                    batch_size=self.batch_size, iter_routing=self.iter_routing,
                                    embedding_size=self.embedding_size, useConstantInit=self.useConstantInit, filter_size=self.filter_size,
                                    num_filters=self.num_filters, sequence_length=self.sequence_length)
            self.caps2 = self.secondCaps(self.caps1)  #shape=(256,1,10,1)

        #The triple's score is the length (L2 norm) of the output capsule
        self.v_length = tf.sqrt(tf.reduce_sum(tf.square(self.caps2), axis=2, keep_dims=True) + epsilon)  #shape=(256,1,1,1)

    def loss(self):
        self.scores = tf.reshape(self.v_length, [self.batch_size, 1])  #shape=(256,1)
        self.predictions = tf.nn.sigmoid(self.scores)
        print("Using square softplus loss")
        ##Note: this is not the usual cross-entropy loss but a squared-softplus loss over score * label
        losses = tf.square(tf.nn.softplus(self.scores * self.input_y))
        self.total_loss = tf.reduce_mean(losses)
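
capsuleLayer.py is not reproduced in this post. Inside CapsLayer, capsule outputs are passed through the standard squash nonlinearity of Sabour et al. (2017), which the routing iterations rely on; a minimal sketch, assuming the usual formulation:

import tensorflow as tf

epsilon = 1e-9

def squash(vector):
    #Shrink the capsule vector's length into [0, 1) while keeping its
    #direction, so the length can be read as a probability-like score.
    squared_norm = tf.reduce_sum(tf.square(vector), axis=-2, keep_dims=True)
    scale = squared_norm / (1.0 + squared_norm) / tf.sqrt(squared_norm + epsilon)
    return scale * vector
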
4. Training the model in batches (CapsE.py)
with tf.Graph().as_default():
    #Session configuration for the graph
    session_conf = tf.ConfigProto(allow_soft_placement=args.allow_soft_placement, log_device_placement=args.log_device_placement)
    session_conf.gpu_options.allow_growth = True
    sess = tf.Session(config=session_conf)  #Create the session with this configuration
    with sess.as_default():
        global_step = tf.Variable(0, name="global_step", trainable=False)
        capse = CapsE(sequence_length=x_valid.shape[1],
                            initialization=initialization,
                            embedding_size=args.embedding_dim,
                            filter_size=args.filter_size,
                            num_filters=args.num_filters,
                            vocab_size=len(words_indexes),
                            iter_routing=args.iter_routing,
                            batch_size=2*args.batch_size,  #each batch holds the positive triples plus their sampled negatives
                            num_outputs_secondCaps=args.num_outputs_secondCaps,
                            vec_len_secondCaps=args.vec_len_secondCaps,
                            useConstantInit=args.useConstantInit
                            )  #Build the model

        # Define Training procedure
        # Use Adam as the training optimizer
        optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
        # Compute gradients of the loss and apply them to update the parameters
        grads_and_vars = optimizer.compute_gradients(capse.total_loss)
        train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)
        # Path for saving model checkpoints
        out_dir = os.path.abspath(os.path.join(args.run_folder, "runs_CapsE", args.model_name))
        print("Writing to {}\n".format(out_dir))
        checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
        checkpoint_prefix = os.path.join(checkpoint_dir, "model")
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        # Initialize all variables
        sess.run(tf.global_variables_initializer())

        def train_step(x_batch, y_batch):
            """
            A single training step
            """
            feed_dict = {
                capse.input_x: x_batch,
                capse.input_y: y_batch
            }
            _, step, loss = sess.run([train_op, global_step, capse.total_loss], feed_dict)
            return loss

        #Train in mini-batches; num_batches_per_epoch is ceil(data_size / batch_size)
        num_batches_per_epoch = int((data_size - 1) / args.batch_size) + 1
        for epoch in range(args.num_epochs):
            for batch_num in range(num_batches_per_epoch):
                x_batch, y_batch = train_batch()
                loss = train_step(x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)
                #print(loss)
            if epoch > 0:
                if epoch % args.savedEpochs == 0:
                    path = capse.saver.save(sess, checkpoint_prefix, global_step=epoch)
                    print("Saved model checkpoint to {}\n".format(path))
5. Training process