图(graph)神经网络--LGCN大图卷积网络(tensorflow版)
LGCN(Large-Scale Learnable Graph Convolutional Networks )
GitHub项目(LGCN[tensorflow版])
该项目做的任务仍是图中节点分类问题,语料仍是Cora
图总共的节点数量是2708个
为了适应大规模图的情况,LGCN提出了一种子图训练策略,将采样的子图放入一个小批处理中。
Large-Scale Learnable Graph Convolutional Networks
将图结构数据变换到网格状数据中,使用传统的一维卷积进行卷积。变换的方式是:针对每个特征的大小,对邻居结点进行排序,取这个特征前k大的数作为它邻居这列特征的k个值。如果邻居不够,那就用0来补。这样就能得到该顶点的邻居信息,组成一个矩阵,然后使用一维卷积。
1.下载代码,本地调试:将代码正确部署到本地
2.模型超参配置
3.模型初始化----数据加载self.process_data()
class GraphNet(object):
    """Model wrapper that loads the data, builds the network and summaries.

    Only the constructor is part of this excerpt; the methods it calls
    (``process_data``, ``configure_networks``, ``config_summary``) are
    defined separately.
    """

    def __init__(self, sess, conf):
        """Create the output directories, then load data and build the model.

        Args:
            sess: Session object, stored on the instance for later use.
            conf: Configuration object. ``conf.modeldir`` and
                ``conf.logdir`` are created when they do not exist yet.
        """
        self.sess = sess
        self.conf = conf
        # exist_ok=True replaces the racy "check, then create" sequence:
        # a directory created in between by another process is no longer
        # an error.
        for directory in (conf.modeldir, conf.logdir):
            os.makedirs(directory, exist_ok=True)
        self.process_data()  # data loading
        self.configure_networks()  # network configuration
        self.train_summary = self.config_summary('train')
        self.valid_summary = self.config_summary('valid')
        self.test_summary = self.config_summary('test')
4.数据加载process_data()
def process_data(self):
    """Load the 'cora' data and cache everything the model needs on ``self``.

    The loader result is consumed positionally: adjacency, features, the
    three label arrays (train / val / test) and the three masks
    (train / val / test).
    """
    (adjacency, features,
     y_train, y_val, y_test,
     train_mask, val_mask, test_mask) = load_data('cora')

    # Dense copy of the raw adjacency plus a preprocessed version of it.
    self.adj = adjacency.todense()
    self.normed_adj = preprocess_adj(adjacency)
    self.feas = preprocess_features(features, False)

    self.y_train = y_train
    self.y_val = y_val
    self.y_test = y_test

    self.train_mask = train_mask
    self.val_mask = val_mask
    self.test_mask = test_mask
数据类型
self.adj === matrix === [2708,2708] (图顶点数*图顶点数,整个大图的邻接矩阵)
self.normed_adj === matrix === [2708,2708] (归一化的大图邻接矩阵)
self.feas === matrix === [2708,1433] (所有顶点的特征向量组成的特征矩阵)
self.y_xxxx === ndarray === [2708,7] (顶点的类别标签one-hot向量)
self.y_xxxx标签向量one-hot
5.模型初始化----配置网络self.configure_networks()
def configure_networks(self):
    """Build the graph, the loss and the train op, then initialise variables.

    Also creates the checkpoint saver over the trainable variables and,
    when ``conf.is_train`` is set, a summary writer for ``conf.logdir``.
    """
    # The graph-level seed has to be in place BEFORE any random op is
    # created: TensorFlow derives each op's seed at construction time, so
    # seeding after build_network() would leave the already-built random
    # ops (e.g. dropout, variable initialisers) unseeded.
    self.seed = int(time.time())
    tf.set_random_seed(self.seed)

    self.build_network()
    self.cal_loss()
    optimizer = self.get_optimizer(self.conf.learning_rate)
    self.train_op = optimizer.minimize(self.loss_op, name='train_op')

    self.sess.run(tf.global_variables_initializer())

    # Only trainable variables are checkpointed.
    # NOTE(review): max_to_keep=0 should mean "never delete old
    # checkpoints" per the TF Saver docs - confirm.
    trainable_vars = tf.trainable_variables()
    self.saver = tf.train.Saver(var_list=trainable_vars, max_to_keep=0)

    if self.conf.is_train:
        self.writer = tf.summary.FileWriter(self.conf.logdir, self.sess.graph)
    self.print_params_num()
def build_network(self):
    """Declare the input placeholders and connect them to the model.

    The resulting prediction tensor is stored in ``self.preds``.
    """
    feature_dim = self.feas.shape[1]
    class_count = self.conf.class_num

    # Mask selecting which labels take part in loss / accuracy.
    self.labels_mask = tf.placeholder(
        dtype=tf.int32, shape=None, name='labels_mask')
    # Adjacency matrix of the graph.
    self.matrix = tf.placeholder(
        dtype=tf.int32, shape=[None, None], name='matrix')
    # Normalised adjacency matrix of the graph.
    self.normed_matrix = tf.placeholder(
        dtype=tf.float32, shape=[None, None], name='normed_matrix')
    # Node feature matrix.
    self.inputs = tf.placeholder(
        dtype=tf.float32, shape=[None, feature_dim], name='inputs')
    # Ground-truth labels, one row per node.
    self.labels = tf.placeholder(
        dtype=tf.int32, shape=[None, class_count], name='labels')
    self.is_train = tf.placeholder(dtype=tf.bool, name='is_train')

    # Run the features through the model to obtain the predictions.
    self.preds = self.inference(self.inputs)
def inference(self, outs):
    """Stack the convolution layers on top of the input features.

    Args:
        outs: Input feature tensor.

    Returns:
        Output tensor of the final ``ops.simple_conv`` layer, which has
        ``conf.class_num`` output channels.
    """
    conf = self.conf

    # Input layer: the op named by conf.first_conv, 4 * ch_num channels.
    input_layer = getattr(ops, conf.first_conv)
    outs = input_layer(
        self.normed_matrix, outs, 4 * conf.ch_num, conf.adj_keep_r,
        conf.keep_r, self.is_train, 'conv_s', act_fn=None)

    # Middle layers: conf.layer_num applications of the op named by
    # conf.second_conv; every result is concatenated onto the running
    # features along axis 1.
    for depth in range(1, conf.layer_num + 1):
        branch = getattr(ops, conf.second_conv)(
            self.normed_matrix, outs, conf.ch_num, conf.adj_keep_r,
            conf.keep_r, self.is_train, 'conv_%s' % depth,
            act_fn=None, k=conf.k)
        outs = tf.concat(
            [outs, branch], axis=1, name='concat_%s' % (depth - 1))

    # Output layer: one simple_conv producing class_num channels.
    return ops.simple_conv(
        self.normed_matrix, outs, conf.class_num, conf.adj_keep_r,
        conf.keep_r, self.is_train, 'conv_f', act_fn=None, norm=False)
6.simple_conv()
adj_m.shape==(?,?) outs.shape==(?,1433) num_out==32
adj_keep_r==0.999 keep_r==0.16
def simple_conv(adj_m, outs, num_out, adj_keep_r, keep_r, is_train, scope,
                act_fn=tf.nn.elu, norm=True, **kw):
    """Apply dropout, a fully connected layer and adjacency aggregation.

    Args:
        adj_m: Adjacency tensor used as the left operand of the matmul.
        outs: Input feature tensor.
        num_out: Number of output channels of the fully connected layer.
        adj_keep_r: Keep rate handed to dropout for ``adj_m``.
        keep_r: Keep rate handed to dropout for ``outs``.
        is_train: Training flag forwarded to dropout.
        scope: Name prefix for every op created here.
        act_fn: Optional activation, called as ``act_fn(tensor, name)``.
        norm: Accepted for interface compatibility; not used by this body.
        **kw: Ignored.

    Returns:
        The aggregated (and, when ``act_fn`` is given, activated) tensor.
    """
    dropped_adj = dropout(adj_m, adj_keep_r, is_train, scope+'/drop1')
    dropped_feats = dropout(outs, keep_r, is_train, scope+'/drop2')
    projected = fully_connected(dropped_feats, num_out, scope+'/fully', None)
    # Mix every node's projected features with those of its neighbours.
    aggregated = tf.matmul(dropped_adj, projected, name=scope+'/matmul')
    if act_fn:
        return act_fn(aggregated, scope+'/act')
    return aggregated
7.graph_conv()
adj_m.shape==(?,?) outs.shape==(?,32) num_out==8
adj_keep_r==0.999 keep_r==0.16 k=8(程序开始之前,人为设置的值为8)
def graph_conv(adj_m, outs, num_out, adj_keep_r, keep_r, is_train, scope, k=5,
               act_fn=tf.nn.relu6, **kw):
    """Learnable graph convolution: top-k neighbour selection + two conv1d.

    Args:
        adj_m: Adjacency tensor.
        outs: Input feature tensor; its last dimension must be static.
        num_out: Number of output channels.
        adj_keep_r: Keep rate handed to dropout for ``adj_m``.
        keep_r: Keep rate handed to dropout for the features.
        is_train: Training flag forwarded to dropout.
        scope: Name prefix for every op created here.
        k: Number of top neighbour values kept per feature.
        act_fn: Optional activation, used after the first conv1d and
            forwarded to the final batch_norm.
        **kw: Ignored.

    Returns:
        The result of the final ``batch_norm`` call.
    """
    num_in = outs.shape[-1].value
    hidden_channels = (num_in + num_out) // 2
    # The two kernel sizes add up to k + 2.
    # NOTE(review): presumably conv1d does not pad, so the k + 1 rows
    # produced by top_k_features shrink to exactly 1 before the squeeze
    # on axis 1 - confirm against conv1d.
    first_kernel = (k + 1) // 2 + 1
    second_kernel = k // 2 + 1

    adj_m = dropout(adj_m, adj_keep_r, is_train, scope+'/drop1')
    grid = top_k_features(adj_m, outs, k, scope+'/top_k')
    # NOTE(review): this dropout reuses the '/drop1' name from above.
    grid = dropout(grid, keep_r, is_train, scope+'/drop1')

    hidden = conv1d(
        grid, hidden_channels, first_kernel, scope+'/conv1', None, True)
    if act_fn:
        # NOTE(review): unlike the other names, 'act1' has no leading '/'.
        hidden = act_fn(hidden, scope+'act1')
    hidden = dropout(hidden, keep_r, is_train, scope+'/drop2')

    collapsed = conv1d(hidden, num_out, second_kernel, scope+'/conv2', None)
    collapsed = tf.squeeze(collapsed, axis=[1], name=scope+'/squeeze')
    # NOTE(review): batch_norm receives a literal True, not `is_train`.
    return batch_norm(collapsed, True, scope+'/norm2', act_fn)
def top_k_features(adj_m, fea_m, k, scope):
    """Turn graph neighbourhoods into a fixed-size grid per node.

    Shape comments assume ``adj_m`` is (N, N) and ``fea_m`` is (N, C).
    For every node and every feature channel the ``k`` largest
    adjacency-weighted neighbour values are selected, and the node's own
    feature value is placed in front of them.

    Args:
        adj_m: Adjacency tensor.
        fea_m: Node feature tensor.
        k: Number of largest values to keep per node and channel.
        scope: Name prefix for every op created here.

    Returns:
        Tensor of shape (N, k + 1, C) under the assumption above.
    """
    # (N, N) -> (N, 1, N)
    adj_expanded = tf.expand_dims(adj_m, axis=1, name=scope+'/expand1')
    # (N, C) -> (N, C, 1)
    fea_expanded = tf.expand_dims(fea_m, axis=-1, name=scope+'/expand2')
    # Broadcast product, (N, C, N): entry [i, c, j] = adj[i, j] * fea[i, c].
    weighted = tf.multiply(adj_expanded, fea_expanded, name=scope+'/mul')
    # Swap first and last axes so the last axis runs over source nodes i.
    weighted = tf.transpose(weighted, perm=[2, 1, 0], name=scope+'/trans1')
    # (N, C, k): the k largest values along the last axis.
    largest = tf.nn.top_k(weighted, k=k, name=scope+'/top_k').values
    # (N, C, k + 1): the node's own feature goes first.
    stacked = tf.concat([fea_expanded, largest], axis=2, name=scope+'/concat')
    # (N, k + 1, C)
    return tf.transpose(stacked, perm=[0, 2, 1], name=scope+'/trans2')
7.模型损失函数的定义
def cal_loss(self):
    """Define the training loss and the accuracy op.

    ``self.loss_op`` is the masked softmax cross-entropy plus an L2
    penalty (scaled by ``conf.weight_decay``) over all trainable
    variables.
    """
    with tf.variable_scope('loss'):
        self.class_loss = ops.masked_softmax_cross_entropy(
            self.preds, self.labels, self.labels_mask)
        # Weight-decay term: one scaled L2 loss per trainable variable.
        self.regu_loss = sum(
            self.conf.weight_decay * tf.nn.l2_loss(variable)
            for variable in tf.trainable_variables())
        # Classification loss + parameter regularisation loss.
        self.loss_op = self.class_loss + self.regu_loss
    # NOTE(review): the original indentation was lost; the accuracy op is
    # assumed to sit outside the 'loss' scope - confirm.
    self.accuracy_op = ops.masked_accuracy(
        self.preds, self.labels, self.labels_mask)
8.训练过程,输入数据
可见,在给模型输入数据的时候,根据中心点的个数和训练批次的大小,大图就被划分为小图处理了
def pack_trans_dict(self, action):
    """Assemble the feed dict for one step of the given ``action``.

    By default the full graph is fed. For ``'train'`` with
    ``conf.use_batch`` enabled, the graph is replaced by the sub-graph
    induced by the node indices from ``get_indice_graph``.

    NOTE(review): this is an excerpt - only the 'train' branch is shown
    and no return statement is visible.
    """
    feed_dict = {
        self.matrix: self.adj,
        self.normed_matrix: self.normed_adj,
        self.inputs: self.feas,
    }
    if action == 'train':
        feed_dict[self.labels] = self.y_train
        feed_dict[self.labels_mask] = self.train_mask
        feed_dict[self.is_train] = True
        if self.conf.use_batch:
            # Nodes of the sampled sub-graph, grown from the training nodes.
            node_ids = get_indice_graph(
                self.adj, self.train_mask, self.conf.batch_size, 1.0)
            # Shrink both adjacency matrices to the selected rows/columns.
            sub_adj = self.adj[node_ids, :][:, node_ids]
            sub_normed_adj = self.normed_adj[node_ids, :][:, node_ids]
            # What is actually fed while training on a sub-graph.
            feed_dict[self.labels] = self.y_train[node_ids]
            feed_dict[self.labels_mask] = self.train_mask[node_ids]
            feed_dict[self.matrix] = sub_adj
            feed_dict[self.normed_matrix] = sub_normed_adj
            feed_dict[self.inputs] = self.feas[node_ids]
网络层参数的形状
indices size:--------------> 644
indices size:--------------> 1484
indices size:--------------> 2190
9.大图化小的操作
此处是通过广度优先,扩展节点数量
def get_indice_graph(adj, mask, size, keep_r=1.0):
    """Grow a node subset breadth-first, starting from the masked nodes.

    Neighbours of the most recently added nodes are added round after
    round until ``size`` nodes are selected or no new node can be reached.

    Args:
        adj: Square adjacency matrix (``np.matrix`` or 2-D ndarray); a
            non-zero entry ``adj[i, j]`` marks ``j`` as a neighbour of ``i``.
        mask: 1-D array whose non-zero entries mark the seed nodes.
        size: Target number of nodes. Expansion stops once it is reached;
            the seeds themselves are never truncated.
        keep_r: When below 1.0, only this fraction of the seeds is kept
            (random choice without replacement).

    Returns:
        Sorted list of the selected node indices.
    """
    seeds = mask.nonzero()[0]
    if keep_r < 1.0:
        seeds = np.random.choice(seeds, int(seeds.size * keep_r), False)
    selected = set(seeds)
    # The frontier is a separate set object. The earlier version aliased
    # "previous" and "current" to one set and updated it in place, so the
    # set of newly added nodes was always empty on the second pass and
    # the search stopped after a single hop.
    frontier = set(selected)
    while frontier and len(selected) < size:
        candidates = get_candidates(adj, frontier) - selected
        budget = size - len(selected)
        if len(candidates) > budget:
            # Never exceed the requested size: sample the surplus away.
            candidates = set(
                np.random.choice(list(candidates), budget, False))
        selected |= candidates
        frontier = candidates
    print('indices size:-------------->', len(selected))
    return sorted(selected)


def get_candidates(adj, new_add):
    """Return the indices of all nodes adjacent to any node in ``new_add``.

    Args:
        adj: Adjacency matrix (``np.matrix`` or 2-D ndarray).
        new_add: Collection of row indices to look up.

    Returns:
        Set of column indices with a non-zero sum over the selected rows.
    """
    # asarray + ravel flattens both the (1, n) result an np.matrix gives
    # and the (n,) result a plain ndarray gives.
    column_sums = np.asarray(adj[sorted(new_add)].sum(axis=0)).ravel()
    return set(column_sums.nonzero()[0])
给定一个图,我们先采样出一些初始顶点。从它们开始,我们使用广度优先搜索算法,迭代地将邻接顶点扩充到子图内。经过一定次数的迭代后,初始顶点的高阶邻居顶点就会被加进去。注意,我们在算法中使用一个简单的参数Nm。实际上在每个迭代中,我们将Nm设置为了不同的值。
大图化小的算法,与以上get_indice_graph()代码相对应
随机划分子图的例子
随机的切分子图,可以在大尺度的图上训练深层模型。此外,充分利用mini-batch训练方法可以加速学习过程。在每轮训练中,可以使用子图训练方法采样多个子图,然后把它们放到batch中。对应的特征向量和邻接矩阵组成了网络的输入。