TensorFlow 实现 K-means
2018-01-12 本文已影响0人
王小鸟_wpcool
import tensorflow as tf
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Build a synthetic 2-D data set: each point is drawn with probability 0.5
# from a Gaussian blob centred at (0, 0) with std 0.9, otherwise from a blob
# centred at (3, 1) with std 0.5.
num_points = 2000
conjunto_points = [
    [np.random.normal(0.0, 0.9), np.random.normal(0.0, 0.9)]
    if np.random.random() < 0.5
    else [np.random.normal(3.0, 0.5), np.random.normal(1.0, 0.5)]
    for _ in range(num_points)
]

# Scatter-plot the raw points before clustering (fit_reg=False: no regression line).
df = pd.DataFrame(
    {
        "x1": [point[0] for point in conjunto_points],
        "y1": [point[1] for point in conjunto_points],
    }
)
# x and y are passed by keyword: recent seaborn releases accept only `data`
# positionally, and the keyword form also works on older releases.
sns.lmplot(x="x1", y="y1", data=df, fit_reg=False)
plt.show()
# NOTE: this section uses the TensorFlow 1.x graph/session API
# (tf.Session, tf.random_shuffle, tf.global_variables_initializer).
sess = tf.Session()
np.set_printoptions(threshold=5)  # summarise arrays with more than 5 elements when printed

# All sample points as a constant tensor of shape (num_points, 2).
vectors = tf.constant(conjunto_points)

# Number of clusters.
k = 4

# Initial centroids: the first k rows of a random shuffle of the data, shape (k, 2).
centroides = tf.Variable(tf.slice(tf.random_shuffle(vectors), [0, 0], [k, -1]))

# Broadcast (1, num_points, 2) against (k, 1, 2) to obtain every
# point-to-centroid difference, then sum the squares over the coordinate axis
# to get a (k, num_points) matrix of squared Euclidean distances.
expended_vector = tf.expand_dims(vectors, 0)
expended_centroides = tf.expand_dims(centroides, 1)
reduce_sum = tf.reduce_sum(
    tf.square(tf.subtract(expended_vector, expended_centroides)), 2
)

# Index of the nearest centroid for each point, shape (num_points,).
assignments = tf.argmin(reduce_sum, 0)

# New centroid of cluster c = mean of the points currently assigned to c.
# Each per-cluster mean has shape (1, 2); concatenating gives (k, 2).
# NOTE(review): a cluster with no assigned points would presumably yield a
# NaN mean here -- confirm and guard if that can happen with your data.
means = tf.concat(
    [
        tf.reduce_mean(
            tf.gather(
                vectors,
                tf.reshape(tf.where(tf.equal(assignments, c)), [1, -1]),
            ),
            axis=[1],  # `axis` replaces the deprecated `reduction_indices`
        )
        for c in range(k)
    ],
    0,
)

# One k-means step: overwrite the centroids with the freshly computed means.
update_centroides = tf.assign(centroides, means)

init = tf.global_variables_initializer()
sess.run(init)

# Print the centroids actually stored in the variable. Evaluating a second
# tf.random_shuffle expression would draw a different shuffle and therefore
# show values unrelated to the real starting centroids.
print("centroides = ", sess.run(centroides))
# Run a fixed number of k-means update steps. Each run of `update_centroides`
# recomputes the assignments and cluster means and stores the means back
# into `centroides`.
num_steps = 100
for _ in range(num_steps):
    sess.run(update_centroides)

# Final centroids, and the cluster index of every point relative to them.
print(sess.run(centroides))
assignment_values = sess.run(assignments)

# Nothing below needs the graph any more; release the session's resources.
sess.close()
# Collect the point coordinates together with the cluster index assigned to
# each point (assignment_values holds one entry per point, in point order).
data = {
    "x": [point[0] for point in conjunto_points],
    "y": [point[1] for point in conjunto_points],
    "cluster": list(assignment_values),
}
df = pd.DataFrame(data)

# Scatter plot coloured by cluster. x/y are passed by keyword (recent seaborn
# accepts only `data` positionally) and `height` is the current name of the
# facet-size parameter formerly called `size` (renamed in seaborn 0.9).
sns.lmplot(
    x="x",
    y="y",
    data=df,
    fit_reg=False,
    height=6,
    hue="cluster",
    legend=False,
)
plt.show()

