Every line of the 'k means clustering python' code snippets below is scanned for vulnerabilities by our powerful machine learning engine, which combs millions of open source libraries to help ensure your Python code is secure.
import random

def cluster_k_means(points, k, distance, num_iterations=10):
    """Clusters a group of points into k groups given a distance function
    between two points.

    Algorithm:
    1. initialization: pick k points at random and set them as the initial
       means of the clusters.
    2. for each point compute the distance to the current means and assign
       it to the cluster whose mean is closest.
    3. for each cluster, compute the mean of all points in the cluster.
    4. repeat 2 and 3 until convergence, i.e. until the global error falls
       under a specified threshold or the allowed number of iterations is
       exhausted.

    Args:
        points: list of tuples, format (x, y) where x and y are the coordinates.
        k: int, number of clusters to generate.
        distance: function, computes the distance between two points.
        num_iterations: int, the number of iterations before stopping.

    Returns:
        A dict with format {cluster_mean: [list of points in cluster]}.
    """
    # Initialization: pick k random points as the initial cluster means.
    means = random.sample(points, k)
    clusters = dict((mean, [mean]) for mean in means)

    for _ in range(num_iterations):
        # Recompute each cluster's mean from its member points.
        new_means = []
        for mean, cluster_points in clusters.items():
            new_mean_x = sum(x for (x, _) in cluster_points) / len(cluster_points)
            new_mean_y = sum(y for (_, y) in cluster_points) / len(cluster_points)
            new_means.append((new_mean_x, new_mean_y))

        clusters = dict((mean, []) for mean in new_means)
        means = new_means

        # Assign every point to the cluster with the closest mean.
        for point in points:
            min_dist = float('inf')
            new_mean = None
            for mean in means:
                dist = distance(point, mean)
                if min_dist > dist:
                    min_dist = dist
                    new_mean = mean
            clusters[new_mean].append(point)

    return clusters
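A quick usage sketch for the function above. The Euclidean helper and the toy points are illustrative additions, not part of the original snippet:

import math

def euclidean(p, q):
    # Straight-line distance between two (x, y) points.
    return math.hypot(p[0] - q[0], p[1] - q[1])

points = [(1.0, 1.0), (1.5, 2.0), (0.5, 1.2), (8.0, 8.0), (9.0, 8.5)]
clusters = cluster_k_means(points, k=2, distance=euclidean)
for mean, members in clusters.items():
    print(mean, members)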
def kmeans_clusters(self, n_clusters, data):
    # `cluster` is assumed to be sklearn.cluster, imported at module level.
    k_means = cluster.KMeans(n_clusters=n_clusters)
    k_means.fit(data)
    return k_means.predict(data)
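The enclosing class is not shown, so here is a rough standalone equivalent that calls scikit-learn directly; the data and the number of clusters are illustrative only:

import numpy as np
from sklearn import cluster

data = np.array([[1.0, 2.0], [1.1, 1.9], [8.0, 8.2], [7.9, 8.1]])
k_means = cluster.KMeans(n_clusters=2)
k_means.fit(data)
labels = k_means.predict(data)  # one cluster index per row of data
print(labels)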
import numpy as np

def kmeans(X, cluster_num, numepochs, learningrate=0.01, batchsize=100, verbose=True):
    '''
    klp_kmeans based on NumPy, better for small scale problems
    inherited from http://www.iro.umontreal.ca/~memisevr/code.html
    '''
    rng = np.random
    # One row of W per cluster center, initialised at random.
    W = rng.randn(cluster_num, X.shape[1])
    X2 = (X**2).sum(1)[:, None]
    for epoch in range(numepochs):
        for i in range(0, X.shape[0], batchsize):
            # Squared distances from every center to every point in the batch.
            D = -2*np.dot(W, X[i:i+batchsize, :].T) + (W**2).sum(1)[:, None] + X2[i:i+batchsize].T
            # One-hot assignment of each point to its nearest center.
            S = (D == D.min(0)[None, :]).astype("float").T
            # Move each center towards the mean of the points assigned to it.
            W += learningrate * (np.dot(S.T, X[i:i+batchsize, :]) - S.sum(0)[:, None] * W)
        if verbose:
            print("epoch", epoch, "of", numepochs, " cost: ", D.min(0).sum())
    return W
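A minimal usage sketch for the mini-batch version above; the two-blob data and the hyperparameters are made up for the example:

import numpy as np

rng = np.random.RandomState(0)
# 100 two-dimensional points in two well separated blobs.
X = np.vstack([rng.randn(50, 2), rng.randn(50, 2) + 5.0])
W = kmeans(X, cluster_num=2, numepochs=20, learningrate=0.01, batchsize=25, verbose=False)
print(W)  # one learned center per row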
def kmeans(data_tuple, data_type, command_list):
    """
    run k-means
    :param data_tuple: matrix[sent vectors], array[string sentences], array[string filename]
    :param data_type: String -> fact/decision
    :param command_list: command line arguments
    :return: True on success, False if the cluster size argument is not numeric
    """
    try:
        cluster_size = int(command_list[0])
        KMeansWrapper(data_tuple, data_type, cluster_size=cluster_size).cluster()
        return True
    except ValueError:
        Log.write("Commands must be numerics")
        return False
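A hedged sketch of the call pattern only: KMeansWrapper and Log are defined elsewhere in the original project, and the placeholder vectors, sentences, and filenames below are invented for illustration.

# Hypothetical inputs; in the original project these come from preprocessing.
sent_vectors = [[0.1, 0.2], [0.3, 0.1], [0.9, 0.8]]
sentences = ["sentence a", "sentence b", "sentence c"]
filenames = ["a.txt", "b.txt", "c.txt"]

# command_list would typically come from the CLI, e.g. sys.argv; "2" requests 2 clusters.
ok = kmeans((sent_vectors, sentences, filenames), "fact", ["2"])
print("clustering succeeded" if ok else "cluster size argument must be numeric")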