4 examples of 'k means clustering python' in Python

Every line of 'k means clustering python' code snippets is scanned for vulnerabilities by our powerful machine learning engine that combs millions of open source libraries, ensuring your Python code is secure.

All examples are scanned by Snyk Code

By copying the Snyk Code Snippets you agree to
def cluster_k_means(points, k, distance, num_iterations=10):
    """Cluster a group of points into at most k groups with k-means.

    Algorithm:
      1. Initialization: pick k points at random and use them as the initial
         means of the clusters.
      2. For each cluster, compute the mean of all points in the cluster. A
         cluster that currently holds no points keeps its previous mean.
      3. For each point, compute the distance to the current means and assign
         it to the cluster whose mean is closest (the first one wins a tie).
      4. Repeat 2 and 3 until the number of iterations allowed is finished.

    Args:
      points: list of tuples whose first two items are the x and y
        coordinates. Any further items (for example a label) stay attached
        to the point but are ignored when the means are computed.
      k: int, number of clusters to generate.
      distance: function, called as distance(point, mean) where mean is an
        (x, y) tuple; returns the distance between the two.
      num_iterations: int, the number of iterations before stopping.

    Returns:
      A dict with format {cluster_mean: [list of points in cluster]}, where
      cluster_mean is an (x, y) tuple. Means that coincide share one key, so
      the dict can hold fewer than k entries. When num_iterations is 0 the
      keys are the sampled points themselves.

    Raises:
      ValueError: raised by random.sample when k is negative or larger than
        the number of points.
    """
    # Initialization: every cluster starts out as one randomly chosen point.
    means = random.sample(points, k)
    clusters = dict((mean, [mean]) for mean in means)

    for _ in range(num_iterations):
        # Update step: move each mean to the centroid of its members.
        new_means = []
        for mean, members in clusters.items():
            if not members:
                # Nothing was assigned to this cluster; keep the old mean
                # instead of dividing by zero.
                new_means.append(mean)
                continue
            # float() keeps the division exact even for integer coordinates.
            count = float(len(members))
            new_means.append((sum(p[0] for p in members) / count,
                              sum(p[1] for p in members) / count))

        clusters = dict((mean, []) for mean in new_means)
        # Iterate the dict keys so coinciding means are considered only once.
        means = list(clusters)

        # Assignment step: min() returns the first closest mean, which
        # matches a strict "smaller than" comparison scan.
        for point in points:
            nearest = min(means, key=lambda mean: distance(point, mean))
            clusters[nearest].append(point)

    return clusters
def kmeans_clusters(self, n_clusters, data):
    """Fit a KMeans model on ``data`` and return its prediction for ``data``.

    The model is built with ``n_clusters`` clusters, fitted on ``data`` and
    then asked to predict on that same ``data``.

    NOTE(review): ``cluster`` is presumably ``sklearn.cluster`` -- confirm
    against the module's imports.
    """
    model = cluster.KMeans(n_clusters=n_clusters)
    model.fit(data)
    labels = model.predict(data)
    return labels
def kmeans(X, cluster_num, numepochs, learningrate=0.01, batchsize=100, verbose=True):
    '''
    klp_kmeans based NUMPY, better for small scale problems
    inherited from http://www.iro.umontreal.ca/~memisevr/code.html

    Mini-batch k-means: the centers start from standard-normal noise and,
    for every batch, each center is nudged towards the samples that are
    closest to it.

    :param X: 2-D numpy array, one sample per row
    :param cluster_num: number of centers to learn
    :param numepochs: number of passes over X
    :param learningrate: step size applied to every batch update
    :param batchsize: number of rows of X processed per update
    :param verbose: when true, print one progress line per epoch; the cost
        shown is the summed minimum squared distance of the last batch
        processed in that epoch
    :return: numpy array of shape (cluster_num, X.shape[1]) with the centers
    '''

    rng = np.random
    W = rng.randn(cluster_num, X.shape[1])
    # Squared norm of every sample, computed once and sliced per batch.
    X2 = (X**2).sum(1)[:, None]
    # Stays None when X has no rows, so the progress line is skipped rather
    # than failing on an unbound name.
    D = None
    for epoch in range(numepochs):
        for i in range(0, X.shape[0], batchsize):
            batch = X[i:i+batchsize, :]
            # Squared distances, centers along axis 0 and samples along axis 1:
            # |w|^2 - 2 w.x + |x|^2
            D = -2*np.dot(W, batch.T) + (W**2).sum(1)[:, None] + X2[i:i+batchsize].T
            # One-hot (samples x centers) membership of the nearest center.
            S = (D == D.min(0)[None, :]).astype("float").T
            W += learningrate * (np.dot(S.T, batch) - S.sum(0)[:, None] * W)
        if verbose and D is not None:
            # A single pre-formatted string prints the same text under both
            # the Python 2 print statement and the Python 3 print function.
            print("epoch %s of %s  cost:  %s" % (epoch, numepochs, D.min(0).sum()))
    return W
def kmeans(data_tuple, data_type, command_list):
    """
    Run k-means with the cluster size given as the first command argument.

    :param data_tuple: matrix[sent vectors], array[string sentences], array[string filename]
    :param data_type: String -> fact/decision
    :param command_list: command line arguments; the first item is converted
        with int() and used as the cluster size
    :return: True once clustering has run; False when a ValueError was
        raised (it is logged as a non-numeric command)
    """
    try:
        wrapper = KMeansWrapper(
            data_tuple, data_type, cluster_size=int(command_list[0])
        )
        wrapper.cluster()
    except ValueError:
        Log.write("Commands must be numerics")
        return False
    return True

Related snippets