import math
import random
import matplotlib.pyplot as plt
import matplotlib.collections as mcoll
import time
def generate_random_points(n):
points = []
for _ in range(n):
x = random.uniform(0, 100)
y = random.uniform(0, 100)
points.append((x, y))
return points
def calculate_mean_distance(points, centroids):
total_distance = 0
for x, y in points:
min_distance = float('inf')
for cx, cy in centroids:
distance = math.sqrt((x - cx) ** 2 + (y - cy) ** 2)
min_distance = min(min_distance, distance)
total_distance += min_distance
return total_distance / len(points)
'''
This method starts with k centroids randomly chosen from the given coordinates.
It then
'''
def k_means(points, k):
centroids = random.sample(points, k)
iterations = 0
while True:
iterations += 1
clusters = [[] for _ in range(k)]
for x, y in points:
min_distance = float('inf')
closest_centroid = None
for i, (cx, cy) in enumerate(centroids):
distance = math.sqrt((x - cx) ** 2 + (y - cy) ** 2)
if distance < min_distance:
min_distance = distance
closest_centroid = i
clusters[closest_centroid].append((x, y))
new_centroids = []
for cluster in clusters:
x_sum = sum(x for x, y in cluster)
y_sum = sum(y for x, y in cluster)
new_centroids.append((x_sum / len(cluster), y_sum / len(cluster)))
if new_centroids == centroids:
break
centroids = new_centroids
plot_iteration(points, centroids, clusters, iterations)
time.sleep(1) # Pause for 1 second
return centroids, clusters
def plot_iteration(points, centroids, clusters, iteration):
plt.clf() # Clear the previous plot
colors = ['b', 'g', 'r', 'c', 'm'] # Colors for clusters
# Plot the random points
x_coords, y_coords = zip(*points)
plt.scatter(x_coords, y_coords, c='k', marker='o', s=10, alpha=0.5, label='Random Points')
# Plot the centroids
centroid_x, centroid_y = zip(*centroids)
plt.scatter(centroid_x, centroid_y, c='r', marker='*', s=100, label='Centroids')
# Plot the line segments and clusters
for i, cluster in enumerate(clusters):
x_coords, y_coords = zip(*cluster)
plt.scatter(x_coords, y_coords, c=colors[i], marker='o', label=f'Cluster {i+1}', alpha=0.5)
line_segments = []
for x, y in cluster:
line_segments.append([(x, y), (centroids[i][0], centroids[i][1])])
line_collection = mcoll.LineCollection(line_segments, colors=colors[i], linewidths=0.5, alpha=0.5)
plt.gca().add_collection(line_collection)
plt.xlim(0, 100)
plt.ylim(0, 100)
plt.title(f'Iteration {iteration}')
plt.xlabel('X')
plt.ylabel('Y')
plt.grid(True)
plt.legend()
plt.pause(0.01) # Pause for a brief moment to update the plot
# Example usage
n = 100 # Number of random points
k = 5 # Number of centroids to find
points = generate_random_points(n)
centroids, clusters = k_means(points, k)
mean_distance = calculate_mean_distance(points, centroids)
print(f"Mean distance of {k} centroids from {n} points: {mean_distance:.2f}")
# Plot the final points, centroids, and line segments
plt.figure(figsize=(8, 6))
colors = ['b', 'g', 'r', 'c', 'm'] # Colors for clusters
for i, cluster in enumerate(clusters):
x_coords, y_coords = zip(*cluster)
plt.scatter(x_coords, y_coords, c=colors[i], marker='o', label=f'Cluster {i+1}', alpha=0.5)
centroid_x, centroid_y = centroids[i]
plt.scatter(centroid_x, centroid_y, c='k', marker='*', s=100)
line_segments = []
for x, y in cluster:
line_segments.append([(x, y), (centroid_x, centroid_y)])
line_collection = mcoll.LineCollection(line_segments, colors=colors[i], linewidths=0.5, alpha=0.5)
plt.gca().add_collection(line_collection)
plt.xlim(0, 100)
plt.ylim(0, 100)
plt.title('Random Points, Centroids, and Line Segments')
plt.xlabel('X')
plt.ylabel('Y')
plt.grid(True)
plt.legend()
plt.show()