Given the coordinates of n houses in a city, find the optimal locations for k hospitals so that the mean distance residents must travel to reach their nearest hospital is minimized.
Input
n (0 < n <= 100)
(x, y) coordinates of n houses
k (0 < k <= 5)
Output
k coordinates representing the locations of the hospitals.
Solution
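This problem can be solved with the k-means clustering algorithm, which groups the points of a scatter plot into k clusters so that the points in each cluster are as close as possible to that cluster's centroid. Strictly speaking, k-means minimizes the sum of squared distances to the centroids and may converge to a local optimum, so the result is a good approximation rather than a guaranteed global minimum.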
Python Code
import math
import random
import matplotlib.pyplot as plt
import matplotlib.collections as mcoll
import time
def generate_random_points(n, low=0, high=100):
    """Return ``n`` random 2-D points drawn uniformly from a square.

    Args:
        n: Number of points to generate. Zero or a negative value yields
            an empty list.
        low: Lower bound used for both coordinates (default 0).
        high: Upper bound used for both coordinates (default 100).

    Returns:
        A list of ``(x, y)`` tuples of floats.
    """
    # The tuple is evaluated left to right, so x is drawn before y for every
    # point; a seeded ``random`` therefore yields a reproducible sequence.
    return [
        (random.uniform(low, high), random.uniform(low, high))
        for _ in range(n)
    ]
def calculate_mean_distance(points, centroids):
    """Return the mean distance from each point to its nearest centroid.

    Args:
        points: Sequence of ``(x, y)`` coordinates; must not be empty.
        centroids: Iterable of ``(x, y)`` centroid coordinates.

    Returns:
        The average, over all points, of the Euclidean distance to the
        closest centroid. If ``centroids`` is empty every point counts as
        infinitely far away and the result is ``float('inf')``.

    Raises:
        ValueError: If ``points`` is empty (a mean over zero points is
            undefined).
    """
    # Fail with a clear message instead of a bare ZeroDivisionError below.
    if len(points) == 0:
        raise ValueError("points must contain at least one coordinate")

    total_distance = 0.0
    for x, y in points:
        total_distance += min(
            (math.hypot(x - cx, y - cy) for cx, cy in centroids),
            default=float('inf'),
        )
    return total_distance / len(points)
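'''
This method starts with k centroids randomly chosen from the given coordinates.
It then repeatedly assigns every point to its nearest centroid and moves each
centroid to the mean of the points assigned to it, stopping as soon as the
centroids no longer change.
'''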
def k_means(points, k, visualize=True, delay=1):
    """Cluster ``points`` into ``k`` groups by iterating assign/update steps.

    The initial centroids are ``k`` entries sampled at random from
    ``points``. Each iteration assigns every point to its nearest centroid
    and then moves each centroid to the mean of its assigned points. The
    loop ends when an iteration leaves all centroids unchanged.

    Args:
        points: Sequence of ``(x, y)`` coordinates.
        k: Number of clusters / centroids to compute.
        visualize: When true (the default), call ``plot_iteration`` after
            every centroid update and then sleep for ``delay`` seconds.
        delay: Seconds to pause after each plotted iteration (default 1).

    Returns:
        A ``(centroids, clusters)`` tuple: ``centroids`` is a list of ``k``
        ``(x, y)`` tuples and ``clusters[i]`` is the list of points assigned
        to ``centroids[i]``. A cluster may be empty.

    Raises:
        ValueError: Propagated from ``random.sample`` when ``k`` is larger
            than ``len(points)`` or negative.
    """
    centroids = random.sample(points, k)
    iterations = 0
    while True:
        iterations += 1

        # Assignment step: put each point in the cluster of its nearest
        # centroid. The strict "<" keeps the lowest index on ties.
        clusters = [[] for _ in range(k)]
        for x, y in points:
            min_distance = float('inf')
            closest_centroid = None
            for i, (cx, cy) in enumerate(centroids):
                distance = math.hypot(x - cx, y - cy)
                if distance < min_distance:
                    min_distance = distance
                    closest_centroid = i
            clusters[closest_centroid].append((x, y))

        # Update step: move each centroid to the mean of its cluster.
        new_centroids = []
        for centroid, cluster in zip(centroids, clusters):
            if cluster:
                x_sum = sum(x for x, _ in cluster)
                y_sum = sum(y for _, y in cluster)
                new_centroids.append(
                    (x_sum / len(cluster), y_sum / len(cluster))
                )
            else:
                # An empty cluster has no mean (this used to divide by
                # zero); leave its centroid where it is.
                new_centroids.append(centroid)

        if new_centroids == centroids:
            break
        centroids = new_centroids

        if visualize:
            plot_iteration(points, centroids, clusters, iterations)
            time.sleep(delay)  # Pause so each iteration stays visible
    return centroids, clusters
def plot_iteration(points, centroids, clusters, iteration):
    """Redraw the current matplotlib figure for one k-means iteration.

    Clears the figure, then draws every point, the centroids as stars, and
    each non-empty cluster in its own color with thin line segments joining
    the cluster's points to its centroid.

    Args:
        points: Sequence of all ``(x, y)`` coordinates.
        centroids: Sequence of ``(x, y)`` centroids; ``centroids[i]``
            belongs to ``clusters[i]``.
        clusters: List of point lists, one per centroid.
        iteration: Iteration number shown in the plot title.
    """
    plt.clf()  # Clear the previous plot
    colors = ['b', 'g', 'r', 'c', 'm']  # Colors for clusters

    # Plot the random points
    x_coords, y_coords = zip(*points)
    plt.scatter(x_coords, y_coords, c='k', marker='o', s=10, alpha=0.5, label='Random Points')

    # Plot the centroids
    centroid_x, centroid_y = zip(*centroids)
    plt.scatter(centroid_x, centroid_y, c='r', marker='*', s=100, label='Centroids')

    # Plot the line segments and clusters
    for i, cluster in enumerate(clusters):
        if not cluster:
            # zip(*[]) cannot be unpacked into two sequences; nothing to draw.
            continue
        # Cycle through the palette so more than len(colors) clusters work.
        color = colors[i % len(colors)]
        x_coords, y_coords = zip(*cluster)
        plt.scatter(x_coords, y_coords, c=color, marker='o', label=f'Cluster {i+1}', alpha=0.5)
        cx, cy = centroids[i]
        line_segments = [[(x, y), (cx, cy)] for x, y in cluster]
        line_collection = mcoll.LineCollection(line_segments, colors=color, linewidths=0.5, alpha=0.5)
        plt.gca().add_collection(line_collection)

    plt.xlim(0, 100)
    plt.ylim(0, 100)
    plt.title(f'Iteration {iteration}')
    plt.xlabel('X')
    plt.ylabel('Y')
    plt.grid(True)
    plt.legend()
    plt.pause(0.01)  # Pause for a brief moment to update the plot
# Example usage: cluster random points, report the mean distance to the
# nearest centroid, and draw the final clustering.
n = 100  # Number of random points
k = 5  # Number of centroids to find
points = generate_random_points(n)
centroids, clusters = k_means(points, k)
mean_distance = calculate_mean_distance(points, centroids)
print(f"Mean distance of {k} centroids from {n} points: {mean_distance:.2f}")

# Plot the final points, centroids, and line segments
plt.figure(figsize=(8, 6))
colors = ['b', 'g', 'r', 'c', 'm']  # Colors for clusters
for i, cluster in enumerate(clusters):
    # Cycle through the palette so more than len(colors) clusters work.
    color = colors[i % len(colors)]
    centroid_x, centroid_y = centroids[i]
    if cluster:
        # zip(*[]) cannot be unpacked, so only non-empty clusters are drawn.
        x_coords, y_coords = zip(*cluster)
        plt.scatter(x_coords, y_coords, c=color, marker='o', label=f'Cluster {i+1}', alpha=0.5)
    plt.scatter(centroid_x, centroid_y, c='k', marker='*', s=100)
    if cluster:
        line_segments = [[(x, y), (centroid_x, centroid_y)] for x, y in cluster]
        line_collection = mcoll.LineCollection(line_segments, colors=color, linewidths=0.5, alpha=0.5)
        plt.gca().add_collection(line_collection)
plt.xlim(0, 100)
plt.ylim(0, 100)
plt.title('Random Points, Centroids, and Line Segments')
plt.xlabel('X')
plt.ylabel('Y')
plt.grid(True)
plt.legend()
plt.show()
The above code requires the matplotlib library to be installed.
Scatter Plot
The circles represent the coordinates of the houses, the stars represent the cluster centroids (the hospitals), and the line segments connect each house to its nearest centroid.