Problem
Given a city with coordinates of n houses, find the most optimal location for k hospitals so that the mean distance required to be traveled by the residents of the city is minimum.
Input
n (0 < n <= 100)
(x, y) coordinates of n houses
k (0 < n <= 5)
Output
k coordinates representing the locations of the hospitals.
Solution
This problem can be solved by using the k-means clustering algorithm which involves finding clusters in a scatter plot based on the condition that the mean distance of the points in a cluster from the cluster centroid is minimum
Python Code
import math import random import matplotlib.pyplot as plt import matplotlib.collections as mcoll import time def generate_random_points(n): points = [] for _ in range(n): x = random.uniform(0, 100) y = random.uniform(0, 100) points.append((x, y)) return points def calculate_mean_distance(points, centroids): total_distance = 0 for x, y in points: min_distance = float('inf') for cx, cy in centroids: distance = math.sqrt((x - cx) ** 2 + (y - cy) ** 2) min_distance = min(min_distance, distance) total_distance += min_distance return total_distance / len(points) ''' This method starts with k centroids randomly chosen from the given coordinates. It then ''' def k_means(points, k): centroids = random.sample(points, k) iterations = 0 while True: iterations += 1 clusters = [[] for _ in range(k)] for x, y in points: min_distance = float('inf') closest_centroid = None for i, (cx, cy) in enumerate(centroids): distance = math.sqrt((x - cx) ** 2 + (y - cy) ** 2) if distance < min_distance: min_distance = distance closest_centroid = i clusters[closest_centroid].append((x, y)) new_centroids = [] for cluster in clusters: x_sum = sum(x for x, y in cluster) y_sum = sum(y for x, y in cluster) new_centroids.append((x_sum / len(cluster), y_sum / len(cluster))) if new_centroids == centroids: break centroids = new_centroids plot_iteration(points, centroids, clusters, iterations) time.sleep(1) # Pause for 1 second return centroids, clusters def plot_iteration(points, centroids, clusters, iteration): plt.clf() # Clear the previous plot colors = ['b', 'g', 'r', 'c', 'm'] # Colors for clusters # Plot the random points x_coords, y_coords = zip(*points) plt.scatter(x_coords, y_coords, c='k', marker='o', s=10, alpha=0.5, label='Random Points') # Plot the centroids centroid_x, centroid_y = zip(*centroids) plt.scatter(centroid_x, centroid_y, c='r', marker='*', s=100, label='Centroids') # Plot the line segments and clusters for i, cluster in enumerate(clusters): x_coords, y_coords = zip(*cluster) plt.scatter(x_coords, y_coords, c=colors[i], marker='o', label=f'Cluster {i+1}', alpha=0.5) line_segments = [] for x, y in cluster: line_segments.append([(x, y), (centroids[i][0], centroids[i][1])]) line_collection = mcoll.LineCollection(line_segments, colors=colors[i], linewidths=0.5, alpha=0.5) plt.gca().add_collection(line_collection) plt.xlim(0, 100) plt.ylim(0, 100) plt.title(f'Iteration {iteration}') plt.xlabel('X') plt.ylabel('Y') plt.grid(True) plt.legend() plt.pause(0.01) # Pause for a brief moment to update the plot # Example usage n = 100 # Number of random points k = 5 # Number of centroids to find points = generate_random_points(n) centroids, clusters = k_means(points, k) mean_distance = calculate_mean_distance(points, centroids) print(f"Mean distance of {k} centroids from {n} points: {mean_distance:.2f}") # Plot the final points, centroids, and line segments plt.figure(figsize=(8, 6)) colors = ['b', 'g', 'r', 'c', 'm'] # Colors for clusters for i, cluster in enumerate(clusters): x_coords, y_coords = zip(*cluster) plt.scatter(x_coords, y_coords, c=colors[i], marker='o', label=f'Cluster {i+1}', alpha=0.5) centroid_x, centroid_y = centroids[i] plt.scatter(centroid_x, centroid_y, c='k', marker='*', s=100) line_segments = [] for x, y in cluster: line_segments.append([(x, y), (centroid_x, centroid_y)]) line_collection = mcoll.LineCollection(line_segments, colors=colors[i], linewidths=0.5, alpha=0.5) plt.gca().add_collection(line_collection) plt.xlim(0, 100) plt.ylim(0, 100) plt.title('Random Points, Centroids, and Line Segments') plt.xlabel('X') plt.ylabel('Y') plt.grid(True) plt.legend() plt.show()
The above code requires matplotlib library to be installed.
Scatter Plot
The circles represent the coordinates of the houses, stars represent the cluster centroids (or hospitals) and the line segments represent the nearest centroid.
No comments:
Post a Comment