Problem
Given a city with coordinates of n houses, find the most optimal location for k hospitals so that the mean distance required to be traveled by the residents of the city is minimum.
Input
n (0 < n <= 100)
(x, y) coordinates of n houses
k (0 < n <= 5) 
Output
k coordinates representing the locations of the hospitals.
Solution
This problem can be solved by using the k-means clustering algorithm which involves finding clusters in a scatter plot based on the condition that the mean distance of the points in a cluster from the cluster centroid is minimum
Python Code 
  
import math
import random
import matplotlib.pyplot as plt
import matplotlib.collections as mcoll
import time
def generate_random_points(n):
    points = []
    for _ in range(n):
        x = random.uniform(0, 100)
        y = random.uniform(0, 100)
        points.append((x, y))
    return points
def calculate_mean_distance(points, centroids):
    total_distance = 0
    for x, y in points:
        min_distance = float('inf')
        for cx, cy in centroids:
            distance = math.sqrt((x - cx) ** 2 + (y - cy) ** 2)
            min_distance = min(min_distance, distance)
        total_distance += min_distance
    return total_distance / len(points)
'''
This method starts with k centroids randomly chosen from the given coordinates.
It then 
'''
def k_means(points, k):
    centroids = random.sample(points, k)
    iterations = 0
    while True:
        iterations += 1
        clusters = [[] for _ in range(k)]
        for x, y in points:
            min_distance = float('inf')
            closest_centroid = None
            for i, (cx, cy) in enumerate(centroids):
                distance = math.sqrt((x - cx) ** 2 + (y - cy) ** 2)
                if distance < min_distance:
                    min_distance = distance
                    closest_centroid = i
            clusters[closest_centroid].append((x, y))
        new_centroids = []
        for cluster in clusters:
            x_sum = sum(x for x, y in cluster)
            y_sum = sum(y for x, y in cluster)
            new_centroids.append((x_sum / len(cluster), y_sum / len(cluster)))
        if new_centroids == centroids:
            break
        centroids = new_centroids
        plot_iteration(points, centroids, clusters, iterations)
        time.sleep(1)  # Pause for 1 second
    return centroids, clusters
def plot_iteration(points, centroids, clusters, iteration):
    plt.clf()  # Clear the previous plot
    colors = ['b', 'g', 'r', 'c', 'm']  # Colors for clusters
    # Plot the random points
    x_coords, y_coords = zip(*points)
    plt.scatter(x_coords, y_coords, c='k', marker='o', s=10, alpha=0.5, label='Random Points')
    # Plot the centroids
    centroid_x, centroid_y = zip(*centroids)
    plt.scatter(centroid_x, centroid_y, c='r', marker='*', s=100, label='Centroids')
    # Plot the line segments and clusters
    for i, cluster in enumerate(clusters):
        x_coords, y_coords = zip(*cluster)
        plt.scatter(x_coords, y_coords, c=colors[i], marker='o', label=f'Cluster {i+1}', alpha=0.5)
        line_segments = []
        for x, y in cluster:
            line_segments.append([(x, y), (centroids[i][0], centroids[i][1])])
        line_collection = mcoll.LineCollection(line_segments, colors=colors[i], linewidths=0.5, alpha=0.5)
        plt.gca().add_collection(line_collection)
    plt.xlim(0, 100)
    plt.ylim(0, 100)
    plt.title(f'Iteration {iteration}')
    plt.xlabel('X')
    plt.ylabel('Y')
    plt.grid(True)
    plt.legend()
    plt.pause(0.01)  # Pause for a brief moment to update the plot
# Example usage
n = 100  # Number of random points
k = 5    # Number of centroids to find
points = generate_random_points(n)
centroids, clusters = k_means(points, k)
mean_distance = calculate_mean_distance(points, centroids)
print(f"Mean distance of {k} centroids from {n} points: {mean_distance:.2f}")
# Plot the final points, centroids, and line segments
plt.figure(figsize=(8, 6))
colors = ['b', 'g', 'r', 'c', 'm']  # Colors for clusters
for i, cluster in enumerate(clusters):
    x_coords, y_coords = zip(*cluster)
    plt.scatter(x_coords, y_coords, c=colors[i], marker='o', label=f'Cluster {i+1}', alpha=0.5)
    centroid_x, centroid_y = centroids[i]
    plt.scatter(centroid_x, centroid_y, c='k', marker='*', s=100)
    line_segments = []
    for x, y in cluster:
        line_segments.append([(x, y), (centroid_x, centroid_y)])
    line_collection = mcoll.LineCollection(line_segments, colors=colors[i], linewidths=0.5, alpha=0.5)
    plt.gca().add_collection(line_collection)
plt.xlim(0, 100)
plt.ylim(0, 100)
plt.title('Random Points, Centroids, and Line Segments')
plt.xlabel('X')
plt.ylabel('Y')
plt.grid(True)
plt.legend()
plt.show()
  The above code requires matplotlib library to be installed. 
Scatter Plot
The circles represent the coordinates of the houses, stars represent the cluster centroids (or hospitals) and the line segments represent the nearest centroid.

No comments:
Post a Comment