Alternate approach for Calculating Closest Cluster

K-means

Clean code without many if/else branches.

import math

import numpy as np

# Add the function, `assign_to_cluster`
# This creates the column, `cluster`, by applying assign_to_cluster row-by-row
# Uncomment when ready


def assign_to_cluster(row):
    """Return the id of the centroid closest to ``row``.

    The distance is the Euclidean distance between the pair
    ``(row["ppg"], row["atr"])`` and each centroid's feature values.

    Args:
        row: Any object supporting lookup by the keys ``"ppg"`` and ``"atr"``.

    Returns:
        The key of ``centroids_dict`` whose features are nearest to ``row``.
        On a tie, the key that comes first in the dict's iteration order wins.

    Raises:
        ValueError: If ``centroids_dict`` is empty (``min`` of an empty dict).

    Note:
        Reads the module-level ``centroids_dict`` (centroid id -> feature
        values), which must be defined before this function is called.
    """
    # The row's coordinates do not depend on the centroid, so build them once
    # instead of on every loop iteration.
    point = np.array([row["ppg"], row["atr"]])

    distance_by_id = {}
    for centroid_id, features in centroids_dict.items():
        squared_diffs = (np.array(features) - point) ** 2
        distance_by_id[centroid_id] = math.sqrt(squared_diffs.sum())

    # Pick the key (cluster id) whose stored distance is the smallest.
    return min(distance_by_id, key=distance_by_id.get)
    
    


# Create the `cluster` column by calling assign_to_cluster on each row
# (axis=1 applies the function row-by-row). The function is passed directly;
# wrapping it in a lambda adds nothing.
point_guards['cluster'] = point_guards.apply(assign_to_cluster, axis=1)

2 Likes