Professional Documents
Culture Documents
AP19110010030 R Lab-Assignment-5
AP19110010030 R Lab-Assignment-5
AP19110010030 R Lab-Assignment-5
Kilaru Sravan
AP19110010030
CSE-A
Implementing K-Means
In [1]:
library("ggplot2")
library("matrixStats")
# Load the BMI data set from the working directory; the first line of the
# CSV file is used as the column header.
mydata <- read.csv(file = "BMI.csv", header = TRUE)
# Preview the first eight rows.
head(x = mydata, n = 8)
Warning message:
In [2]:
Height Weight
174 96
189 87
185 110
195 104
149 61
189 104
147 92
154 111
localhost:8888/notebooks/AP19110010030_R_Lab-Assignment-5.ipynb 1/6
19/04/2022, 09:32 AP19110010030_R_Lab-Assignment-5 - Jupyter Notebook
In [3]:
In [4]:
# Number of clusters to fit.
k <- 4
# Draw k distinct row indices of df at random (sampling without replacement)
# and use those rows as the starting centre points.
k_cent_points <- df[sample(x = nrow(df), size = k), ]
# Show the selected starting centres.
k_cent_points
Height Weight
94 177 61
265 147 84
280 188 57
localhost:8888/notebooks/AP19110010030_R_Lab-Assignment-5.ipynb 2/6
19/04/2022, 09:32 AP19110010030_R_Lab-Assignment-5 - Jupyter Notebook
In [6]:
repeat
{
x = df$cluster
df$dist_1 = ((k_cent_points[1, "Height"]-df$Height)^2 + (k_cent_points[1, "Weight"]-df$
df$dist_2 = ((k_cent_points[2, "Height"]-df$Height)^2 + (k_cent_points[2, "Weight"]-df$
df$dist_3 = ((k_cent_points[3, "Height"]-df$Height)^2 + (k_cent_points[3, "Weight"]-df$
df$dist_4 = ((k_cent_points[4, "Height"]-df$Height)^2 + (k_cent_points[4, "Weight"]-df$
df$min_dist=rowMins(as.matrix(df[,c(3, 4, 5, 6)]))
for (i in 1:500)
{
if (df[i,"min_dist"]==df[i,"dist_1"]){
df$cluster[i]=1
}
if (df[i,"min_dist"]==df[i,"dist_2"]){
df$cluster[i]=2
}
if (df[i,"min_dist"]==df[i,"dist_3"]){
df$cluster[i]=3
}
if (df[i,"min_dist"]==df[i,"dist_4"]){
df$cluster[i]=4
}
}
y = df$cluster
f = 0
for (i in 1:500){
if (x[i]==y[i]){
f = f + 1
}
}
if(f == 500)
{
break
}
sum_Height_cluster1 = 0
sum_Height_cluster2 = 0
sum_Height_cluster3 = 0
sum_Height_cluster4 = 0
sum_Weight_cluster1 = 0
sum_Weight_cluster2 = 0
sum_Weight_cluster3 = 0
sum_Weight_cluster4 = 0
for (i in 1:500)
{
if (df[i,"cluster"]==1){
sum_Height_cluster1 = sum_Height_cluster1 + df[i,"Height"]
sum_Weight_cluster1 = sum_Weight_cluster1 + df[i,"Weight"]
}
if (df[i,"cluster"]==2){
sum_Height_cluster2 = sum_Height_cluster2 + df[i,"Height"]
sum_Weight_cluster2 = sum_Weight_cluster2 + df[i,"Weight"]
}
if (df[i,"cluster"]==3){
sum_Height_cluster3 = sum_Height_cluster3 + df[i,"Height"]
sum_Weight_cluster3 = sum_Weight_cluster3 + df[i,"Weight"]
}
if (df[i,"cluster"]==4){
sum_Height_cluster4 = sum_Height_cluster4 + df[i,"Height"]
localhost:8888/notebooks/AP19110010030_R_Lab-Assignment-5.ipynb 3/6
19/04/2022, 09:32 AP19110010030_R_Lab-Assignment-5 - Jupyter Notebook
localhost:8888/notebooks/AP19110010030_R_Lab-Assignment-5.ipynb 4/6
19/04/2022, 09:32 AP19110010030_R_Lab-Assignment-5 - Jupyter Notebook
In [8]:
# Show the centre points and f, the count of rows whose cluster label was
# unchanged in the last pass of the loop.
print(k_cent_points)
f
# The cluster column appears to hold numeric labels (1-4). Mapping it through
# factor() makes ggplot2 use a discrete colour per cluster instead of a
# continuous gradient; labs() keeps the legend title as "cluster".
plot1 <- ggplot(df, aes(x = Height, y = Weight, colour = factor(cluster))) +
  geom_point() +
  labs(colour = "cluster")
plot1
Height Weight
94 177 61
265 147 84
280 188 57
500
In [9]:
500
In [ ]:
localhost:8888/notebooks/AP19110010030_R_Lab-Assignment-5.ipynb 5/6