AP19110010030 R Lab-Assignment-5

You might also like

Download as pdf or txt
Download as pdf or txt
You are on page 1of 5

19/04/2022, 09:32 AP19110010030_R_Lab-Assignment-5 - Jupyter Notebook

Kilaru Sravan

AP19110010030

CSE-A

Implementing K-Means
In [1]:

library("ggplot2")
library("matrixStats")
mydata = read.csv("BMI.csv", header = TRUE)
head(mydata, 8)

Warning message:

"package 'matrixStats' was built under R version 3.6.3"

Gender Height Weight Category Index

Male 174 96 Over weight 2

Male 189 87 Weak 0

Female 185 110 Over weight 2

Female 195 104 Normal 1

Male 149 61 Normal 1

Male 189 104 Normal 1

Male 147 92 Obesity 3

Male 154 111 Obesity 3

In [2]:

df = subset(mydata, select = c(Height, Weight))


head(df, 8)

Height Weight

174 96

189 87

185 110

195 104

149 61

189 104

147 92

154 111

localhost:8888/notebooks/AP19110010030_R_Lab-Assignment-5.ipynb 1/6
19/04/2022, 09:32 AP19110010030_R_Lab-Assignment-5 - Jupyter Notebook

In [3]:

plot1 = ggplot(df, aes(x = Height, y = Weight))+geom_point()


print(plot1)

In [4]:

k = 4
k_cent_points=df[sample(nrow(df), k), ]
k_cent_points

Height Weight

125 158 149

94 177 61

265 147 84

280 188 57

localhost:8888/notebooks/AP19110010030_R_Lab-Assignment-5.ipynb 2/6
19/04/2022, 09:32 AP19110010030_R_Lab-Assignment-5 - Jupyter Notebook

In [6]:

repeat
{
x = df$cluster
df$dist_1 = ((k_cent_points[1, "Height"]-df$Height)^2 + (k_cent_points[1, "Weight"]-df$
df$dist_2 = ((k_cent_points[2, "Height"]-df$Height)^2 + (k_cent_points[2, "Weight"]-df$
df$dist_3 = ((k_cent_points[3, "Height"]-df$Height)^2 + (k_cent_points[3, "Weight"]-df$
df$dist_4 = ((k_cent_points[4, "Height"]-df$Height)^2 + (k_cent_points[4, "Weight"]-df$
df$min_dist=rowMins(as.matrix(df[,c(3, 4, 5, 6)]))
for (i in 1:500)
{
if (df[i,"min_dist"]==df[i,"dist_1"]){
df$cluster[i]=1
}
if (df[i,"min_dist"]==df[i,"dist_2"]){
df$cluster[i]=2
}
if (df[i,"min_dist"]==df[i,"dist_3"]){
df$cluster[i]=3
}
if (df[i,"min_dist"]==df[i,"dist_4"]){
df$cluster[i]=4
}
}
y = df$cluster

f = 0
for (i in 1:500){
if (x[i]==y[i]){
f = f + 1
}
}
if(f == 500)
{
break
}
sum_Height_cluster1 = 0
sum_Height_cluster2 = 0
sum_Height_cluster3 = 0
sum_Height_cluster4 = 0
sum_Weight_cluster1 = 0
sum_Weight_cluster2 = 0
sum_Weight_cluster3 = 0
sum_Weight_cluster4 = 0
for (i in 1:500)
{
if (df[i,"cluster"]==1){
sum_Height_cluster1 = sum_Height_cluster1 + df[i,"Height"]
sum_Weight_cluster1 = sum_Weight_cluster1 + df[i,"Weight"]
}
if (df[i,"cluster"]==2){
sum_Height_cluster2 = sum_Height_cluster2 + df[i,"Height"]
sum_Weight_cluster2 = sum_Weight_cluster2 + df[i,"Weight"]
}
if (df[i,"cluster"]==3){
sum_Height_cluster3 = sum_Height_cluster3 + df[i,"Height"]
sum_Weight_cluster3 = sum_Weight_cluster3 + df[i,"Weight"]
}
if (df[i,"cluster"]==4){
sum_Height_cluster4 = sum_Height_cluster4 + df[i,"Height"]
localhost:8888/notebooks/AP19110010030_R_Lab-Assignment-5.ipynb 3/6
19/04/2022, 09:32 AP19110010030_R_Lab-Assignment-5 - Jupyter Notebook

sum_Weight_cluster4 = sum_Weight_cluster4 + df[i,"Weight"]


}
}
n=c(table(df$cluster))

Height = list(sum_Height_cluster1/n[1], sum_Height_cluster2/n[2], sum_Height_cluster3/n


Weight = list(sum_Weight_cluster1/n[1], sum_Weight_cluster2/n[2], sum_Weight_cluster3/n
lists = list(Height, Weight)
k_cent_points = as.data.frame(do.call(rbind, lists))
print(k_cent_points)
}

localhost:8888/notebooks/AP19110010030_R_Lab-Assignment-5.ipynb 4/6
19/04/2022, 09:32 AP19110010030_R_Lab-Assignment-5 - Jupyter Notebook

In [8]:

print(k_cent_points)
f
plot1 = ggplot(df, aes(x = Height, y = Weight, col=cluster))+geom_point()
plot1

Height Weight

125 158 149

94 177 61

265 147 84

280 188 57

500

In [9]:

500

In [ ]:

localhost:8888/notebooks/AP19110010030_R_Lab-Assignment-5.ipynb 5/6

You might also like