Professional Documents
Culture Documents
2 - 9 - KNN Code
2 - 9 - KNN Code
[3]: 2.0
[5]: 2
1
d = euclidean_distance(x, points[i])
distances.append([i, d])
return c
points_train = points[:seuil]
points_test = points[seuil:]
classes_train = classes[:seuil]
classes_test = classes[seuil:]
succes = 0
nb_test = len(points_test)
for i in range(nb_test):
prediction = knn(points_train, classes_train, points_test[i], k)
if prediction == classes_test[i]:
succes += 1
return succes/nb_test
1 Generic Dataset
[8]: points, classes = make_blobs(n_samples = 500, n_features = 2, centers = 3, cluster_std = 1.5, random_state = 6)
2
[9]: plt.figure(figsize = (10,5))
plt.scatter(points[:,0], points[:,1], c=classes, marker= '.',s=100,edgecolors='black')
plt.show()
[11]: print(points_train[:10])
[[ 7.80291838 -3.49667437]
[-6.2660849 1.92611179]
[-8.85654973 3.25691309]
[-5.84437689 4.59816109]
[ 6.55402995 -2.8281474 ]
[ 6.85441089 -9.26260683]
[ 7.66709846 -5.41332313]
[-7.72643879 -2.05980392]
[10.11138133 -4.25359347]
[ 6.15349088 -8.59446213]]
[12]: print(points_train[:10])
print(classes_train[:10])
[[ 7.80291838 -3.49667437]
[-6.2660849 1.92611179]
[-8.85654973 3.25691309]
[-5.84437689 4.59816109]
[ 6.55402995 -2.8281474 ]
[ 6.85441089 -9.26260683]
[ 7.66709846 -5.41332313]
[-7.72643879 -2.05980392]
[10.11138133 -4.25359347]
[ 6.15349088 -8.59446213]]
[0 2 2 2 0 1 0 2 0 1]
[13]: x = points_test[33]
print(x)
knn(points_train, classes_train, x, 10)
[-6.43194186 0.92589598]
[13]: 2
[14]: 0.99
2 Iris Dataset
[15]: dataset_iris = pd.read_csv('iris.csv')
[16]: print(len(dataset_iris))
print(dataset_iris)
150
sepal.length sepal.width petal.length petal.width variety
0 5.1 3.5 1.4 0.2 Setosa
1 4.9 3.0 1.4 0.2 Setosa
2 4.7 3.2 1.3 0.2 Setosa
3 4.6 3.1 1.5 0.2 Setosa
4 5.0 3.6 1.4 0.2 Setosa
.. ... ... ... ... ...
145 6.7 3.0 5.2 2.3 Virginica
146 6.3 2.5 5.0 1.9 Virginica
147 6.5 3.0 5.2 2.0 Virginica
148 6.2 3.4 5.4 2.3 Virginica
149 5.9 3.0 5.1 1.8 Virginica
4
[18]: knn(points_iris, classes_iris, [3.5,3.5,4.5,4.5], 5)
[18]: 'Virginica'
[19]: print(points_iris[:10])
[20]: print(classes_iris[:10])
[21]: 0.8
3 Diabetes Dataset
[22]: dataset_diabetes = pd.read_csv('diabetes.csv')
[23]: print(len(dataset_diabetes))
dataset_diabetes.head()
768
5
[24]: dataset_diabetes = np.array(dataset_diabetes)
points_diabetes = dataset_diabetes[:,:8]
classes_diabetes = dataset_diabetes[:,8:]
classes_diabetes = classes_diabetes[:,0]
[25]: print(points_diabetes[:10])
[26]: print(classes_diabetes[:10])
[1. 0. 1. 0. 1. 0. 1. 0. 1. 1.]
[27]: 0.7207792207792207