Professional Documents
Culture Documents
Experiment 8: Hierarchical Clustering
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd  # was missing in the original; pd.read_csv below fails without it

# Load the mall-customer dataset (200 rows x 5 columns per the info() output below)
# and take a first look at the head and the column summary.
data = pd.read_csv("Mall_Customers.csv")
data.head()
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 5 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 CustomerID 200 non-null int64
1 Genre 200 non-null object
2 Age 200 non-null int64
3 Annual Income (k$) 200 non-null int64
4 Spending Score (1-100) 200 non-null int64
dtypes: int64(4), object(1)
memory usage: 7.9+ KB
# Count missing values per column (the output below shows all zeros).
data.isnull().sum()
CustomerID 0
Genre 0
Age 0
Annual Income (k$) 0
Spending Score (1-100) 0
dtype: int64
Genre
Female 112
Male 88
dtype: int64
# Inspect each column's dtype; Genre is the only non-numeric (object) column.
types = data.dtypes
print(types)
CustomerID int64
Genre object
Age int64
Annual Income (k$) int64
Spending Score (1-100) int64
dtype: object
# Histogram of every numeric column to eyeball the distributions.
data.hist()
plt.show()
# Build the hierarchical-clustering merge trees.
# `linkage` was never imported in the original transcript, and `X` was never
# defined; the later plots label their axes "Annual Income (k$)" / "Spending
# Score (1-100)", so X is presumably those two columns — TODO confirm.
from scipy.cluster.hierarchy import linkage, cophenet
from scipy.spatial.distance import pdist

X = data.iloc[:, [3, 4]].values  # Annual Income (k$), Spending Score (1-100)

C = linkage(X, 'complete')  # complete-linkage merge tree
S = linkage(X, 'single')    # single-linkage merge tree

# Cophenetic correlation coefficient: how faithfully each tree preserves the
# pairwise distances. (cophenet/pdist were imported twice in the original but
# never actually used.)
coph_complete = cophenet(C, pdist(X))[0]
coph_single = cophenet(S, pdist(X))[0]
# Re-check for missing values before clustering.
data.isnull().sum()
# Fitting Hierarchical Clustering to the dataset (single linkage).
from sklearn.cluster import AgglomerativeClustering

# `affinity` was deprecated in scikit-learn 1.2 and removed in 1.4 — the
# FutureWarning in the original run says to use `metric` instead.
hc = AgglomerativeClustering(n_clusters=5, metric='euclidean',
                             linkage='single')
y_hc = hc.fit_predict(X)
C:\Users\Shyam Singh\anaconda3\lib\site-packages\sklearn\cluster\
_agglomerative.py:983: FutureWarning: Attribute `affinity` was
deprecated in version 1.2 and will be removed in 1.4. Use `metric`
instead
warnings.warn(
# Show the cluster label assigned to each customer (single linkage).
print(y_hc)
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1
1 1 1 1 1 1 1 0 1 0 4 0 4 3 2]
print(y_hc)
# Visualising the raw data before clustering.
# The original transcript split the 'Data Distribution' string literal across
# two lines (a paste artifact), which is a SyntaxError — rejoined here.
plt.scatter(X[:, 0], X[:, 1], s=100, c='black', label='Data Distribution')
plt.title('Customer Distribution before clustering')
plt.xlabel('Annual Income (k$)')
plt.ylabel('Spending Score (1-100)')
plt.legend()
plt.show()
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1
1 1 1 1 1 1 1 0 1 0 4 0 4 3 2]
# Visualising the clusters found by single linkage.
plt.scatter(X[y_hc == 0, 0], X[y_hc == 0, 1], s=100, c='red',
            label='Careless')
plt.scatter(X[y_hc == 1, 0], X[y_hc == 1, 1], s=100, c='blue',
            label='standard')
plt.scatter(X[y_hc == 2, 0], X[y_hc == 2, 1], s=100, c='green',
            label='Target')
plt.scatter(X[y_hc == 3, 0], X[y_hc == 3, 1], s=100, c='cyan',
            label='Careful')
plt.scatter(X[y_hc == 4, 0], X[y_hc == 4, 1], s=100, c='magenta',
            label='Sensible')
plt.title('Clusters of customers')
plt.xlabel('Annual Income (k$)')
plt.ylabel('Spending Score (1-100)')
# Every scatter sets label=, but the original never called legend(),
# so the labels were invisible; axis labels added for consistency with
# the pre-clustering plot.
plt.legend()
plt.show()
# Fitting Hierarchical Clustering to the dataset (Ward linkage).
from sklearn.cluster import AgglomerativeClustering

# `affinity` was deprecated in scikit-learn 1.2 and removed in 1.4 — the
# FutureWarning in the original run says to use `metric` instead.
hc = AgglomerativeClustering(n_clusters=5, metric='euclidean',
                             linkage='ward')
y_hc = hc.fit_predict(X)
C:\Users\Shyam Singh\anaconda3\lib\site-packages\sklearn\cluster\
_agglomerative.py:983: FutureWarning: Attribute `affinity` was
deprecated in version 1.2 and will be removed in 1.4. Use `metric`
instead
warnings.warn(
# Show the cluster label assigned to each customer (Ward linkage).
print(y_hc)
[4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4 3 4
3 4
3 4 3 4 3 4 1 4 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1
1 1 1 1 1 1 1 1 1 1 1 1 2 1 2 1 2 0 2 0 2 1 2 0 2 0 2 0 2 0 2 1 2 0 2
1 2
0 2 0 2 0 2 0 2 0 2 0 2 1 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0 2 0
2 0
2 0 2 0 2 0 2 0 2 0 2 0 2 0 2]
# Visualising the clusters found by Ward linkage.
plt.scatter(X[y_hc == 0, 0], X[y_hc == 0, 1], s=100, c='red',
            label='Careless')
plt.scatter(X[y_hc == 1, 0], X[y_hc == 1, 1], s=100, c='blue',
            label='standard')
plt.scatter(X[y_hc == 2, 0], X[y_hc == 2, 1], s=100, c='green',
            label='Target')
plt.scatter(X[y_hc == 3, 0], X[y_hc == 3, 1], s=100, c='cyan',
            label='Careful')
plt.scatter(X[y_hc == 4, 0], X[y_hc == 4, 1], s=100, c='magenta',
            label='Sensible')
plt.title('Clusters of customers')
plt.xlabel('Annual Income (k$)')
plt.ylabel('Spending Score (1-100)')
# Every scatter sets label=, but the original never called legend(),
# so the labels were invisible; axis labels added for consistency with
# the pre-clustering plot.
plt.legend()
plt.show()
# Ad-hoc check on five new (annual income, spending score) pairs.
# NOTE(review): AgglomerativeClustering has no predict(); fit_predict below
# RE-FITS the clustering on just these five points, so each label is an
# arbitrary cluster of the five — it does NOT assign them to the clusters
# learned from the full dataset. Confirm this is intended.
Annual_Income = 39
Spending_Score = 91
Annual_Income1 = 34
Spending_Score1 = 19
Annual_Income2 = 34
Spending_Score2 = 65
Annual_Income3 = 45
Spending_Score3 = 56
Annual_Income4 = 56
Spending_Score4 = 21

new_points = [
    [Annual_Income, Spending_Score],
    [Annual_Income1, Spending_Score1],
    [Annual_Income2, Spending_Score2],
    [Annual_Income3, Spending_Score3],
    [Annual_Income4, Spending_Score4],
]
predict = hc.fit_predict(new_points)
print(predict)
[2 3 4 1 0]
C:\Users\Shyam Singh\anaconda3\lib\site-packages\sklearn\cluster\
_agglomerative.py:983: FutureWarning: Attribute `affinity` was
deprecated in version 1.2 and will be removed in 1.4. Use `metric`
instead
warnings.warn(
import pickle

# Persist the fitted clustering model to disk.
# The original passed an inline open() handle to pickle.dump and never closed
# it; a context manager guarantees the file is flushed and closed.
filename = "model8.sav"
with open(filename, "wb") as f:
    pickle.dump(hc, f)