
In [1]: # Write a Python script to implement the K-Means algorithm on a dataset of your own choosing.

import pandas as pd
from sklearn.cluster import KMeans

data = pd.DataFrame({
    "age": [25, 32, 40, 28, 35, 48, 38, 22, 27, 30],
    "income": [50000, 70000, 85000, 62000, 78000, 95000, 82000, 45000, 52000, 65000]
})

k = 3

kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)  # n_init set explicitly to avoid sklearn's FutureWarning

kmeans.fit(data)

cluster_labels = kmeans.labels_

print("Cluster labels:", cluster_labels)

centroids = kmeans.cluster_centers_
print("Centroids:", centroids)

data["cluster"] = cluster_labels

print(data)

import matplotlib.pyplot as plt

plt.scatter(data["age"], data["income"], c=cluster_labels)


plt.xlabel("Age")
plt.ylabel("Income")
plt.title("Customer Clusters")
plt.show()

Cluster labels: [1 2 0 2 0 0 0 1 1 2]
Centroids: [[4.02500000e+01 8.50000000e+04]
 [2.46666667e+01 4.90000000e+04]
 [3.00000000e+01 6.56666667e+04]]
   age  income  cluster
0   25   50000        1
1   32   70000        2
2   40   85000        0
3   28   62000        2
4   35   78000        0
5   48   95000        0
6   38   82000        0
7   22   45000        1
8   27   52000        1
9   30   65000        2
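The two features above live on very different scales (income values are thousands of times larger than ages), so distance-based K-Means is driven almost entirely by income. Below is a minimal sketch of the same clustering on standardized features; it reuses the `data` DataFrame from above, and the `scaled_labels` name is just for illustration:

from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

# Rescale each column to zero mean and unit variance before clustering,
# using only the original two feature columns (not the added "cluster" column).
features = data[["age", "income"]]
scaled = StandardScaler().fit_transform(features)

kmeans_scaled = KMeans(n_clusters=3, n_init=10, random_state=42)
scaled_labels = kmeans_scaled.fit_predict(scaled)
print("Labels on scaled features:", scaled_labels)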

In [2]: # Write a Python script to implement a hierarchical clustering algorithm on a dataset of your own choosing.

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.cluster import AgglomerativeClustering
from scipy.cluster.hierarchy import dendrogram, linkage

np.random.seed(42)
data, _ = make_blobs(n_samples=300, centers=4, random_state=42)

k = int(input("Enter the number of clusters (K): "))

hc_model = AgglomerativeClustering(n_clusters=k, metric='euclidean', linkage='ward')  # 'metric' replaces the deprecated 'affinity' parameter


hc_labels = hc_model.fit_predict(data)

plt.scatter(data[:, 0], data[:, 1], c=hc_labels, cmap='viridis', edgecolors='k', s=50)


plt.title('Hierarchical Clustering')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.show()

linked = linkage(data, 'ward')


dendrogram(linked, orientation='top', distance_sort='descending', show_leaf_counts=True)
plt.title('Hierarchical Clustering Dendrogram')
plt.xlabel('Sample Index')
plt.ylabel('Cluster Distance')
plt.show()

Enter the number of clusters (K): 3


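The linkage matrix that produced the dendrogram can also be cut directly into flat clusters, which makes a quick consistency check on the AgglomerativeClustering labels. A short sketch using SciPy's fcluster, reusing the `linked` matrix and `k` from the cell above (note that fcluster numbers clusters 1..k rather than 0..k-1):

from scipy.cluster.hierarchy import fcluster

# Cut the ward linkage tree so that exactly k flat clusters remain.
flat_labels = fcluster(linked, t=k, criterion='maxclust')
print("Flat labels from the dendrogram cut:", flat_labels[:10])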

In [3]: # Write a Python script to implement a decision tree on a dataset of your own choosing.

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

np.random.seed(42)
data = pd.DataFrame({
    'Feature1': np.random.rand(100),
    'Feature2': np.random.rand(100),
    'Label': np.random.choice([0, 1], size=100)
})

X = data[['Feature1', 'Feature2']]
y = data['Label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

dt_classifier = DecisionTreeClassifier(random_state=42)

dt_classifier.fit(X_train, y_train)

y_pred = dt_classifier.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)


conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

print(f'Accuracy: {accuracy:.2f}')
print('Confusion Matrix:\n', conf_matrix)
print('Classification Report:\n', class_report)

Accuracy: 0.65
Confusion Matrix:
 [[4 2]
 [5 9]]
Classification Report:
               precision    recall  f1-score   support

           0       0.44      0.67      0.53         6
           1       0.82      0.64      0.72        14

    accuracy                           0.65        20
   macro avg       0.63      0.65      0.63        20
weighted avg       0.71      0.65      0.66        20
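Beyond the metrics, the fitted tree's decision rules can be printed as plain text, which is often the easiest way to sanity-check what the model learned (here, on random labels, the rules should look arbitrary). A minimal sketch using sklearn's export_text, reusing `dt_classifier` and `X` from the cell above:

from sklearn.tree import export_text

# Print the learned if/else rules with the original feature names.
rules = export_text(dt_classifier, feature_names=list(X.columns))
print(rules)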

In [ ]:
