
In [1]: # Write a Python script to implement the K-Means algorithm on a dataset of your own choosing.

import pandas as pd
from sklearn.cluster import KMeans

data = pd.DataFrame({
    "age": [25, 32, 40, 28, 35, 48, 38, 22, 27, 30],
    "income": [50000, 70000, 85000, 62000, 78000, 95000, 82000, 45000, 52000, 65000]
})

k = 3

kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)  # n_init set explicitly to avoid sklearn's FutureWarning

kmeans.fit(data)

cluster_labels = kmeans.labels_

print("Cluster labels:", cluster_labels)

centroids = kmeans.cluster_centers_
print("Centroids:", centroids)

data["cluster"] = cluster_labels

print(data)

import matplotlib.pyplot as plt

plt.scatter(data["age"], data["income"], c=cluster_labels)


plt.xlabel("Age")
plt.ylabel("Income")
plt.title("Customer Clusters")
plt.show()

Cluster labels: [1 2 0 2 0 0 0 1 1 2]
Centroids: [[4.02500000e+01 8.50000000e+04]
 [2.46666667e+01 4.90000000e+04]
 [3.00000000e+01 6.56666667e+04]]
   age  income  cluster
0   25   50000        1
1   32   70000        2
2   40   85000        0
3   28   62000        2
4   35   78000        0
5   48   95000        0
6   38   82000        0
7   22   45000        1
8   27   52000        1
9   30   65000        2
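The two features above live on very different scales (income values are thousands of times larger than ages), so distance-based K-Means is driven almost entirely by income. Below is a minimal sketch of the same clustering on standardized features; it reuses the `data` DataFrame from above, and the `scaled_labels` name is just for illustration:

from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

# Rescale each column to zero mean and unit variance before clustering,
# using only the original two feature columns (not the added "cluster" column).
features = data[["age", "income"]]
scaled = StandardScaler().fit_transform(features)

kmeans_scaled = KMeans(n_clusters=3, n_init=10, random_state=42)
scaled_labels = kmeans_scaled.fit_predict(scaled)
print("Labels on scaled features:", scaled_labels)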

In [2]: # Write a Python script to implement a hierarchical clustering algorithm on a dataset of your own choosing.

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.cluster import AgglomerativeClustering
from scipy.cluster.hierarchy import dendrogram, linkage

np.random.seed(42)
data, _ = make_blobs(n_samples=300, centers=4, random_state=42)

k = int(input("Enter the number of clusters (K): "))

hc_model = AgglomerativeClustering(n_clusters=k, metric='euclidean', linkage='ward')  # 'metric' replaces the deprecated 'affinity' parameter


hc_labels = hc_model.fit_predict(data)

plt.scatter(data[:, 0], data[:, 1], c=hc_labels, cmap='viridis', edgecolors='k', s=50)


plt.title('Hierarchical Clustering')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.show()

linked = linkage(data, 'ward')


dendrogram(linked, orientation='top', distance_sort='descending', show_leaf_counts=True)
plt.title('Hierarchical Clustering Dendrogram')
plt.xlabel('Sample Index')
plt.ylabel('Cluster Distance')
plt.show()

Enter the number of clusters (K): 3


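The linkage matrix that produced the dendrogram can also be cut directly into flat clusters, which makes a quick consistency check on the AgglomerativeClustering labels. A short sketch using SciPy's fcluster, reusing the `linked` matrix and `k` from the cell above (note that fcluster numbers clusters 1..k rather than 0..k-1):

from scipy.cluster.hierarchy import fcluster

# Cut the ward linkage tree so that exactly k flat clusters remain.
flat_labels = fcluster(linked, t=k, criterion='maxclust')
print("Flat labels from the dendrogram cut:", flat_labels[:10])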

In [3]: # Write a Python script to implement a decision tree on a dataset of your own choosing.

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

np.random.seed(42)
data = pd.DataFrame({
    'Feature1': np.random.rand(100),
    'Feature2': np.random.rand(100),
    'Label': np.random.choice([0, 1], size=100)
})

X = data[['Feature1', 'Feature2']]
y = data['Label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

dt_classifier = DecisionTreeClassifier(random_state=42)

dt_classifier.fit(X_train, y_train)

y_pred = dt_classifier.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)


conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

print(f'Accuracy: {accuracy:.2f}')
print('Confusion Matrix:\n', conf_matrix)
print('Classification Report:\n', class_report)

Accuracy: 0.65
Confusion Matrix:
 [[4 2]
 [5 9]]
Classification Report:
               precision    recall  f1-score   support

           0       0.44      0.67      0.53         6
           1       0.82      0.64      0.72        14

    accuracy                           0.65        20
   macro avg       0.63      0.65      0.63        20
weighted avg       0.71      0.65      0.66        20
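Beyond the metrics, the fitted tree's decision rules can be printed as plain text, which is often the easiest way to sanity-check what the model learned (here, on random labels, the rules should look arbitrary). A minimal sketch using sklearn's export_text, reusing `dt_classifier` and `X` from the cell above:

from sklearn.tree import export_text

# Print the learned if/else rules with the original feature names.
rules = export_text(dt_classifier, feature_names=list(X.columns))
print(rules)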

In [ ]:
