Bilal Ahmad AI & DSS Assignment # 03


DEPARTMENT OF COMPUTER & SOFTWARE ENGINEERING
COLLEGE OF E&ME, NUST, RAWALPINDI

Artificial Intelligence and Decision Support

INSTRUCTOR   PROF. DR. ARSLAN SHAUKAT
STUDENT      BILAL AHMAD
REG. NO.     345624
DEGREE       42
SYNDICATE    A
DEPARTMENT   Computer Engineering

SUBMITTED TO: DR. ARSLAN SHAUKAT
SUBMITTED BY: BILAL AHMAD
ASSIGNMENT QUESTION:
1. Write a script that constructs a training dataset of just ‘3’ and ‘8’ digits.

2. To start, pick 100 of each randomly. Your matrix should end up as 2D, 200
rows by 256 columns. Remember to include the true label for each digit,
in another array, called labels (or whatever you want).

3. Use the KNN rule to classify each of the digits in your training set, and
report the training accuracy. Plot a graph to display the training accuracy
as you vary K from 1 to 20.

4. Now break your training set randomly into 2 equal parts, one part you
will use for training, and one part for testing. Plot a testing accuracy
graph, again varying K.

5. Now repeat task 4 by making another random split of the data into
training and testing sets and reclassifying. Do you get the same behavior?
Plot the average and standard deviation (as error bars) of the testing
accuracies achieved in tasks 4 and 5, for all values of K. Remember that all
graphs should have axis labels and a title. If you do not know which
MATLAB commands to use, try Googling.

6. When you are done with all this, extend the above to load and predict
digits ‘3’, ‘6’, and ‘8’. You can visualize your classifications using the
showdata function provided.
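
Before the full script, it helps to confirm the layout of the USPS data. A
minimal sketch (assuming, as the script below does, that usps_main.mat
contains a 256 x N x 10 array named 'maindata', with one flattened 16x16
digit image per column and one digit class per slice; the file path is an
assumption to adjust):

from scipy.io import loadmat

mat = loadmat('usps_main.mat')  # adjust the path as needed
maindata = mat['maindata']
print(maindata.shape)  # expected: (256, N, 10)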

Code:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from scipy.io import loadmat

# Load the MATLAB .mat file containing the USPS digits
mat_data = loadmat(r'G:\BILAL AHMAD AI LAB\SMESTER 7\Assignment 3\usps_main.mat')
main_data = mat_data['maindata']

def calculate_distance(instance1, instance2):
    # Euclidean distance between two flattened digit images
    dist = np.sqrt(np.sum((instance1 - instance2) ** 2))
    return dist

def find_neighbors(k, instance, x_train, y_train):
    # Compute the distance from `instance` to every training example,
    # sort by distance, and return the labels of the k nearest ones
    distances = [(i, calculate_distance(instance, x_train[i])) for i in range(len(x_train))]
    distances = sorted(distances, key=lambda x: x[1])
    neighbors_indices = [index for index, _ in distances[:k]]
    return y_train[neighbors_indices]
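
# A vectorized alternative (a sketch, not in the original script): computing
# all distances at once with NumPy broadcasting avoids the Python-level loop
# above and scales much better to larger training sets.
def find_neighbors_vectorized(k, instance, x_train, y_train):
    dists = np.sqrt(np.sum((x_train - instance) ** 2, axis=1))
    return y_train[np.argsort(dists)[:k]]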

def get_response(arr):
    # Majority vote: the most common label among the neighbors
    # (on ties, np.bincount(...).argmax() returns the smaller label)
    return np.bincount(arr).argmax()

def show_data(data, labels, predictions=None):
    num_examples = data.shape[0]

    # Sort data and labels so digits of the same class appear together
    sort_order = np.argsort(labels)
    data = data[sort_order, :]
    labels = labels[sort_order]

    # Check if predictions are provided
    testing = predictions is not None
    if testing:
        # Flatten predictions if it's a multi-dimensional array
        predictions = np.ravel(predictions)
        predictions = predictions[sort_order]

    # Calculate the side length of the square grid of digits
    side = int(np.ceil(np.sqrt(num_examples)))

    # Set up border parameters (each 16x16 digit sits in a bordered frame)
    border = 3
    frame_width = 16 + 2 * border

    # Initialize the main matrix to display
    m = np.zeros((side * frame_width, side * frame_width))

    n = 0
    mistakes = 0

    for row in range(0, side * frame_width, frame_width):
        for col in range(0, side * frame_width, frame_width):
            # Check if we reached the end of examples
            if n >= num_examples:
                break

            # Retrieve the digit pixels as a 16x16 image
            digit = data[n, :].reshape((16, 16))

            # Put a black border around it
            frame = np.zeros((frame_width, frame_width))
            frame[border:border + 16, border:border + 16] = digit
            digit = frame

            # Draw a further white border around the digit if there's a mistake
            if testing and labels[n] != predictions[n]:
                digit[border, border:frame_width - border] = 255  # top of white 'mistake' box
                digit[frame_width - border, border:frame_width - border] = 255  # bottom
                digit[border:frame_width - border, border] = 255  # left
                digit[border:frame_width - border, frame_width - border] = 255  # right
                mistakes += 1

            # Put it in the main matrix
            m[row:row + frame_width, col:col + frame_width] = digit

            # Increment which example we're dealing with
            n += 1

    plt.imshow(m, cmap='gray')
    plt.axis('off')
    plt.title(f"{mistakes} mistakes out of {num_examples} "
              f"({(mistakes / num_examples) * 100:.2f}%)", fontsize=16)
    plt.show(block=False)
    plt.pause(0.001)
    input("Press Enter to close the plot...")
    plt.close()

# Number of samples: 100 each of digits '3', '8', and '6'
num_samples = 300
samples = np.zeros((num_samples, 256))
labels = np.zeros(num_samples)

# Create samples for digit '3'
for i in range(100):
    random_index = np.random.randint(main_data.shape[1])
    samples[i] = main_data[:, random_index, 2].reshape((256,))
    labels[i] = 0

# Create samples for digit '8'
for i in range(100, 200):
    random_index = np.random.randint(main_data.shape[1])
    samples[i] = main_data[:, random_index, 7].reshape((256,))
    labels[i] = 1

# Create samples for digit '6'
for i in range(200, 300):
    random_index = np.random.randint(main_data.shape[1])
    samples[i] = main_data[:, random_index, 5].reshape((256,))
    labels[i] = 2
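
# Note: np.random.randint can pick the same column more than once, so the 100
# picks per digit may contain duplicates. If distinct examples are required,
# np.random.choice(main_data.shape[1], 100, replace=False) is an alternative
# (not used in the original script).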

# Shuffle the samples and labels together, keeping them aligned
indices = np.random.permutation(num_samples)
samples = samples[indices]
labels = labels[indices]

# Assign the samples and labels arrays to x_train and y_train, respectively
x_train = samples.astype(int)
y_train = labels.astype(int)

print("Loaded and shuffled data successfully.")

train_accuracies = []
ks = list(range(1, 21))

for k in ks:
    predictions = [get_response(find_neighbors(k, instance, x_train, y_train))
                   for instance in x_train]
    accuracy = accuracy_score(y_train, predictions)
    train_accuracies.append(accuracy)
    print(f"Training Accuracy for k={k}: {accuracy:.4f}")

# Plot the training accuracies for different values of k
plt.plot(ks, train_accuracies, marker='o')
plt.title("Training Accuracy vs. k")
plt.xlabel("k (Number of Neighbors)")
plt.ylabel("Training Accuracy")
plt.show(block=False)
plt.pause(0.001)
input("Press Enter to close the training plot...")
plt.close()

num_splits = 2
testing_accuracies_all_splits = []

for split in range(num_splits):
    # Split data into training and testing sets (50/50)
    x_train, x_test, y_train, y_test = train_test_split(samples, labels,
                                                        test_size=0.5,
                                                        random_state=split)
    x_train = x_train.astype(int)
    x_test = x_test.astype(int)
    y_train = y_train.astype(int)
    y_test = y_test.astype(int)
    testing_accuracies = []

    for k in ks:
        predictions = [get_response(find_neighbors(k, instance, x_train, y_train))
                       for instance in x_test]
        accuracy = accuracy_score(y_test, predictions)
        testing_accuracies.append(accuracy)
        print(f"Testing Accuracy for k={k} (Split {split+1}): {accuracy:.4f}")

    testing_accuracies_all_splits.append(testing_accuracies)
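
# Note: num_splits = 2 gives only a rough standard-deviation estimate;
# increasing num_splits (e.g. to 10) would give smoother, more reliable
# error bars.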

# Task 5: average and standard deviation of the testing accuracies across splits
testing_accuracies_all_splits = np.array(testing_accuracies_all_splits)
average_accuracies = np.mean(testing_accuracies_all_splits, axis=0)
std_accuracies = np.std(testing_accuracies_all_splits, axis=0)

plt.errorbar(ks, average_accuracies, yerr=std_accuracies, fmt='o', capsize=5)
plt.title("Testing Accuracy vs. k (Multiple Splits)")
plt.xlabel("k (Number of Neighbors)")
plt.ylabel("Testing Accuracy")
plt.show(block=False)
plt.pause(0.001)
input("Press Enter to close the testing plot...")
plt.close()

# Example usage of the show_data function (task 6 visualization)
chosen_k = 3
x_train, x_test, y_train, y_test = train_test_split(samples, labels,
                                                    test_size=0.5,
                                                    random_state=0)
x_train = x_train.astype(int)
x_test = x_test.astype(int)
y_train = y_train.astype(int)
y_test = y_test.astype(int)

predictions = [get_response(find_neighbors(chosen_k, instance, x_train, y_train))
               for instance in x_test]
show_data(x_test, y_test, predictions)
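
# Cross-check (a sketch, not in the original script): the same classification
# using scikit-learn's KNeighborsClassifier, which is imported above but
# otherwise unused. Its accuracy should closely match the custom KNN, since
# both use Euclidean distance with uniform majority voting.
knn = KNeighborsClassifier(n_neighbors=chosen_k)
knn.fit(x_train, y_train)
print(f"scikit-learn KNN accuracy for k={chosen_k}: {knn.score(x_test, y_test):.4f}")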

Output:

PLOT: [Figures: Training Accuracy vs. k; Testing Accuracy vs. k (Multiple Splits) with error bars; show_data grid of test digits with misclassifications boxed.]
