Bilal Ahmad AI & DSS Assignment # 03


DEPARTMENT OF COMPUTER & SOFTWARE ENGINEERING
COLLEGE OF E&ME, NUST, RAWALPINDI

Artificial Intelligence and Decision Support

INSTRUCTOR   PROF. DR. ARSLAN SHAUKAT
STUDENT      BILAL AHMAD
REG. NO.     345624
DEGREE       42
SYNDICATE    A
DEPARTMENT   Computer Engineering

SUBMITTED TO: DR. ARSLAN SHAUKAT
SUBMITTED BY: BILAL AHMAD
ASSIGNMENT QUESTION:
1. Write a script that constructs a training dataset of just ‘3’ and ‘8’ digits.

2. To start, pick 100 of each randomly. Your matrix should end up as 2D, 200
rows by 256 columns. Remember to include the true label for each digit,
in another array, called labels (or whatever you want).

3. Use the KNN rule to classify each of the digits in your training set, and
report the training accuracy. Plot a graph to display the training accuracy
as you vary K from 1 to 20.

4. Now break your training set randomly into 2 equal parts, one part you
will use for training, and one part for testing. Plot a testing accuracy
graph, again varying K.

5. Now repeat task 4 by making another random split of the data into
training and testing sets and reclassifying. Do you get the same behavior?
Plot the average and standard deviation (as error bars) of the testing
accuracies achieved in tasks 4 and 5, for all values of K. Remember that all
graphs should have axis labels and a title. If you do not know which
MATLAB commands to use, try Googling.

6. When you are done with all this, extend the above to load and predict
digits ‘3’, ‘6’, and ‘8’. You can visualize your classifications using the
showdata function provided.
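
Before the full script, it helps to confirm the layout of the USPS data. A
minimal sketch (assuming, as the script below does, that usps_main.mat
contains a 256 x N x 10 array named 'maindata', with one flattened 16x16
digit image per column and one digit class per slice; the file path is an
assumption to adjust):

from scipy.io import loadmat

mat = loadmat('usps_main.mat')  # adjust the path as needed
maindata = mat['maindata']
print(maindata.shape)  # expected: (256, N, 10)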

Code:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from scipy.io import loadmat

# Load the MATLAB .mat file containing the USPS digits
mat_data = loadmat(r'G:\BILAL AHMAD AI LAB\SMESTER 7\Assignment 3\usps_main.mat')
main_data = mat_data['maindata']

def calculate_distance(instance1, instance2):
    # Euclidean distance between two flattened digit images
    dist = np.sqrt(np.sum((instance1 - instance2) ** 2))
    return dist

def find_neighbors(k, instance, x_train, y_train):
    # Compute the distance from `instance` to every training example,
    # sort by distance, and return the labels of the k nearest ones
    distances = [(i, calculate_distance(instance, x_train[i])) for i in range(len(x_train))]
    distances = sorted(distances, key=lambda x: x[1])
    neighbors_indices = [index for index, _ in distances[:k]]
    return y_train[neighbors_indices]
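
# A vectorized alternative (a sketch, not in the original script): computing
# all distances at once with NumPy broadcasting avoids the Python-level loop
# above and scales much better to larger training sets.
def find_neighbors_vectorized(k, instance, x_train, y_train):
    dists = np.sqrt(np.sum((x_train - instance) ** 2, axis=1))
    return y_train[np.argsort(dists)[:k]]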

def get_response(arr):
    # Majority vote: the most common label among the neighbors
    # (on ties, np.bincount(...).argmax() returns the smaller label)
    return np.bincount(arr).argmax()

def show_data(data, labels, predictions=None):
    num_examples = data.shape[0]

    # Sort data and labels so digits of the same class appear together
    sort_order = np.argsort(labels)
    data = data[sort_order, :]
    labels = labels[sort_order]

    # Check if predictions are provided
    testing = predictions is not None
    if testing:
        # Flatten predictions if it's a multi-dimensional array
        predictions = np.ravel(predictions)
        predictions = predictions[sort_order]

    # Calculate the side length of the square grid of digits
    side = int(np.ceil(np.sqrt(num_examples)))

    # Set up border parameters (each 16x16 digit sits in a bordered frame)
    border = 3
    frame_width = 16 + 2 * border

    # Initialize the main matrix to display
    m = np.zeros((side * frame_width, side * frame_width))

    n = 0
    mistakes = 0

    for row in range(0, side * frame_width, frame_width):
        for col in range(0, side * frame_width, frame_width):
            # Check if we reached the end of examples
            if n >= num_examples:
                break

            # Retrieve the digit pixels as a 16x16 image
            digit = data[n, :].reshape((16, 16))

            # Put a black border around it
            frame = np.zeros((frame_width, frame_width))
            frame[border:border + 16, border:border + 16] = digit
            digit = frame

            # Draw a further white border around the digit if there's a mistake
            if testing and labels[n] != predictions[n]:
                digit[border, border:frame_width - border] = 255  # top of white 'mistake' box
                digit[frame_width - border, border:frame_width - border] = 255  # bottom
                digit[border:frame_width - border, border] = 255  # left
                digit[border:frame_width - border, frame_width - border] = 255  # right
                mistakes += 1

            # Put it in the main matrix
            m[row:row + frame_width, col:col + frame_width] = digit

            # Increment which example we're dealing with
            n += 1

    plt.imshow(m, cmap='gray')
    plt.axis('off')
    plt.title(f"{mistakes} mistakes out of {num_examples} "
              f"({(mistakes / num_examples) * 100:.2f}%)", fontsize=16)
    plt.show(block=False)
    plt.pause(0.001)
    input("Press Enter to close the plot...")
    plt.close()

# Number of samples: 100 each of digits '3', '8', and '6'
num_samples = 300
samples = np.zeros((num_samples, 256))
labels = np.zeros(num_samples)

# Create samples for digit '3'
for i in range(100):
    random_index = np.random.randint(main_data.shape[1])
    samples[i] = main_data[:, random_index, 2].reshape((256,))
    labels[i] = 0

# Create samples for digit '8'
for i in range(100, 200):
    random_index = np.random.randint(main_data.shape[1])
    samples[i] = main_data[:, random_index, 7].reshape((256,))
    labels[i] = 1

# Create samples for digit '6'
for i in range(200, 300):
    random_index = np.random.randint(main_data.shape[1])
    samples[i] = main_data[:, random_index, 5].reshape((256,))
    labels[i] = 2
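
# Note: np.random.randint can pick the same column more than once, so the 100
# picks per digit may contain duplicates. If distinct examples are required,
# np.random.choice(main_data.shape[1], 100, replace=False) is an alternative
# (not used in the original script).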

# Shuffle the samples and labels together, keeping them aligned
indices = np.random.permutation(num_samples)
samples = samples[indices]
labels = labels[indices]

# Assign the samples and labels arrays to x_train and y_train, respectively
x_train = samples.astype(int)
y_train = labels.astype(int)

print("Loaded and shuffled data successfully.")

train_accuracies = []
ks = list(range(1, 21))

for k in ks:
    predictions = [get_response(find_neighbors(k, instance, x_train, y_train))
                   for instance in x_train]
    accuracy = accuracy_score(y_train, predictions)
    train_accuracies.append(accuracy)
    print(f"Training Accuracy for k={k}: {accuracy:.4f}")

# Plot the training accuracies for different values of k
plt.plot(ks, train_accuracies, marker='o')
plt.title("Training Accuracy vs. k")
plt.xlabel("k (Number of Neighbors)")
plt.ylabel("Training Accuracy")
plt.show(block=False)
plt.pause(0.001)
input("Press Enter to close the training plot...")
plt.close()

num_splits = 2
testing_accuracies_all_splits = []

for split in range(num_splits):
    # Split data into training and testing sets (50/50)
    x_train, x_test, y_train, y_test = train_test_split(samples, labels,
                                                        test_size=0.5,
                                                        random_state=split)
    x_train = x_train.astype(int)
    x_test = x_test.astype(int)
    y_train = y_train.astype(int)
    y_test = y_test.astype(int)
    testing_accuracies = []

    for k in ks:
        predictions = [get_response(find_neighbors(k, instance, x_train, y_train))
                       for instance in x_test]
        accuracy = accuracy_score(y_test, predictions)
        testing_accuracies.append(accuracy)
        print(f"Testing Accuracy for k={k} (Split {split+1}): {accuracy:.4f}")

    testing_accuracies_all_splits.append(testing_accuracies)
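
# Note: num_splits = 2 gives only a rough standard-deviation estimate;
# increasing num_splits (e.g. to 10) would give smoother, more reliable
# error bars.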

# Task 5: average and standard deviation of the testing accuracies across splits
testing_accuracies_all_splits = np.array(testing_accuracies_all_splits)
average_accuracies = np.mean(testing_accuracies_all_splits, axis=0)
std_accuracies = np.std(testing_accuracies_all_splits, axis=0)

plt.errorbar(ks, average_accuracies, yerr=std_accuracies, fmt='o', capsize=5)
plt.title("Testing Accuracy vs. k (Multiple Splits)")
plt.xlabel("k (Number of Neighbors)")
plt.ylabel("Testing Accuracy")
plt.show(block=False)
plt.pause(0.001)
input("Press Enter to close the testing plot...")
plt.close()

# Example usage of the show_data function (task 6 visualization)
chosen_k = 3
x_train, x_test, y_train, y_test = train_test_split(samples, labels,
                                                    test_size=0.5,
                                                    random_state=0)
x_train = x_train.astype(int)
x_test = x_test.astype(int)
y_train = y_train.astype(int)
y_test = y_test.astype(int)

predictions = [get_response(find_neighbors(chosen_k, instance, x_train, y_train))
               for instance in x_test]
show_data(x_test, y_test, predictions)
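
# Cross-check (a sketch, not in the original script): the same classification
# using scikit-learn's KNeighborsClassifier, which is imported above but
# otherwise unused. Its accuracy should closely match the custom KNN, since
# both use Euclidean distance with uniform majority voting.
knn = KNeighborsClassifier(n_neighbors=chosen_k)
knn.fit(x_train, y_train)
print(f"scikit-learn KNN accuracy for k={chosen_k}: {knn.score(x_test, y_test):.4f}")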

Output:

PLOT: [Figures: Training Accuracy vs. k; Testing Accuracy vs. k (Multiple Splits) with error bars; show_data grid of test digits with misclassifications boxed.]
