KNN With MNIST Dataset - 2

You might also like

Download as txt, pdf, or txt
Download as txt, pdf, or txt
You are on page 1 of 2

# Prepare five folds from the training/validation dataset.
# NOTE(review): presumably this is what populates foldDict with keys 1..5 --
# confirm against the definition of preprocessing.
preprocessing(dataset1_trainingValidation, 5)

# Split schedule: the n-th group of training folds is evaluated against the
# n-th validation fold, so every fold is held out exactly once.
trainSequence = [
    [1, 2, 3, 4],
    [1, 2, 3, 5],
    [1, 2, 4, 5],
    [1, 3, 4, 5],
    [2, 3, 4, 5],
]
validationSequence = [5, 4, 3, 2, 1]
NN_array = []

for e, v in zip(trainSequence, validationSequence):
    print("Running Training Set", e, "And Validation Set", v)

    # Concatenate the training folds in the order listed in e.
    trainList = []
    for fold in e:
        trainList.extend(foldDict[fold])

    # NOTE(review): the indentation of the source was lost; the shuffle is
    # assumed to run once per split, after the training folds are gathered.
    random.shuffle(foldDict[v])

    trainNumpy = list(map(arrayConverter, trainList))
    validateNumpy = list(map(arrayConverter, foldDict[v]))

    array = kNearestNeighbour(trainList, foldDict[v], trainNumpy, validateNumpy)
    NN_array.append(array)

# One row per split; the column labels 1..12 are the k values reported below.
cross_validation = pd.DataFrame(NN_array, columns=range(1, 13))

# Pick the column with the highest mean. The comparison is strict, so the
# smallest k wins when several columns share the best mean.
maximum = 0
number = 0
for candidate in range(1, 13):
    mean_accuracy = cross_validation[candidate].mean()
    if mean_accuracy > maximum:
        maximum, number = mean_accuracy, candidate

print(
    "Using Training Data K-NN of {} has Maximum Accuracy of {}".format(
        number, maximum
    )
)
k_optimal = number

def kNearestNeighbour_optimal(trainingData, Validation, trainNumpy, validateNumpy,
                              k_optimal):
    """Return the k-NN classification accuracy on ``Validation``.

    Each validation sample is labelled by majority vote among the
    ``k_optimal`` training samples with the smallest ``euc_dist`` to it, and
    the prediction is compared with the label parsed from the sample's path.

    Args:
        trainingData: Sequence of training file paths. The label is the
            integer in the second ``_``-separated field of the file name.
        Validation: Sequence of validation file paths, same naming scheme.
        trainNumpy: Per-sample data handed to ``euc_dist``, index-aligned
            with ``trainingData``.
        validateNumpy: Per-sample data handed to ``euc_dist``, index-aligned
            with ``Validation``.
        k_optimal: Number of nearest neighbours that take part in the vote.

    Returns:
        The fraction of validation samples that were classified correctly,
        as produced by ``np.divide``.

    Raises:
        ValueError: If no neighbour is available for a validation sample
            (for example ``k_optimal`` is 0 or ``trainingData`` is empty).
    """
    # Training labels do not depend on the validation sample, so parse them
    # once up front instead of once per validation sample.
    training_label = [
        int(path.split('/')[-1].split('_')[1]) for path in trainingData
    ]

    err = 0
    for i, sample_path in enumerate(Validation):
        true_label = int(sample_path.split('/')[-1].split('_')[1])
        sample = validateNumpy[i]

        # sorted() is stable, so equally distant neighbours keep their
        # training order.
        by_distance = sorted(
            (
                (euc_dist(trainNumpy[j], sample), label)
                for j, label in enumerate(training_label)
            ),
            key=lambda pair: pair[0],
        )
        kNN = [label for _, label in by_distance[:k_optimal]]

        # Vote over the distance-ordered list rather than a set: max() returns
        # the first maximal element, so a tied vote goes to the label of the
        # closest neighbour instead of depending on set iteration order.
        classify = max(kNN, key=kNN.count)
        if classify != true_label:
            err += 1

    return np.divide(np.subtract(len(Validation), err), len(Validation))

# Prepare five folds from the test dataset.
# NOTE(review): presumably this repopulates foldDict with keys 1..5 for the
# test data -- confirm against the definition of preprocessing.
preprocessing(dataset1_test, 5)

# Same split schedule as before: the n-th group of training folds is evaluated
# against the n-th validation fold.
trainSequence = [
    [1, 2, 3, 4],
    [1, 2, 3, 5],
    [1, 2, 4, 5],
    [1, 3, 4, 5],
    [2, 3, 4, 5],
]
validationSequence = [5, 4, 3, 2, 1]
NN_array_test = []

for e, v in zip(trainSequence, validationSequence):
    print("Running Training Set", e, "And Validation Set", v)

    # Concatenate the training folds in the order listed in e.
    trainList = []
    for fold in e:
        trainList.extend(foldDict[fold])

    # NOTE(review): the indentation of the source was lost; the shuffle is
    # assumed to run once per split, after the training folds are gathered.
    random.shuffle(foldDict[v])

    trainNumpy = list(map(arrayConverter, trainList))
    validateNumpy = list(map(arrayConverter, foldDict[v]))

    # Only the previously selected k is evaluated here.
    array = kNearestNeighbour_optimal(
        trainList, foldDict[v], trainNumpy, validateNumpy, k_optimal
    )
    NN_array_test.append(array)

# One accuracy value per split, stored in the single column 'k'.
cross_validation_test = pd.DataFrame(NN_array_test, columns=['k'])

mean_test_accuracy = cross_validation_test['k'].mean()
print(
    "Testing Data with K-NN of {} has Accuracy of {}".format(
        k_optimal, mean_test_accuracy
    )
)

You might also like