Assignment 1


COMP5318 Assignment 1: Classification

Group number: A1 Part1 Group 43, SID1: 530242293, SID2: 520597749

In [1]: # Import all libraries

import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold, train_test_split, cross_val_score, GridSearchCV
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import BaggingClassifier, AdaBoostClassifier, GradientBoostingClassifier, RandomForestClassifier
from sklearn import metrics
from sklearn.metrics import accuracy_score, f1_score

from warnings import simplefilter
# ignore warnings related to some packages
simplefilter(action='ignore', category=FutureWarning)

In [2]: # Load dataset

df= pd.read_csv("breast-cancer-wisconsin.csv")

Pre-processing dataset
In [3]: # checking all the rows which have missing values "?"

df[df.values=='?']

Out[3]:
    Clump Thickness  Uniformity of Cell Size  Uniformity of Cell Shape  Marginal Adhesion  Single Epithelial Cell Size  Bare Nuclei  Bland Chromatin  Normal Nucleoli  Mitoses  class
23  8 4 5 1 2 ? 7  3 1 class2
40  6 6 6 9 6 ? 7  8 1 class1
139 1 1 1 1 1 ? 2  1 1 class1
145 1 1 3 1 2 ? 2  1 1 class1
158 1 1 2 1 3 ? 1  1 1 class1
164 5 1 1 1 2 ? 3  1 1 class1
235 3 1 4 1 2 ? 3  1 1 class1
249 3 1 1 1 2 ? 3  1 1 class1
275 3 1 3 1 2 ? 2  1 1 class1
292 8 8 8 1 2 ? 6 10 1 class2
294 1 1 1 1 2 ? 2  1 1 class1
297 5 4 3 1 2 ? 2  3 1 class1
315 4 6 5 6 7 ? 4  9 1 class1
321 3 1 1 1 2 ? 3  1 1 class1
411 1 1 1 1 1 ? 2  1 1 class1
617 1 1 1 1 1 ? 1  1 1 class1
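An equivalent way to select the complete rows that contain at least one '?' is a row-wise mask (a minimal sketch, assuming the same df loaded above; the rows_with_missing name is illustrative):

# Sketch: row-wise mask selecting every row with at least one '?'
rows_with_missing = df[(df == '?').any(axis=1)]
print(rows_with_missing)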

In [4]: # converting all ? to NaNs

df=df.replace(['?'],np.nan)
In [5]: # checking if ? have been correctly changed to NaNs in DataFrame

df[df.isna().any(axis=1)]

Out[5]:
    Clump Thickness  Uniformity of Cell Size  Uniformity of Cell Shape  Marginal Adhesion  Single Epithelial Cell Size  Bare Nuclei  Bland Chromatin  Normal Nucleoli  Mitoses  class
23  8 4 5 1 2 NaN 7  3 1 class2
40  6 6 6 9 6 NaN 7  8 1 class1
139 1 1 1 1 1 NaN 2  1 1 class1
145 1 1 3 1 2 NaN 2  1 1 class1
158 1 1 2 1 3 NaN 1  1 1 class1
164 5 1 1 1 2 NaN 3  1 1 class1
235 3 1 4 1 2 NaN 3  1 1 class1
249 3 1 1 1 2 NaN 3  1 1 class1
275 3 1 3 1 2 NaN 2  1 1 class1
292 8 8 8 1 2 NaN 6 10 1 class2
294 1 1 1 1 2 NaN 2  1 1 class1
297 5 4 3 1 2 NaN 2  3 1 class1
315 4 6 5 6 7 NaN 4  9 1 class1
321 3 1 1 1 2 NaN 3  1 1 class1
411 1 1 1 1 1 NaN 2  1 1 class1
617 1 1 1 1 1 NaN 1  1 1 class1

In [6]: # changing class1 and class2 to 0 and 1 respectively in the last column of dataframe

LastColumn= df.columns[-1] #extracting the name of the last column in dataframe


df[LastColumn]= df[LastColumn].replace(['class1'],0)
df[LastColumn]= df[LastColumn].replace(['class2'],1)

In [7]: # Splitting the dataset into X (all columns except class) and y (the class column).

X= df.drop(LastColumn, axis=1)
y= df[LastColumn]

In [8]: # filling missing Values using SimpleImputer

imputer= SimpleImputer(strategy='mean', missing_values=np.nan)

imputer= imputer.fit(X)
Xfilled= imputer.transform(X)
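As a quick sanity check (a sketch only, not one of the original cells), one can confirm that the imputed array no longer contains any NaN values:

# Sketch: verify that imputation removed all missing values
print(np.isnan(Xfilled).sum()) # 0 means no missing values remain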
In [9]: # Normalisation

scaler= MinMaxScaler()
XNormalised= scaler.fit_transform(Xfilled)

print(XNormalised)

[[0.44444444 0.         0.         ... 0.22222222 0.         0.        ]
 [0.44444444 0.33333333 0.33333333 ... 0.22222222 0.11111111 0.        ]
 [0.22222222 0.         0.         ... 0.22222222 0.         0.        ]
 ...
 [0.44444444 1.         1.         ... 0.77777778 1.         0.11111111]
 [0.33333333 0.77777778 0.55555556 ... 1.         0.55555556 0.        ]
 [0.33333333 0.77777778 0.77777778 ... 1.         0.33333333 0.        ]]

In [10]: # Defining a function to print the first ten rows of the pre-processed dataset to 4 decimal places:

def print_data(X, y, n_rows):
    for i in range(n_rows):
        for feature in X[i]:
            print("{:.4f}".format(feature), end=",")
        if i == len(X)-1:
            print(y[i], end="")
        else:
            print(y[i])

In [11]: #printing the first ten rows of pre-processed dataset to 4 decimal places using the above function:

print_data(XNormalised,y,10)

0.4444,0.0000,0.0000,0.0000,0.1111,0.0000,0.2222,0.0000,0.0000,0
0.4444,0.3333,0.3333,0.4444,0.6667,1.0000,0.2222,0.1111,0.0000,0
0.2222,0.0000,0.0000,0.0000,0.1111,0.1111,0.2222,0.0000,0.0000,0
0.5556,0.7778,0.7778,0.0000,0.2222,0.3333,0.2222,0.6667,0.0000,0
0.3333,0.0000,0.0000,0.2222,0.1111,0.0000,0.2222,0.0000,0.0000,0
0.7778,1.0000,1.0000,0.7778,0.6667,1.0000,0.8889,0.6667,0.0000,1
0.0000,0.0000,0.0000,0.0000,0.1111,1.0000,0.2222,0.0000,0.0000,0
0.1111,0.0000,0.1111,0.0000,0.1111,0.0000,0.2222,0.0000,0.0000,0
0.1111,0.0000,0.0000,0.0000,0.1111,0.0000,0.0000,0.0000,0.4444,0
0.3333,0.1111,0.0000,0.0000,0.1111,0.0000,0.1111,0.0000,0.0000,0

Part 1: Cross-validation without parameter tuning


In [12]: ## Setting the 10 fold stratified cross-validation
cvKFold= StratifiedKFold(n_splits=10, shuffle=True, random_state=0)
In [13]: # creating the training and test data splits

X_train, X_test, y_train, y_test = train_test_split(
    XNormalised, y, stratify=y, random_state=0)

# XNormalised passed to train_test_split here has already been normalised,
# so X_train and X_test are already scaled to [0, 1] and the training data
# does not need to be normalised separately.
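If one preferred to avoid fitting the scaler on any test-set values, an alternative (a sketch only; it reuses Xfilled and y from the cells above, and names such as X_tr_raw and scaler_alt are illustrative) is to split first and fit MinMaxScaler on the training split alone:

# Sketch: split first, then fit the scaler on the training split only
X_tr_raw, X_te_raw, y_tr, y_te = train_test_split(
    Xfilled, y, stratify=y, random_state=0)

scaler_alt = MinMaxScaler()
X_tr = scaler_alt.fit_transform(X_tr_raw)  # fit and transform the training split
X_te = scaler_alt.transform(X_te_raw)      # transform the test split with the same scaler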

In [14]: # creating and applying KNN Classifier to dataset

knn= KNeighborsClassifier() #load an instance of the classifier.


knn.fit(X_train, y_train) #creating a model
PredictionKNN = knn.predict(X_test) #making prediction using the model

print("Test set predictions:\n", PredictionKNN)


print("Accuracy on test set: {:.4f}".format(knn.score(X_test, y_test)))

Test set predictions:
[1 1 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 0 0 1 0 0 0 1 1 0 0 0 0 0 1 0 1 1 0 0 1
1 1 1 0 0 1 0 0 1 0 0 0 0 1 0 1 0 0 1 1 1 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0
0 0 0 0 0 1 1 0 1 0 1 0 1 1 0 0 1 0 1 0 0 1 0 0 0 0 1 0 0 1 0 0 1 1 0 0 0
0 1 1 0 0 0 1 0 1 0 0 0 0 0 1 1 0 0 0 0 0 1 0 0 1 0 0 1 0 0 0 1 0 1 1 0 0
0 0 0 1 1 1 0 1 0 0 1 0 0 1 0 1 0 1 1 0 0 0 1 1 0 0 0]
Accuracy on test set: 0.9600

In [15]: # Defining function for KNN cross-validation score

def KNNClassifier(X, y):
    knn = KNeighborsClassifier() # load an instance of the classifier.
    scores = cross_val_score(knn, X, y, cv=cvKFold) # perform cross validation
    return round(scores.mean(), 4) # return mean score of cross validation

In [16]: #Running the above function for KNN cross-validation score

print(KNNClassifier(XNormalised, y))

0.9671

In [17]: # creating and applying Logistic regression Classifier to dataset

logreg= LogisticRegression() #load an instance of the classifier.


logreg.fit(X_train, y_train) #creating a model
PredictionLR= logreg.predict(X_test) #making prediction using the model

print("Test set predictions:\n", PredictionLR)


print("Accuracy on test set: {:.4f}".format(logreg.score(X_test, y_test)))

Test set predictions:
[1 1 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 0 0 1 0 0 0 1 1 0 0 0 0 0 1 0 1 1 0 0 1
1 1 1 0 0 1 0 0 1 0 0 0 0 1 0 1 0 0 1 1 1 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0
0 0 0 0 0 1 1 0 1 1 1 0 1 1 0 0 1 0 1 0 0 1 0 0 0 0 0 0 0 1 0 0 1 1 0 0 0
0 1 1 0 0 1 1 0 1 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 1 0 1 1 0 0
0 0 0 1 1 1 0 1 0 0 1 0 0 1 0 1 0 1 1 0 0 0 1 1 0 0 0]
Accuracy on test set: 0.9600

In [18]: # Defining function for Logistic regression cross-validation score

def logregClassifier(X, y):
    logreg = LogisticRegression()
    scores = cross_val_score(logreg, X, y, cv=cvKFold)
    return round(scores.mean(), 4)

In [19]: # Running the above function for Logistic regression cross-validation score
logregClassifier(XNormalised, y)

Out[19]: 0.9642


In [20]: # creating and applying Naive Bayes Classifier to dataset

nb= GaussianNB() #load an instance of the classifier.


nb.fit(X_train, y_train) #creating a model
PredictionNB = nb.predict(X_test) #making prediction using the model

print("Test set predictions:\n", PredictionNB)


print("Accuracy on test set: {:.4f}".format(nb.score(X_test, y_test)))

Test set predictions:
[1 1 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 0 0 1 0 0 0 1 1 0 0 0 0 0 1 0 1 1 0 0 1
1 1 1 0 0 1 0 0 1 0 0 0 0 1 0 1 0 0 1 1 1 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0
0 0 0 0 0 1 1 0 1 1 1 0 1 1 0 0 1 0 1 0 0 1 0 0 0 0 1 0 0 1 0 0 1 1 0 0 0
0 1 1 0 0 1 1 0 1 1 0 0 1 0 1 1 0 0 0 0 0 1 0 0 1 0 0 1 0 0 0 1 1 1 1 0 0
0 0 0 1 1 1 0 1 0 0 1 0 0 1 0 1 0 1 1 0 0 0 1 1 0 0 0]
Accuracy on test set: 0.9657
In [21]: # Defining function for Naive Bayes cross-validation score

def nbClassifier(X, y):
    nb = GaussianNB() # load an instance of the classifier.
    scores = cross_val_score(nb, X, y, cv=cvKFold) # perform cross validation
    return round(scores.mean(), 4)

In [22]: # Running the above function for Naive Bayes cross-validation score

nbClassifier(XNormalised, y)

Out[22]: 0.9585


In [23]: # creating and applying Decision Tree Classifier to dataset

tree= DecisionTreeClassifier(criterion='entropy', random_state=0) #load an instance of the classifier.


tree.fit(X_train, y_train) #creating a model

print("Accuracy on test set: {:.4f}".format(tree.score(X_test, y_test)))

Accuracy on test set: 0.9600

In [24]: # Defining function for Decision Tree cross-validation score

def dtClassifier(X, y):
    tree = DecisionTreeClassifier(criterion='entropy', random_state=0)
    scores = cross_val_score(tree, X, y, cv=cvKFold) # perform cross validation
    return round(scores.mean(), 4)

In [25]: # Running the above function for Decision Tree cross-validation score

dtClassifier(XNormalised, y)

Out[25]: 0.9385

In [26]: # creating and applying Bagging Classifier to dataset

bagC = BaggingClassifier(
    DecisionTreeClassifier(criterion='entropy', max_depth=6, random_state=0),
    n_estimators=60, max_samples=100, bootstrap=True, random_state=0) # load an instance of the classifier.
bagC.fit(X_train, y_train) # creating a model

print("Accuracy on test set: {:.4f}".format(bagC.score(X_test, y_test)))

Accuracy on test set: 0.9543

In [27]: # Defining function for Bagging cross-validation score

def bagClassifier(X, y, n_estimators, max_samples, max_depth):
    bagC = BaggingClassifier(
        DecisionTreeClassifier(criterion='entropy', max_depth=max_depth, random_state=0),
        n_estimators=n_estimators, max_samples=max_samples, bootstrap=True, random_state=0) # load an instance of the classifier.
    scores = cross_val_score(bagC, X, y, cv=cvKFold) # perform cross validation
    return round(scores.mean(), 4)

In [28]: # Running the above function for Bagging cross-validation score

bagClassifier(XNormalised, y, 60, 100, 6)

Out[28]: 0.9571


In [29]: # creating and applying AdaBoost Classifier to dataset

adaB = AdaBoostClassifier(
    DecisionTreeClassifier(criterion='entropy', max_depth=6),
    n_estimators=60, learning_rate=0.5, random_state=0)
adaB.fit(X_train, y_train) # creating a model

print("Accuracy on test set: {:.4f}".format(adaB.score(X_test, y_test)))

Accuracy on test set: 0.9429


In [30]: # Defining function for AdaBoost cross-validation score

def adaBClassifier(X, y, n_estimators, learning_rate, max_depth):
    adaB = AdaBoostClassifier(
        DecisionTreeClassifier(criterion='entropy', max_depth=max_depth),
        n_estimators=n_estimators, learning_rate=learning_rate, random_state=0)
    scores = cross_val_score(adaB, X, y, cv=cvKFold) # perform cross validation
    return round(scores.mean(), 4)

In [31]: # Running the above function for AdaBoost cross-validation score

adaBClassifier(XNormalised, y, 60, 0.5, 6)

Out[31]: 0.9542


In [32]: # creating and applying Gradient Boosting Classifier to dataset

GB= GradientBoostingClassifier(n_estimators=60, learning_rate=0.5, random_state=0) #load an instance of the classifier.

GB.fit(X_train, y_train) #creating a model

print("Accuracy on test set: {:.4f}".format(GB.score(X_test, y_test)))

Accuracy on test set: 0.9600

In [33]: # Defining function for Gradient Boosting cross-validation score

def gbClassifier(X, y, n_estimators, learning_rate):
    GB = GradientBoostingClassifier(max_depth=1, n_estimators=n_estimators,
                                    learning_rate=learning_rate, random_state=0)
    scores = cross_val_score(GB, X, y, cv=cvKFold) # perform cross validation
    return round(scores.mean(), 4)

In [34]: # Running the above function for Gradient Boosting cross-validation score


print(gbClassifier(XNormalised, y, 60, 0.5))

0.9571


Part 1 Results
In [35]: # Parameters for Part 1:

#Bagging
bag_n_estimators = 60
bag_max_samples = 100
bag_max_depth = 6

#AdaBoost
ada_n_estimators = 60
ada_learning_rate = 0.5
ada_bag_max_depth = 6

#GB
gb_n_estimators = 60
gb_learning_rate = 0.5

# Print results for each classifier in part 1 to 4 decimal places:


print("LR average cross-validation accuracy: ","{:.4f}".format(logregClassifier(XNormalised, y))) # using function created above
print("NB average cross-validation accuracy: ","{:.4f}".format(nbClassifier(XNormalised, y))) # using function created above
print("DT average cross-validation accuracy: ","{:.4f}".format(dtClassifier(XNormalised, y))) # using function created above
print("Bagging average cross-validation accuracy: ","{:.4f}".format(bagClassifier(XNormalised, y, 60, 100, 6))) # using function created above
print("AdaBoost average cross-validation accuracy: ","{:.4f}".format(adaBClassifier(XNormalised, y, 60, 0.5, 6))) # using function created above
print("GB average cross-validation accuracy: ","{:.4f}".format(gbClassifier(XNormalised, y, 60, 0.5))) # using function created above

LR average cross-validation accuracy: 0.9642
NB average cross-validation accuracy: 0.9585
DT average cross-validation accuracy: 0.9385
Bagging average cross-validation accuracy: 0.9571
AdaBoost average cross-validation accuracy: 0.9542
GB average cross-validation accuracy: 0.9571
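For compactness, the same Part 1 summary could also be produced with a loop over the classifier functions defined above; the sketch below only reuses those functions and the parameter variables from this cell (the part1_models list itself is illustrative):

# Sketch: print the Part 1 summary with a loop over (label, callable) pairs
part1_models = [
    ("LR", lambda: logregClassifier(XNormalised, y)),
    ("NB", lambda: nbClassifier(XNormalised, y)),
    ("DT", lambda: dtClassifier(XNormalised, y)),
    ("Bagging", lambda: bagClassifier(XNormalised, y, bag_n_estimators, bag_max_samples, bag_max_depth)),
    ("AdaBoost", lambda: adaBClassifier(XNormalised, y, ada_n_estimators, ada_learning_rate, ada_bag_max_depth)),
    ("GB", lambda: gbClassifier(XNormalised, y, gb_n_estimators, gb_learning_rate)),
]
for name, fn in part1_models:
    print("{} average cross-validation accuracy: {:.4f}".format(name, fn()))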

Part 2: Cross-validation with parameter tuning


In [36]: # defining KNN function with parameter tuning and cross-validation

parameters = {'n_neighbors': [1, 3, 5, 7, 9], 'p': [1, 2]}

def bestKNNClassifier(X, y, S):

    # X for normalised data set and y for class

    # S=1 for calling KNN best k from function
    # S=2 for calling KNN best p from function
    # S=3 for calling KNN cross-validation from function
    # S=4 for calling KNN test accuracy from function

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, stratify=y, random_state=0)

    grid = GridSearchCV(KNeighborsClassifier(), parameters, cv=cvKFold, return_train_score=True)
    grid.fit(X_train, y_train)
    BestParams = grid.best_params_

    if S == 1:
        return print(BestParams['n_neighbors']) # for KNN best k
    elif S == 2:
        return print(BestParams['p']) # for KNN best p
    elif S == 3:
        return print("{:.4f}".format(grid.best_score_)) # for KNN cross-validation accuracy
    elif S == 4:
        return print("{:.4f}".format(grid.score(X_test, y_test))) # for KNN test set accuracy
    else:
        return print("please input S=1-4 and try again")
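Since each call to bestKNNClassifier repeats the grid search, a single-pass alternative is sketched below; the helper name knn_grid_summary and its tuple return value are illustrative, and it reuses the parameters grid and cvKFold defined above:

# Sketch: run the KNN grid search once and return all four quantities together
def knn_grid_summary(X, y):
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, stratify=y, random_state=0)
    grid = GridSearchCV(KNeighborsClassifier(), parameters, cv=cvKFold, return_train_score=True)
    grid.fit(X_tr, y_tr)
    return (grid.best_params_['n_neighbors'],   # best k
            grid.best_params_['p'],             # best p
            round(grid.best_score_, 4),         # cross-validation accuracy
            round(grid.score(X_te, y_te), 4))   # test set accuracy

# Example usage: k, p, cv_acc, test_acc = knn_grid_summary(XNormalised, y)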

In [37]: # finding best k using function

bestKNNClassifier(XNormalised,y,1)

3

In [38]: # finding best p using function

bestKNNClassifier(XNormalised,y,2)

1

In [39]: # finding KNN cross-validation accuracy using function


bestKNNClassifier(XNormalised,y,3)

0.9695

In [40]: # finding KNN Test set accuracy using function


bestKNNClassifier(XNormalised,y,4)

0.9543

In [41]: # creating and applying SVM Classifier to dataset

svmR = SVC(kernel="rbf") #using rbf kernel to create SVM classifier


svmR.fit(X_train, y_train)
PredictionSVMR= svmR.predict(X_test)
print("SVM(rbf) - test set accuracy:", "{:.4f}".format(accuracy_score(y_test, PredictionSVMR)))

svmL = SVC(kernel="linear") #using linear kernel to create SVM classifier


svmL.fit(X_train, y_train)
PredictionSVML= svmL.predict(X_test)
print("SVM(linear) - test set accuracy: ","{:.4f}".format(accuracy_score(y_test, PredictionSVML)))

SVM(rbf) - test set accuracy: 0.9657
SVM(linear) - test set accuracy: 0.9714

In [42]: # defining SVM function with parameter tuning and cross-validation

SVMgrid = {'C': [0.01, 0.1, 1, 5, 15], 'gamma': [0.01, 0.1, 1, 10, 50]}

def bestSVMClassifier(X, y, S):

    # X for normalised data set and y for class

    # S=1 for calling SVM best parameter C
    # S=2 for calling SVM best parameter gamma
    # S=3 for calling SVM cross-validation accuracy
    # S=4 for calling SVM test set accuracy

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, stratify=y, random_state=0)

    svm = SVC(kernel='rbf') # kernel set to rbf

    SVM = GridSearchCV(svm, SVMgrid, cv=cvKFold, return_train_score=True)
    SVM.fit(X_train, y_train)
    BestParams = SVM.best_params_

    if S == 1:
        return print(BestParams['C']) # SVM best C
    elif S == 2:
        return print(BestParams['gamma']) # SVM best gamma
    elif S == 3:
        return print("{:.4f}".format(SVM.best_score_)) # SVM cross-validation accuracy
    elif S == 4:
        return print("{:.4f}".format(SVM.score(X_test, y_test))) # SVM test set accuracy
    else:
        return print("please input S=1-4 and try again")
In [43]: # Finding SVM best C using function
bestSVMClassifier(XNormalised, y,1)

5

In [44]: # Finding SVM best Gamma using function


bestSVMClassifier(XNormalised, y,2)

0.1

In [45]: # Finding SVM cross-validation accuracy using function


bestSVMClassifier(XNormalised, y,3)

0.9676

In [46]: # Finding SVM test set accuracy using function


bestSVMClassifier(XNormalised, y,4)

0.9714


In [47]: # creating and applying Random Forest Classifier to dataset

RF= RandomForestClassifier(criterion='entropy', n_estimators=50, max_leaf_nodes=12, random_state=0)

RF.fit(X_train, y_train)
PredictionRF = RF.predict(X_test)

print("Random forest test set accuracy: ","{:.4f}".format(accuracy_score(y_test, PredictionRF)))

Random forest test set accuracy: 0.9714


In [48]: # Defining Random Forest function with parameter tuning and cross-validation

RFparameters = {'n_estimators': [10, 30, 60, 100, 150], 'max_leaf_nodes': [6, 12, 18]}

def bestRFClassifier(X, y, S):

    # X for normalised data set and y for class

    # S=1 for calling RF best n_estimators
    # S=2 for calling RF best max_leaf_nodes
    # S=3 for calling RF cross-validation accuracy
    # S=4 for calling RF test set accuracy
    # S=5 for calling RF test set macro average F1
    # S=6 for calling RF test set weighted average F1

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, stratify=y, random_state=0)

    rf = RandomForestClassifier(criterion='entropy', max_features='sqrt', random_state=0) # information gain and max_features set to 'sqrt'

    RF = GridSearchCV(rf, RFparameters, cv=cvKFold, return_train_score=True)
    RF.fit(X_train, y_train)
    BestParams = RF.best_params_

    actual = y_test
    predicted = RF.predict(X_test)

    if S == 1:
        return print(BestParams['n_estimators']) # RF best n_estimators
    elif S == 2:
        return print(BestParams['max_leaf_nodes']) # RF best max_leaf_nodes
    elif S == 3:
        return print("{:.4f}".format(RF.best_score_)) # RF cross-validation accuracy
    elif S == 4:
        return print("{:.4f}".format(RF.score(X_test, y_test))) # RF test set accuracy
    elif S == 5:
        return print("{:.4f}".format(f1_score(actual, predicted, average='macro'))) # RF test set macro average F1
    elif S == 6:
        return print("{:.4f}".format(f1_score(actual, predicted, average='weighted'))) # RF test set weighted average F1
    else:
        return print("please input S=1-6 and try again")

In [49]: # finding RF best n_estimators using function


bestRFClassifier(XNormalised, y,1)

150

In [50]: # finding RF best max_leaf_nodes using function


bestRFClassifier(XNormalised, y,2)

6
In [51]: # finding RF cross-validation accuracy using function
bestRFClassifier(XNormalised, y,3)

0.9675

In [52]: # finding RF test set accuracy using function


bestRFClassifier(XNormalised, y,4)

0.9657

In [53]: # finding RF test set macro average F1 score using function


bestRFClassifier(XNormalised, y,5)

0.9628

In [54]: # finding RF test set weighted average F1 score using function


bestRFClassifier(XNormalised, y,6)

0.9661

Part 2: Results
In [55]:
# printing the results using functions defined above

print("KNN best k: ", end="")


bestKNNClassifier(XNormalised,y,1) # calling KNN best k from the function

print("KNN best p: ", end="")


bestKNNClassifier(XNormalised,y,2) # calling KNN best p from the function

print("KNN cross-validation accuracy: ", end='')


bestKNNClassifier(XNormalised,y,3) # calling KNN cross-validation the from function

print("KNN test set accuracy: ",end='')


bestKNNClassifier(XNormalised,y,4) # calling KNN test accuracy from the function
print()

print("SVM best C: ",end='')


bestSVMClassifier(XNormalised, y,1) # calling SVM best C from the function

print("SVM best gamma: ",end='')


bestSVMClassifier(XNormalised, y,2) # calling SVM best gamma from the function

print("SVM cross-validation accuracy: ",end='')


bestSVMClassifier(XNormalised, y,3) # calling SVM cross-validation accuracy from the function

print("SVM test set accuracy: ",end='')


bestSVMClassifier(XNormalised, y,4) # calling SVM test set accuracy from the function

print()

print("RF best n_estimators: ",end='')


bestRFClassifier(XNormalised, y,1) # calling RF best n_estimators from the function

print("RF best max_leaf_nodes: ",end='')


bestRFClassifier(XNormalised, y,2) # calling RF best max leaf nodes from the function

print("RF cross-validation accuracy: ",end='')


bestRFClassifier(XNormalised, y,3) # calling RF cross-validation accuracy from the function

print("RF test set accuracy: ",end='')


bestRFClassifier(XNormalised, y,4) # calling RF test accuracy from the function

print("RF test set macro average F1: ",end='')


bestRFClassifier(XNormalised, y,5) # calling macro F1 score from the function

print("RF test set weighted average F1: ",end='')


bestRFClassifier(XNormalised, y,6) # calling weighted F1 score from the function

KNN best k: 3
KNN best p: 1
KNN cross-validation accuracy: 0.9695
KNN test set accuracy: 0.9543

SVM best C: 5
SVM best gamma: 0.1
SVM cross-validation accuracy: 0.9676
SVM test set accuracy: 0.9714

RF best n_estimators: 150
RF best max_leaf_nodes: 6
RF cross-validation accuracy: 0.9675
RF test set accuracy: 0.9657
RF test set macro average F1: 0.9628
RF test set weighted average F1: 0.9661
