ML Assignment 7

You might also like

Download as pdf or txt
Download as pdf or txt
You are on page 1 of 4

Name- Sandeep kaleshriya

Scholar No: - 201112040


CSE - 1
Machine Learning Assignment-7
Q) Find the optimal accuracy of an SVM classifier with the ‘RBF’ kernel function on the PIMA dataset.
Find the 5 accuracies for the given 5-fold dataset and their average, and find the best C (regularization
parameter) and sigma (kernel width), i.e. the pair that gives the best accuracy on the 5-fold dataset.

Code →

"""#LIBSVM"""
!pip install libsvm-official
import sklearn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from libsvm.svmutil import *
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score

# Load the five pre-split folds. Each fold is a pair of CSV files: the
# "<k>tra" file is bound to p<k>train and the "<k>tst" file to p<k>test.
p1train, p1test = (
    pd.read_csv("/content/drive/MyDrive/pima-5fold/pima-5-1tra.csv"),
    pd.read_csv("/content/drive/MyDrive/pima-5fold/pima-5-1tst.csv"),
)
p2train, p2test = (
    pd.read_csv("/content/drive/MyDrive/pima-5fold/pima-5-2tra.csv"),
    pd.read_csv("/content/drive/MyDrive/pima-5fold/pima-5-2tst.csv"),
)
p3train, p3test = (
    pd.read_csv("/content/drive/MyDrive/pima-5fold/pima-5-3tra.csv"),
    pd.read_csv("/content/drive/MyDrive/pima-5fold/pima-5-3tst.csv"),
)
p4train, p4test = (
    pd.read_csv("/content/drive/MyDrive/pima-5fold/pima-5-4tra.csv"),
    pd.read_csv("/content/drive/MyDrive/pima-5fold/pima-5-4tst.csv"),
)
p5train, p5test = (
    pd.read_csv("/content/drive/MyDrive/pima-5fold/pima-5-5tra.csv"),
    pd.read_csv("/content/drive/MyDrive/pima-5fold/pima-5-5tst.csv"),
)

# Quick sanity check of the first training fold: row count, then a preview
# (the bare .head() expression is only displayed when run as a notebook cell).
print(len(p1train))
p1train.head()
"""##Regularization Parameter varies from 2^-18 , 2^-16 , .... 2^15
##Kernel Width (Sigma) varies from 2^-18 , 2^-16 , .... 2^0 , 2^1 , .... 2^20
"""

# Hyper-parameter grids for the search, expressed as powers of two.
# Exponents -18, -16, ..., 0 (step 2) are shared by both grids; the positive
# exponents then continue in steps of 1: up to 2^15 for the regularization
# parameter C and up to 2^20 for the kernel width.
regular = [pow(2, i) for i in range(-18, 1, 2)]
kwidth = list(regular)

regular.extend(pow(2, i) for i in range(1, 16))
kwidth.extend(pow(2, i) for i in range(1, 21))

def Preprocess(ptrain, ptest):
    """Turn one train/test fold into plain Python lists for LIBSVM.

    Labels are integer-encoded and the feature columns are standardized.
    Both the label mapping and the scaling statistics are learned from the
    training fold only and then applied unchanged to the test fold, so the
    two splits always share the same encoding. The input frames are not
    modified.

    Args:
        ptrain: Training DataFrame with a ``target`` column; every other
            column is treated as a feature.
        ptest: Test DataFrame with the same columns as ``ptrain``.

    Returns:
        A tuple ``(X_train, X_test, y_train, y_test)`` where the ``X_*``
        items are lists of feature rows (lists of floats) and the ``y_*``
        items are lists of integer class labels.

    Raises:
        ValueError: If ``ptest`` contains a label that never appears in
            ``ptrain`` (raised by ``LabelEncoder.transform``).
    """
    from sklearn.preprocessing import LabelEncoder, StandardScaler

    # Fit the label mapping on the training labels and reuse it for the test
    # labels; re-fitting on the test split could assign different integers
    # when a class is missing from that split.
    le = LabelEncoder()
    y_train = le.fit_transform(ptrain['target']).tolist()
    y_test = le.transform(ptest['target']).tolist()

    X_train = ptrain.drop('target', axis=1)
    X_test = ptest.drop('target', axis=1)

    # Scaling statistics come from the training features only.
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train).tolist()
    X_test = sc.transform(X_test).tolist()

    return X_train, X_test, y_train, y_test

"""#We are using Already Made 5 cross-fold Pima Database """

# Run Preprocess once per fold, in fold order, and bind each returned
# (X_train, X_test, y_train, y_test) tuple to its numbered names.
(
    (X_train1, X_test1, y_train1, y_test1),
    (X_train2, X_test2, y_train2, y_test2),
    (X_train3, X_test3, y_train3, y_test3),
    (X_train4, X_test4, y_train4, y_test4),
    (X_train5, X_test5, y_train5, y_test5),
) = (
    Preprocess(fold_train, fold_test)
    for fold_train, fold_test in (
        (p1train, p1test),
        (p2train, p2test),
        (p3train, p3test),
        (p4train, p4test),
        (p5train, p5test),
    )
)

# Exhaustive grid search over (C, kernel width): for every pair, train an
# RBF-kernel SVM on each fold's training split, score it on that fold's test
# split, and keep the pair with the highest mean accuracy over the 5 folds.
folds = [
    (X_train1, y_train1, X_test1, y_test1),
    (X_train2, y_train2, X_test2, y_test2),
    (X_train3, y_train3, X_test3, y_test3),
    (X_train4, y_train4, X_test4, y_test4),
    (X_train5, y_train5, X_test5, y_test5),
]

# a: best mean accuracy so far; rp / kw: the C and kernel width that gave it.
a, rp, kw = 0, 0, 0
for x in regular:
    for y in kwidth:
        # '-t 2' selects the RBF kernel and '-c' sets the cost C.
        # NOTE: LIBSVM's '-g' option is the RBF gamma coefficient, i.e. the
        # kernel is exp(-gamma * |u - v|^2). The kwidth value is passed
        # directly as gamma, so the "Sigma" reported later is that gamma.
        options = f'-c {x} -t 2 -g {y}'

        fold_acc = []
        for X_tr, y_tr, X_ts, y_ts in folds:
            model = svm_train(y_tr, X_tr, options)
            p_label, p_acc, p_val = svm_predict(y_ts, X_ts, model)
            # First entry of p_acc is the classification accuracy.
            fold_acc.append(p_acc[0])

        ma = np.mean(fold_acc)
        # Strict comparison: on ties the earliest (C, width) pair is kept.
        if a < ma:
            a, rp, kw = ma, x, y

# Layout read by the reporting code: [best accuracy, best C, best width].
results = [a, rp, kw]

# Report the best grid-search outcome; results holds
# [best mean accuracy, best C, best kernel width] in that order.
print(f"The Best Average Accuracy for Pima Dataset with 5 fold cross validation is : {round(results[0],2)} %")
print(f"The Best Regularization Parameter for Pima Dataset is : {results[1]} ")
print(f"The Best Kernal Width(Sigma) for Pima Dataset is : {round(results[2],10)} ")
The best average accuracy comes out to be 77.73% across all 5 folds.

The best C comes out to be 256 (2^8).

The best sigma comes out to be 0.0009765625 (2^-10).

You might also like