Importing Libraries

In [1]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

Loading the dataset

In [3]:
url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/iris.csv'
names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']
dataFrame = pd.read_csv(url, names=names)
dataFrame.head()

Out[3]:
   sepal-length  sepal-width  petal-length  petal-width        class
0           5.1          3.5           1.4          0.2  Iris-setosa
1           4.9          3.0           1.4          0.2  Iris-setosa
2           4.7          3.2           1.3          0.2  Iris-setosa
3           4.6          3.1           1.5          0.2  Iris-setosa
4           5.0          3.6           1.4          0.2  Iris-setosa

Summarizing the Dataset

In [4]:
# dimensions of the dataset
dataFrame.shape

Out[4]:
(150, 5)

In [5]:
# display statistics about the data
dataFrame.describe()

Out[5]:
       sepal-length  sepal-width  petal-length  petal-width
count    150.000000   150.000000    150.000000   150.000000
mean       5.843333     3.054000      3.758667     1.198667
std        0.828066     0.433594      1.764420     0.763161
min        4.300000     2.000000      1.000000     0.100000
25%        5.100000     2.800000      1.600000     0.300000
50%        5.800000     3.000000      4.350000     1.300000
75%        6.400000     3.300000      5.100000     1.800000
max        7.900000     4.400000      6.900000     2.500000

In [6]:
# display basic info about the datatypes
dataFrame.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype
---  ------        --------------  -----
 0   sepal-length  150 non-null    float64
 1   sepal-width   150 non-null    float64
 2   petal-length  150 non-null    float64
 3   petal-width   150 non-null    float64
 4   class         150 non-null    object
dtypes: float64(4), object(1)
memory usage: 6.0+ KB

In [7]:
# display the number of samples for each class
dataFrame['class'].value_counts()

Out[7]:
Iris-setosa        50
Iris-versicolor    50
Iris-virginica     50
Name: class, dtype: int64

In [8]:
# check for null values
dataFrame.isnull().sum()

Out[8]:
sepal-length    0
sepal-width     0
petal-length    0
petal-width     0
class           0
dtype: int64
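Note: LabelEncoder is imported above but never used, since the scikit-learn classifiers trained below accept string labels directly. As a minimal sketch (not part of the original notebook) of how it could map the three class strings to integers if a numeric target were ever needed:

# Hypothetical use of the already-imported LabelEncoder:
# map the three class strings to the integers 0-2.
le = LabelEncoder()
encoded_classes = le.fit_transform(dataFrame['class'])
print(le.classes_)          # ['Iris-setosa' 'Iris-versicolor' 'Iris-virginica']
print(encoded_classes[:5])  # [0 0 0 0 0]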
Data Visualization

In [9]:
# box and whisker plots
dataFrame.plot(kind='box', subplots=True, layout=(2,2), sharex=False, sharey=False)
plt.show()

[Box-and-whisker plots of sepal-length, sepal-width, petal-length and petal-width]

In [10]:
# histograms
dataFrame['sepal-length'].hist()
plt.show()

[Histogram of sepal-length]

In [11]:
dataFrame['sepal-width'].hist()
plt.show()

[Histogram of sepal-width]

In [12]:
dataFrame['petal-length'].hist()
plt.show()

[Histogram of petal-length]

In [13]:
dataFrame['petal-width'].hist()
plt.show()

[Histogram of petal-width]

In [14]:
# scatter matrix
pd.plotting.scatter_matrix(dataFrame)
plt.show()

[Scatter matrix of the four numeric features]

In [16]:
# heatmap
print("Checking the correlation : ")
corr = dataFrame.corr()
fig, ax = plt.subplots(figsize=(5, 4))
sns.heatmap(corr, annot=True, ax=ax)
plt.show()

Checking the correlation :

[Annotated correlation heatmap of the four numeric features]

Splitting the data

In [17]:
X = dataFrame.drop(columns=['class'])
Y = dataFrame['class']
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.20, random_state=1)

Building Models

In [18]:
models = []
models.append(('LR', LogisticRegression(solver='liblinear', multi_class='ovr')))
models.append(('KNN', KNeighborsClassifier()))
models.append(('CART', DecisionTreeClassifier()))
models.append(('NB', GaussianNB()))
models.append(('SVM', SVC(gamma='auto')))

# evaluate each model in turn
results = []
names = []
for name, model in models:
    kfold = StratifiedKFold(n_splits=10, random_state=1, shuffle=True)
    cv_results = cross_val_score(model, x_train, y_train, cv=kfold, scoring='accuracy')
    results.append(cv_results)
    names.append(name)
    print('%s: %f (%f)' % (name, cv_results.mean(), cv_results.std()))

LR: 0.941667 (0.065085)
KNN: 0.958333 (0.041667)
CART: 0.941667 (0.053359)
NB: 0.950000 (0.055277)
SVM: 0.983333 (0.033333)

In [19]:
# compare algorithms and select the best model
plt.boxplot(results, labels=names)
plt.title('Algorithm Comparison')
plt.show()

[Box plot comparing cross-validation accuracy across LR, KNN, CART, NB and SVM]

Make Predictions

In [21]:
model = SVC(gamma='auto')
model.fit(x_train, y_train)
predictions = model.predict(x_test)

In [22]:
# evaluate predictions
print(f'Test Accuracy: {accuracy_score(y_test, predictions)}')
print(f'Classification Report: \n {classification_report(y_test, predictions)}')

Test Accuracy: 0.9666666666666667
Classification Report:
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        11
Iris-versicolor       1.00      0.92      0.96        13
 Iris-virginica       0.86      1.00      0.92         6

       accuracy                           0.97        30
      macro avg       0.95      0.97      0.96        30
   weighted avg       0.97      0.97      0.97        30
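As a complementary check (a sketch, not part of the original notebook), a confusion matrix would show exactly where the model errs; the report above suggests a single versicolor sample was predicted as virginica:

# Hedged addition: visualize the per-class errors behind the report.
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

cm = confusion_matrix(y_test, predictions, labels=model.classes_)
ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=model.classes_).plot()
plt.show()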
