Importing Libraries

In [1]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

Loading the dataset

In [3]:
url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/iris.csv'
names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']
dataFrame = pd.read_csv(url, names=names)
dataFrame.head()

Out[3]:
   sepal-length  sepal-width  petal-length  petal-width        class
0           5.1          3.5           1.4          0.2  Iris-setosa
1           4.9          3.0           1.4          0.2  Iris-setosa
2           4.7          3.2           1.3          0.2  Iris-setosa
3           4.6          3.1           1.5          0.2  Iris-setosa
4           5.0          3.6           1.4          0.2  Iris-setosa

Summarizing the Dataset

In [4]:
# dimensions of the dataset
dataFrame.shape

Out[4]:
(150, 5)

In [5]:
# display statistics about the data
dataFrame.describe()

Out[5]:
       sepal-length  sepal-width  petal-length  petal-width
count    150.000000   150.000000    150.000000   150.000000
mean       5.843333     3.054000      3.758667     1.198667
std        0.828066     0.433594      1.764420     0.763161
min        4.300000     2.000000      1.000000     0.100000
25%        5.100000     2.800000      1.600000     0.300000
50%        5.800000     3.000000      4.350000     1.300000
75%        6.400000     3.300000      5.100000     1.800000
max        7.900000     4.400000      6.900000     2.500000

In [6]:
# display basic info about the datatypes
dataFrame.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype
---  ------        --------------  -----
 0   sepal-length  150 non-null    float64
 1   sepal-width   150 non-null    float64
 2   petal-length  150 non-null    float64
 3   petal-width   150 non-null    float64
 4   class         150 non-null    object
dtypes: float64(4), object(1)
memory usage: 6.0+ KB

In [7]:
# display the number of samples for each class
dataFrame['class'].value_counts()

Out[7]:
Iris-setosa        50
Iris-versicolor    50
Iris-virginica     50
Name: class, dtype: int64

In [8]:
# check for null values
dataFrame.isnull().sum()

Out[8]:
sepal-length    0
sepal-width     0
petal-length    0
petal-width     0
class           0
dtype: int64
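Note: LabelEncoder is imported above but never used, since the scikit-learn classifiers trained below accept string labels directly. As a minimal sketch (not part of the original notebook) of how it could map the three class strings to integers if a numeric target were ever needed:

# Hypothetical use of the already-imported LabelEncoder:
# map the three class strings to the integers 0-2.
le = LabelEncoder()
encoded_classes = le.fit_transform(dataFrame['class'])
print(le.classes_)          # ['Iris-setosa' 'Iris-versicolor' 'Iris-virginica']
print(encoded_classes[:5])  # [0 0 0 0 0]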
Data Visualization

In [9]:
# box and whisker plots
dataFrame.plot(kind='box', subplots=True, layout=(2,2), sharex=False, sharey=False)
plt.show()

[Box-and-whisker plots of sepal-length, sepal-width, petal-length and petal-width]

In [10]:
# histograms
dataFrame['sepal-length'].hist()
plt.show()

[Histogram of sepal-length]

In [11]:
dataFrame['sepal-width'].hist()
plt.show()

[Histogram of sepal-width]

In [12]:
dataFrame['petal-length'].hist()
plt.show()

[Histogram of petal-length]

In [13]:
dataFrame['petal-width'].hist()
plt.show()

[Histogram of petal-width]

In [14]:
# scatter matrix
pd.plotting.scatter_matrix(dataFrame)
plt.show()

[Scatter matrix of the four numeric features]

In [16]:
# heatmap
print("Checking the correlation : ")
corr = dataFrame.corr()
fig, ax = plt.subplots(figsize=(5, 4))
sns.heatmap(corr, annot=True, ax=ax)
plt.show()

Checking the correlation :

[Annotated correlation heatmap of the four numeric features]

Splitting the data

In [17]:
X = dataFrame.drop(columns=['class'])
Y = dataFrame['class']
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.20, random_state=1)

Building Models

In [18]:
models = []
models.append(('LR', LogisticRegression(solver='liblinear', multi_class='ovr')))
models.append(('KNN', KNeighborsClassifier()))
models.append(('CART', DecisionTreeClassifier()))
models.append(('NB', GaussianNB()))
models.append(('SVM', SVC(gamma='auto')))

# evaluate each model in turn
results = []
names = []
for name, model in models:
    kfold = StratifiedKFold(n_splits=10, random_state=1, shuffle=True)
    cv_results = cross_val_score(model, x_train, y_train, cv=kfold, scoring='accuracy')
    results.append(cv_results)
    names.append(name)
    print('%s: %f (%f)' % (name, cv_results.mean(), cv_results.std()))

LR: 0.941667 (0.065085)
KNN: 0.958333 (0.041667)
CART: 0.941667 (0.053359)
NB: 0.950000 (0.055277)
SVM: 0.983333 (0.033333)

In [19]:
# compare algorithms and select the best model
plt.boxplot(results, labels=names)
plt.title('Algorithm Comparison')
plt.show()

[Box plot comparing cross-validation accuracy across LR, KNN, CART, NB and SVM]

Make Predictions

In [21]:
model = SVC(gamma='auto')
model.fit(x_train, y_train)
predictions = model.predict(x_test)

In [22]:
# evaluate predictions
print(f'Test Accuracy: {accuracy_score(y_test, predictions)}')
print(f'Classification Report: \n {classification_report(y_test, predictions)}')

Test Accuracy: 0.9666666666666667
Classification Report:
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        11
Iris-versicolor       1.00      0.92      0.96        13
 Iris-virginica       0.86      1.00      0.92         6

       accuracy                           0.97        30
      macro avg       0.95      0.97      0.96        30
   weighted avg       0.97      0.97      0.97        30
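As a complementary check (a sketch, not part of the original notebook), a confusion matrix would show exactly where the model errs; the report above suggests a single versicolor sample was predicted as virginica:

# Hedged addition: visualize the per-class errors behind the report.
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

cm = confusion_matrix(y_test, predictions, labels=model.classes_)
ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=model.classes_).plot()
plt.show()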
