Final Project Report


Final Report

Paper Title: Feature Extraction and Classification of Greater Bean Leaf Disease

Course Code: CSE323


Course Title: Data Mining & Machine Learning

Submitted by:
Name: Abdullah Al Rafi
Id: 212-15-4218
Section: 60_B

Submitted to:
Dr. Md. Taimur Ahad
Associate Professor and Associate Head
Department of Computer Science and Engineering
Dataset:
Dataset link:
https://www.kaggle.com/datasets/sivm205/Greaterbean-diseased-leaf-dataset

https://www.kaggle.com/code/lorresprz/leaf-diseases-classification-inceptionv3-network

Description:

The dataset comprises images of diseased Greater Bean leaves, covering 10 distinct
ailments such as bacterial blight, brown spot, frog eye, rust, and sudden death
syndrome. The images are of high quality and meticulously annotated, making them
well suited for training machine-learning models that classify plant diseases. The
10 classes are: Bacterial Blight, Brown Spot, Crestamento, Ferrugen, Mosaic Virus,
Powdery Mildew, Septoria, Southern Blight, Sudden Death Syndrome, and Yellow Mosaic.
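
As a quick sanity check after downloading, a minimal sketch (assuming the archive has been
extracted to an ./images directory with one sub-folder per class, as the code later in this
report expects) lists the classes and their image counts:

import os

# List each class folder and its image count; assumes the dataset was
# extracted to ./images with one sub-folder per disease class.
for class_name in sorted(os.listdir("./images")):
    class_dir = os.path.join("./images", class_name)
    if os.path.isdir(class_dir):
        print(f"{class_name}: {len(os.listdir(class_dir))} images")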

Usage:
The Greaterbean Diseased Plant Leaf Images Dataset is primarily valuable for developing and testing
models that identify Greaterbean diseases. It also serves multiple research purposes, such as estimating
the prevalence of various diseases on Greaterbean plants and examining the impact of environmental
factors on leaf health. Furthermore, it holds educational value as a learning platform for both
machine learning and plant pathology, and it supports agricultural applications such as early
disease detection and decision-making in Greaterbean cultivation.
Importance:

The Greater Bean Afflicted Plant Leaf Images Dataset holds significant value for agricultural
technology. It has the potential to transform the control of Greaterbean diseases by enabling
the creation of precise and effective detection tools, thereby increasing productivity and
minimizing losses for farmers globally. The dataset plays a crucial role in advancing
precision-agriculture methods that help farmers detect Greaterbean crop diseases early. Its
impact extends beyond research facilities, fostering substantial improvements in agricultural
efficiency and sustainability on a global scale.

Code:

import time
st = time.time()

import numpy as np
import pandas as pd
import os
import cv2 as cv
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf

gpu_available = tf.config.list_physical_devices('GPU')
if gpu_available:
    print("GPU(s) available:")
    for gpu in gpu_available:
        print(gpu)
    with tf.device('/device:GPU:0'):
        pass  # Placeholder for code to be executed on the GPU
else:
    print("No GPU(s) available, using CPU.")

BB = "./images/bacterial_blight"
BS = "./images/brown_spot"
CR = "./images/crestamento"
FE = "./images/ferrugen"
MV = "./images/Mossaic Virus"
PM = "./images/powdery_mildew"
SE = "./images/septoria"
SB = "./images/Southern blight"
SDS = "./images/Sudden Death Syndrone"
YM = "./images/Yellow Mosaic"

for i in os.listdir('./images'):
    print(i, len(os.listdir('./images/' + i)))

def get_path_image(folder):
    image_paths = []
    image_fnames = os.listdir(folder)
    for img_id in range(len(image_fnames)):
        img = os.path.join(folder, image_fnames[img_id])
        image_paths.append(img)
    return image_paths

img_data = []
for i in [BB, BS, CR, FE, MV, PM, SE, SB, SDS, YM]:
    paths = get_path_image(i)
    img_data.extend(paths)
print(len(img_data))

data = {"img_data": img_data,
        "labels": [np.nan for x in range(len(img_data))]}
data = pd.DataFrame(data)

data["labels"][0:88] = 0 # bacterial_blight data["labels"][89:169] = 1 #


brown_spot data["labels"][170:174] = 2 # crestamento
data["labels"][175:240] = 3 # ferrugen data["labels"][241:262] = 4 #
Mossaic Virus data["labels"][263:400] = 5 # powdery_mildew
data["labels"][401:422] = 6 # septoria data["labels"][423:484] = 7 #
Southern blight data["labels"][485:594] = 8 # Sudden Death Syndrone
data["labels"][595:704] = 9 # Yellow Mosaic data

data["labels"] = data["labels"].astype("float64")

image = cv.imread(data["img_data"][500])
plt.imshow(image)
plt.title("Sample image before cropping")
plt.show()

img_list = []

for i in range(len(img_data)):
    img_path = data["img_data"][i]       # Get the image path
    print("Image path:", img_path)       # Print the image path for debugging
    image = cv.imread(img_path)

    if image is None:
        print("Failed to load image:", img_path)
        continue  # Skip to the next iteration if image loading fails

    # Segment the leaf with Otsu thresholding, whiten the background,
    # then crop to the bounding box of the non-background pixels
    gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    thresh = cv.threshold(gray, 0, 255, cv.THRESH_BINARY_INV + cv.THRESH_OTSU)[1]
    result = cv.bitwise_and(image, image, mask=thresh)
    result[thresh == 0] = [255, 255, 255]

    (x, y, z_) = np.where(result > 0)
    mnx = np.min(x)
    mxx = np.max(x)
    mny = np.min(y)
    mxy = np.max(y)
    crop_img = image[mnx:mxx, mny:mxy, :]
    crop_img_r = cv.resize(crop_img, (320, 240))
    img_list.append(crop_img_r)

plt.imshow(img_list[500])
plt.title("Sample image after cropping")
plt.show()

import tensorflow
from keras import applications, models, preprocessing
from tensorflow.keras.applications import ResNet50, ResNet101
from keras.applications.vgg19 import VGG19
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.applications import MobileNetV2
# from transformers import ViTImageProcessor, ViTModel
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import load_model
from tensorflow.keras.applications.resnet50 import preprocess_input

def feature_extract(model):
    if model == "VGG19":
        model = VGG19(weights='imagenet', include_top=False, pooling="avg")
    elif model == "ResNet50":
        model = ResNet50(weights='imagenet', include_top=False, pooling="avg")
    elif model == "ResNet101":
        model = ResNet101(weights='imagenet', include_top=False, pooling="avg")
    elif model == "InceptionV3":
        model = InceptionV3(weights='imagenet', include_top=False, pooling="avg")
    elif model == "DenseNet121":
        model = DenseNet121(weights='imagenet', include_top=False, pooling="avg")
    elif model == "MobileNetV2":
        model = MobileNetV2(weights='imagenet', include_top=False, pooling="avg")
    else:
        raise ValueError("Unsupported model name: " + model)
    return model

model = feature_extract("DenseNet121")  # or "VGG19", "ResNet50", "ResNet101", "InceptionV3", "MobileNetV2"
from PIL import Image

features_list = []
for i in range(len(img_list)):
    # cv.resize(..., (320, 240)) yields arrays of shape (240, 320, 3);
    # add a batch dimension rather than reshaping, which would scramble pixels.
    # Note: the resnet50 preprocess_input is applied regardless of backbone.
    image = np.expand_dims(img_list[i], axis=0)
    image = preprocess_input(image)

    """
    # Reshaping when the VGG19 model is selected
    features = model.predict(image).reshape(512,)
    """

    # Feature vector sizes: ResNet50/ResNet101/InceptionV3 -> 2048,
    # VGG19 -> 512, DenseNet121 -> 1024, MobileNetV2 -> 1280
    features = model.predict(image).reshape(1024,)

    '''# Only for ViT
    image = img_list[i]
    image = preprocess_input(image)
    image = (image - image.min()) / (image.max() - image.min())  # Rescale to the range [0, 1]
    # Convert to PIL image
    image = (image * 255).astype(np.uint8)
    image = Image.fromarray(image)
    inputs = processor(images=image, return_tensors="pt")
    outputs = model(**inputs)
    features = outputs.last_hidden_state
    features = features.squeeze(0)  # Remove batch dimension'''

    features_list.append(features)

features_df = pd.DataFrame(features_list)
features_df["labels"] = data["labels"]

x = features_df.drop(['labels'], axis=1)
y = features_df.loc[:, "labels"].values

print(f"Number of features before feature selection: {x.shape[1]}") from sklearn.impute

import SimpleImputer

# Reshape y if needed y_reshaped = y.reshape(-1,


1)

# Impute NaN values with the mean of the column imputer =


SimpleImputer(strategy='mean') y_imputed =
imputer.fit_transform(y_reshaped)

# Round the imputed values to integers y_imputed_rounded =


y_imputed.round().astype(int) # y_imputed_rounded

y_imputed_reshaped = y_imputed_rounded.ravel() y=y_imputed_reshaped

ets=time.time() et=ets-st
print(f"Execution time:{et}s")
from sklearn.preprocessing import MinMaxScaler scaler =
MinMaxScaler()
scaler.fit(x)
x_ = scaler.transform(x) x_ =

pd.DataFrame(x_)

from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif

def anova_fs():
    selector = SelectKBest(f_classif, k=500)  # k is the number of features to keep
    selector.fit(x_, y)
    cols = selector.get_support(indices=True)
    anova_x = x_[cols]
    return anova_x

from sklearn.feature_selection import RFE
from sklearn.ensemble import RandomForestClassifier

def RFE_fs():
    rfe_selector = RFE(estimator=RandomForestClassifier(), n_features_to_select=200, step=0.1)
    rfe_selector.fit(x_, y)
    rfe_support = rfe_selector.support_
    rfe_feature = x_.columns[rfe_support]
    rfe_x = x_[rfe_feature]
    return rfe_x

from sklearn.feature_selection import SelectFromModel
from sklearn.ensemble import RandomForestClassifier

def rf_fs():
    embeded_rf_selector = SelectFromModel(RandomForestClassifier(n_estimators=200, random_state=5),
                                          threshold='1.25*median')
    embeded_rf_selector.fit(x_, y)
    embeded_rf_support = embeded_rf_selector.get_support()
    embeded_rf_feature = x_.loc[:, embeded_rf_support].columns.tolist()
    rf_x = x_[embeded_rf_feature]
    return rf_x

from sklearn.linear_model import LassoCV

def ls_fs():
    lasso_selector = LassoCV(max_iter=5000)
    lasso_selector.fit(x_, y)
    selected_features = x_.columns[lasso_selector.coef_ != 0]
    ls_X = x_[selected_features]
    return ls_X

from sklearn.decomposition import PCA

def pca_fs():
    pca = PCA(n_components=500)
    X_pca = pca.fit_transform(x_)
    return X_pca

fs_x = rf_fs()  # other feature-selection options: anova_fs(), RFE_fs(), ls_fs(), pca_fs()
print(f"Number of features after feature selection: {fs_x.shape[1]}")

from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(fs_x, y, test_size=0.2, random_state=42)

from sklearn.model_selection import cross_val_score, cross_val_predict
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, VotingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn import svm
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score
from sklearn.model_selection import GridSearchCV

import numpy as np
# Convert x_train and x_test to NumPy arrays if they are not already
x_train = np.array(x_train)
x_test = np.array(x_test)

# Now, proceed with the K-nearest neighbors code
neig = np.arange(1, 25)
train_accuracy = []
test_accuracy = []

for i, k in enumerate(neig):
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(x_train, y_train)
    prediction_ = knn.predict(x_test)
    train_accuracy.append(knn.score(x_train, y_train))
    test_accuracy.append(knn.score(x_test, y_test))
print("Best accuracy is {} with K = {}".format(np.max(test_accuracy),
      1 + test_accuracy.index(np.max(test_accuracy))))

knn = KNeighborsClassifier(n_neighbors=17)
knn.fit(x_train, y_train)
predicted = knn.predict(x_test)
score = knn.score(x_test, y_test)
knn_score_ = np.mean(score)

print('Accuracy : %.3f' % (knn_score_))

p = precision_score(y_test, predicted, average='weighted')
print('Precision : %.3f' % (p))

r = recall_score(y_test, predicted, average='weighted')
print('Recall : %.3f' % (r))

f1 = f1_score(y_test, predicted, average='weighted')
print('F1-score: %.3f' % (f1))

f1_w = f1_score(y_test, predicted, average='weighted')
print('Weighted f1score: %.3f' % (f1_w))

cf_matrix = confusion_matrix(y_test, predicted)
sns.heatmap(cf_matrix, cmap="PuBu", annot=True, fmt='.0f')
plt.show()
param_grid_svm = {'C': [0.1, 1, 10, 100],  # 1000
                  'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
                  'kernel': ['rbf', 'poly']}

SVM_grid = GridSearchCV(svm.SVC(), param_grid_svm, cv=5)
SVM_grid.fit(x_train, y_train)

svm_clf = svm.SVC(C=100, gamma=0.01, kernel='rbf')
svm_clf.fit(x_train, y_train)
predicted = svm_clf.predict(x_test)
score = svm_clf.score(x_test, y_test)
svm_score_ = np.mean(score)

print('Accuracy : %.3f' % (svm_score_))

p = precision_score(y_test, predicted, average='weighted')
print('Precision : %.3f' % (p))

r = recall_score(y_test, predicted, average='weighted')
print('Recall : %.3f' % (r))

f1 = f1_score(y_test, predicted, average='weighted')
print('F1-score: %.3f' % (f1))

f1_w = f1_score(y_test, predicted, average='weighted')
print('Weighted f1score: %.3f' % (f1_w))

cf_matrix = confusion_matrix(y_test, predicted)
sns.heatmap(cf_matrix, cmap="PuBu", annot=True, fmt='.0f')
plt.show()

param_grid_rf = {'n_estimators': [200, 500],
                 'max_depth': [4, 5, 6, 7, 8]}

RF_grid = GridSearchCV(estimator=RandomForestClassifier(), param_grid=param_grid_rf, cv=5)
RF_grid.fit(x_train, y_train)

r_forest = RandomForestClassifier(500, max_depth=8, random_state=5)
r_forest.fit(x_train, y_train)
predicted = r_forest.predict(x_test)
score = r_forest.score(x_test, y_test)
rf_score_ = np.mean(score)
print('Accuracy : %.3f' % (rf_score_))

p = precision_score(y_test, predicted, average='weighted')
print('Precision : %.3f' % (p))

r = recall_score(y_test, predicted, average='weighted')
print('Recall : %.3f' % (r))

f1 = f1_score(y_test, predicted, average='weighted')
print('F1-score: %.3f' % (f1))

f1_w = f1_score(y_test, predicted, average='weighted')
print('Weighted f1score: %.3f' % (f1_w))

cf_matrix = confusion_matrix(y_test, predicted)
sns.heatmap(cf_matrix, cmap="PuBu", annot=True, fmt='.0f')
plt.show()

nb_model = GaussianNB()
nb_model.fit(x_train, y_train)
predicted = nb_model.predict(x_test)
score = nb_model.score(x_test, y_test)
print('Accuracy : %.3f' % (score))

p = precision_score(y_test, predicted, average='weighted')
print('Precision : %.3f' % (p))

r = recall_score(y_test, predicted, average='weighted')
print('Recall : %.3f' % (r))

f1 = f1_score(y_test, predicted, average='weighted')
print('F1-score: %.3f' % (f1))

f1_w = f1_score(y_test, predicted, average='weighted')
print('Weighted f1score: %.3f' % (f1_w))

cf_matrix = confusion_matrix(y_test, predicted)
sns.heatmap(cf_matrix, cmap="PuBu", annot=True, fmt='.0f')
plt.show()

ets = time.time()
et = ets - st
print(f"Execution time: {et}s")
Overview of Image Processing and Model Performance:

DenseNet121
(Confusion-matrix figures for KNN, SVM, Random Forest, and Naïve Bayes omitted.)

Model Name                 Accuracy   Precision   Recall   F1-score
KNN                        86%        0.865       0.854    0.850
SVM                        91%        0.910       0.902    0.901
Random Forest Classifier   90.9%      0.900       0.908    0.902
Naive Bayes                84.6%      0.858       0.846    0.844

Execution Time: 621.824086099243s


InceptionV3
(Confusion-matrix figures for KNN, SVM, Random Forest, and Naïve Bayes omitted.)

Model Name                 Accuracy   Precision   Recall   F1-score
KNN                        83.3%      0.801       0.865    0.801
SVM                        86.7%      0.870       0.861    0.872
Random Forest Classifier   85.9%      0.833       0.855    0.845
Naive Bayes                82.4%      0.816       0.824    0.815

Execution Time: 601.085606547543s


ResNet101
(Confusion-matrix figures for KNN, SVM, Random Forest, and Naïve Bayes omitted.)

Model Name                 Accuracy   Precision   Recall   F1-score
KNN                        89.2%      0.864       0.882    0.862
SVM                        91.3%      0.937       0.931    0.912
Random Forest Classifier   92.1%      0.932       0.921    0.921
Naive Bayes                81.4%      0.829       0.814    0.816

Execution Time: 1514.851603349488s
ResNet50
(Confusion-matrix figures for KNN, SVM, Random Forest, and Naïve Bayes omitted.)

Model Name                 Accuracy   Precision   Recall   F1-score
KNN                        89.6%      0.881       0.889    0.887
SVM                        91.6%      0.926       0.916    0.918
Random Forest Classifier   90.0%      0.898       0.900    0.894
Naive Bayes                85.7%      0.852       0.857    0.852

Execution Time: 547.7789371013641s


MobileNetV2
(Confusion-matrix figures for KNN, SVM, Random Forest, and Naïve Bayes omitted.)

Model Name                 Accuracy   Precision   Recall   F1-score
KNN                        86.4%      0.871       0.864    0.856
SVM                        87.1%      0.870       0.871    0.868
Random Forest Classifier   91.4%      0.908       0.914    0.910
Naive Bayes                85.7%      0.881       0.857    0.861

Execution Time: 601.095706547544s


VGG19
(Confusion-matrix figures for KNN, SVM, Random Forest, and Naïve Bayes omitted.)

Model Name                 Accuracy   Precision   Recall   F1-score
KNN                        78.9%      0.792       0.799    0.783
SVM                        90.2%      0.907       0.902    0.901
Random Forest Classifier   86.1%      0.855       0.861    0.845
Naive Bayes                81.0%      0.853       0.802    0.806

Execution Time: 3825.179515407254s


Model Overview:
This system detects Greaterbean leaf infections through a multi-stage process. It begins by loading
the images, converting them to grayscale, and applying Otsu thresholding to segment and crop each leaf.
DenseNet121 then extracts deep features, which are refined with a RandomForestClassifier-based selector.
The dataset is split into training and test sets, and classifiers such as KNN, SVM, Random Forest, and
Naïve Bayes are trained and assessed on accuracy, precision, recall, and F1-score. Confusion matrices
provide a visual evaluation of each classifier, execution time is recorded, and the design is adaptable:
ResNet50, ResNet101, InceptionV3, MobileNetV2, or VGG19 can be substituted for feature extraction.
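
Because the reshape size in the extraction loop is tied to the chosen backbone, swapping
architectures means matching the feature dimension. A minimal sketch (the feature_dims mapping
is a helper introduced here for illustration; the sizes follow the comments in the code listing):

# Hypothetical helper mapping each backbone to its pooled feature size
# (sizes as noted in the code comments above).
feature_dims = {"VGG19": 512, "ResNet50": 2048, "ResNet101": 2048,
                "InceptionV3": 2048, "DenseNet121": 1024, "MobileNetV2": 1280}

name = "ResNet101"               # any backbone supported by feature_extract()
model = feature_extract(name)    # defined in the code section above
# Inside the extraction loop, reshape to the matching size:
# features = model.predict(image).reshape(feature_dims[name],)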
Accuracy:
Accuracy is the fraction of all instances that the model classifies correctly, giving an overall
measure of how well the model performs. For example, if 183 of 200 test images are labelled
correctly, the accuracy is 91.5%.
Precision:
Precision is the ratio of true positive predictions to all instances the model predicted as
positive. It measures how reliable the model's positive-class predictions are.
Recall:
Recall, also called sensitivity or the true positive rate, is the fraction of actual positive
instances that the model correctly identifies. For a disease class, it reflects how completely
the model detects all cases of that disease.

F1-Score:
Precision and recall each have strengths and weaknesses; the F1-score is their harmonic mean,
balancing the two metrics. It is especially useful when high precision is required while
recall must also stay high.
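
For illustration, the weighted metric variants used throughout the code can be reproduced on a
small hypothetical prediction vector (the labels below are invented for the example, not taken
from the experiments):

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Hypothetical three-class example: 6 of the 8 predictions are correct.
y_true = [0, 0, 1, 1, 2, 2, 2, 1]
y_pred = [0, 1, 1, 1, 2, 2, 0, 1]

print('Accuracy : %.3f' % accuracy_score(y_true, y_pred))   # 6/8 = 0.750
print('Precision: %.3f' % precision_score(y_true, y_pred, average='weighted'))
print('Recall   : %.3f' % recall_score(y_true, y_pred, average='weighted'))
print('F1-score : %.3f' % f1_score(y_true, y_pred, average='weighted'))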

Analysis

To assess the performance of the different architectures (DenseNet121, InceptionV3, MobileNetV2,
ResNet101, ResNet50, and VGG19) for identifying Greaterbean diseases, accuracy, precision, recall,
and F1-score are compared across four classifiers: KNN, SVM, Random Forest Classifier, and Naïve Bayes.

Best Models: Random Forest Classifier consistently exhibits robust performance across most architectures, particularly
DenseNet121, MobileNetV2, and ResNet101.

SVM: Demonstrates outstanding performance, especially with ResNet101, ResNet50, and VGG19 architectures.

KNN and Naive Bayes: Generally perform adequately but are surpassed by SVM and Random Forest, with performance
variations observed across different architectures.

DenseNet121 and ResNet101: Offer the highest accuracy and metric balance when paired with Random Forest Classifier and
SVM.

InceptionV3 and VGG19: Display diverse performance, with VGG19 exhibiting lower effectiveness when paired with KNN.

The report outlines the merits and limitations of each model regarding their proficiency in categorizing Greaterbean leaf
ailments. In general, Random Forest Classifier and SVM stand out as the leading classifiers, while DenseNet121 and
ResNet101 prove to be the most efficient feature extraction frameworks for classifying Greaterbean diseases.
