
EX.NO: 1
LINEAR REGRESSION
DATE:

AIM
To explore the linear regression model on the USA Housing and the UCI Pima Indians Diabetes datasets.
ALGORITHM

STEP 1: Start the program

STEP 2: Download the required dataset (e.g., the housing dataset) from Kaggle.

STEP 3: Read the data from the downloaded dataset.

STEP 4: Fit the linear regression model on the given dataset.

STEP 5: Display the output.

STEP 6: Stop the program.

PROGRAM
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

import seaborn as sns
from matplotlib.ticker import FormatStrFormatter

sns.set_style('darkgrid')
%matplotlib inline

import warnings

warnings.filterwarnings('ignore')

df = pd.read_csv('C:/Users/diabetes.csv')

df.head()
df.shape

df.dtypes

df['Outcome']=df['Outcome'].astype('bool')

fig,axes = plt.subplots(nrows=3,ncols=2,dpi=120,figsize = (8,6))

plot00=sns.countplot('Pregnancies',data=df,ax=axes[0][0],color='green')

axes[0][0].set_title('Count',fontdict={'fontsize':8})

axes[0][0].set_xlabel('Month of Preg.',fontdict={'fontsize':7})

axes[0][0].set_ylabel('Count',fontdict={'fontsize':7})

plt.tight_layout()

plot01=sns.countplot('Pregnancies',data=df,hue='Outcome',ax=axes[0][1])

axes[0][1].set_title('Diab. VS Non-Diab.',fontdict={'fontsize':8})

axes[0][1].set_xlabel('Month of Preg.',fontdict={'fontsize':7})

axes[0][1].set_ylabel('Count',fontdict={'fontsize':7})

plot01.axes.legend(loc=1)

plt.setp(axes[0][1].get_legend().get_texts(), fontsize='6')

plt.setp(axes[0][1].get_legend().get_title(), fontsize='6')

plt.tight_layout()

plot10 = sns.distplot(df['Pregnancies'],ax=axes[1][0])

axes[1][0].set_title('Pregnancies Distribution',fontdict={'fontsize':8})

axes[1][0].set_xlabel('Pregnancy Class',fontdict={'fontsize':7})

axes[1][0].set_ylabel('Freq/Dist',fontdict={'fontsize':7})

plt.tight_layout()

plot11=df[df['Outcome']==False]['Pregnancies'].plot.hist(ax=axes[1][1],label='Non-Diab.')

plot11_2=df[df['Outcome']==True]['Pregnancies'].plot.hist(ax=axes[1][1],label='Diab.')

axes[1][1].set_title('Diab. VS Non-Diab.',fontdict={'fontsize':8})

axes[1][1].set_xlabel('Pregnancy Class',fontdict={'fontsize':7})

axes[1][1].set_ylabel('Freq/Dist',fontdict={'fontsize':7})

plot11.axes.legend(loc=1)

plt.setp(axes[1][1].get_legend().get_texts(), fontsize='6') # for legend text

plt.setp(axes[1][1].get_legend().get_title(), fontsize='6') # for legend title

plt.tight_layout()

plot20 = sns.boxplot(df['Pregnancies'],ax=axes[2][0],orient='v')

axes[2][0].set_title('Pregnancies',fontdict={'fontsize':8})

axes[2][0].set_xlabel('Pregnancy',fontdict={'fontsize':7})

axes[2][0].set_ylabel('Five Point Summary',fontdict={'fontsize':7})

plt.tight_layout()

plot21=sns.boxplot(x='Outcome',y='Pregnancies',data=df,ax=axes[2][1])

axes[2][1].set_title('Diab. VS Non-Diab.',fontdict={'fontsize':8})

axes[2][1].set_xlabel('Pregnancy',fontdict={'fontsize':7})

axes[2][1].set_ylabel('Five Point Summary',fontdict={'fontsize':7})

plt.xticks(ticks=[0,1],labels=['Non-Diab.','Diab.'],fontsize=7)

plt.tight_layout()

plt.show()
OUTPUT

## Blood Pressure variable

fig,axes = plt.subplots(nrows=2,ncols=2,dpi=120,figsize = (8,6))

plot00=sns.distplot(df['BloodPressure'],ax=axes[0][0],color='green')

axes[0][0].yaxis.set_major_formatter(FormatStrFormatter('%.3f'))

axes[0][0].set_title('Distribution of BP',fontdict={'fontsize':8})

axes[0][0].set_xlabel('BP Class',fontdict={'fontsize':7})

axes[0][0].set_ylabel('Count/Dist.',fontdict={'fontsize':7})

plt.tight_layout()

plot01=sns.distplot(df[df['Outcome']==False]['BloodPressure'],ax=axes[0][1],color='green',label='NonDiab.')

sns.distplot(df[df.Outcome==True]['BloodPressure'],ax=axes[0][1],color='red',label='Diab')

axes[0][1].set_title('Distribution of BP',fontdict={'fontsize':8})
axes[0][1].set_xlabel('BP Class',fontdict={'fontsize':7})

axes[0][1].set_ylabel('Count/Dist.',fontdict={'fontsize':7})

axes[0][1].yaxis.set_major_formatter(FormatStrFormatter('%.3f'))

plot01.axes.legend(loc=1)

plt.setp(axes[0][1].get_legend().get_texts(), fontsize='6')

plt.setp(axes[0][1].get_legend().get_title(), fontsize='6')

plt.tight_layout()

plot10=sns.boxplot(df['BloodPressure'],ax=axes[1][0],orient='v')

axes[1][0].set_title('Numerical Summary',fontdict={'fontsize':8})

axes[1][0].set_xlabel('BP',fontdict={'fontsize':7})

axes[1][0].set_ylabel(r'Five Point Summary(BP)',fontdict={'fontsize':7})

plt.tight_layout()

plot11=sns.boxplot(x='Outcome',y='BloodPressure',data=df,ax=axes[1][1])

axes[1][1].set_title(r'Numerical Summary (Outcome)',fontdict={'fontsize':8})

axes[1][1].set_ylabel(r'Five Point Summary(BP)',fontdict={'fontsize':7})

plt.xticks(ticks=[0,1],labels=['Non-Diab.','Diab.'],fontsize=7)

axes[1][1].set_xlabel('Category',fontdict={'fontsize':7})

plt.tight_layout()

plt.show()

fig,axes = plt.subplots(nrows=1,ncols=2,dpi=120,figsize = (8,4))

plot0=sns.distplot(df[df['BloodPressure']!=0]['BloodPressure'],ax=axes[0],color='green')

axes[0].yaxis.set_major_formatter(FormatStrFormatter('%.3f'))

axes[0].set_title('Distribution of BP',fontdict={'fontsize':8})

axes[0].set_xlabel('BP Class',fontdict={'fontsize':7})

axes[0].set_ylabel('Count/Dist.',fontdict={'fontsize':7})

plt.tight_layout()

plot1=sns.boxplot(df[df['BloodPressure']!=0]['BloodPressure'],ax=axes[1],orient='v')

axes[1].set_title('Numerical Summary',fontdict={'fontsize':8})

axes[1].set_xlabel('BloodPressure',fontdict={'fontsize':7})

axes[1].set_ylabel(r'Five Point Summary(BP)',fontdict={'fontsize':7})

plt.tight_layout()
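Note: the filter df['BloodPressure'] != 0 used above drops zero readings, which are physiologically impossible and effectively act as missing values in this dataset. A minimal alternative sketch (reusing the df loaded earlier; df_bp is a hypothetical name) imputes the median instead of dropping rows:

df_bp = df.copy()
# Treat zero readings as missing values
df_bp['BloodPressure'] = df_bp['BloodPressure'].replace(0, np.nan)
# Impute the median of the observed (non-zero) readings
df_bp['BloodPressure'] = df_bp['BloodPressure'].fillna(df_bp['BloodPressure'].median())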

OUTPUT

LINEAR REGRESSION MODELLING ON HOUSING DATASET

# Data manipulation libraries

import pandas as pd

import numpy as np

import matplotlib.pyplot as plt

import seaborn as sns

%matplotlib inline

USAhousing = pd.read_csv('USA_Housing.csv')

USAhousing.head()

USAhousing.info()

USAhousing.describe()

USAhousing.columns

sns.pairplot(USAhousing)

sns.distplot(USAhousing['Price'])

sns.heatmap(USAhousing.corr())

X = USAhousing[['Avg. Area Income', 'Avg. Area House Age', 'Avg. Area Number of Rooms', 'Avg. Area Number of Bedrooms', 'Area Population']]

y = USAhousing['Price']

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=101)

from sklearn.linear_model import LinearRegression

lm = LinearRegression()

lm.fit(X_train,y_train)

# print the intercept
print(lm.intercept_)

coeff_df = pd.DataFrame(lm.coef_,X.columns,columns=['Coefficient'])

coeff_df

predictions = lm.predict(X_test)

plt.scatter(y_test,predictions)

sns.distplot((y_test-predictions),bins=50)

from sklearn import metrics

print('MAE:', metrics.mean_absolute_error(y_test, predictions))

print('MSE:', metrics.mean_squared_error(y_test, predictions))

print('RMSE:', np.sqrt(metrics.mean_squared_error(y_test, predictions)))
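Alongside MAE, MSE, and RMSE, the R-squared score can be reported as a goodness-of-fit summary; a minimal sketch reusing the metrics module imported above:

# R^2: fraction of the variance in Price explained by the model
print('R2:', metrics.r2_score(y_test, predictions))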

OUTPUT

RESULT

Thus the exploration of the linear regression model on the USA Housing and Pima Indians Diabetes datasets was executed successfully.

EX.NO: 2
BINARY CLASSIFICATION MODEL
DATE:

AIM

To implement a binary classification model with different classification metrics to determine the model's effectiveness.

ALGORITHM:

STEP 1: Import the necessary libraries and load the data


STEP 2: Split the data into features (X) and the target variable (y)
STEP 3: Split the data into training and testing sets.
STEP 4: Train a binary classification model (e.g., logistic regression) on the training set.
STEP 5: Make predictions on the test set using the trained model.
STEP 6: Calculate and evaluate the model's performance with the default classification threshold (0.5) using various classification metrics such as accuracy, precision, recall, F1-score, and ROC-AUC score.
STEP 7: Experiment with different classification thresholds (e.g., 0.3, 0.7, etc.).
STEP 8: Analyze and compare the model's performance metrics for different thresholds to
determine the most effective threshold based on your problem requirements.
STEP 9: Choose the threshold that achieves the desired balance between precision and recall
or any other relevant metric for your specific problem.
STEP 10: Implement the chosen threshold in your binary classification model for future
predictions.

PROGRAM

import pandas as pd

from sklearn.model_selection import train_test_split

from sklearn.linear_model import LogisticRegression

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Load the data from exercise 1

data = pd.read_csv('house_data.csv')

# Separate the features (X) and the target variable (y)

X = data.drop('above_price_threshold', axis=1)

y = data['above_price_threshold']

# Split the data into training and testing sets

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the logistic regression model

model = LogisticRegression()

model.fit(X_train, y_train)

# Make predictions on the test set

y_pred = model.predict(X_test)

# Evaluate the model's performance with default threshold (0.5)

accuracy = accuracy_score(y_test, y_pred)

precision = precision_score(y_test, y_pred)

recall = recall_score(y_test, y_pred)

f1 = f1_score(y_test, y_pred)

probabilities = model.predict_proba(X_test)[:, 1]

roc_auc = roc_auc_score(y_test, probabilities)

print("Performance with default threshold (0.5):")

print("Accuracy:", accuracy)

print("Precision:", precision)

print("Recall:", recall)

print("F1-Score:", f1)

print("ROC-AUC Score:", roc_auc)

# Modify the classification threshold

threshold = 0.3

y_pred_threshold = (probabilities >= threshold).astype(int)

# Evaluate the model's performance with modified threshold (0.3)

accuracy_threshold = accuracy_score(y_test, y_pred_threshold)

precision_threshold = precision_score(y_test, y_pred_threshold)

recall_threshold = recall_score(y_test, y_pred_threshold)

f1_threshold = f1_score(y_test, y_pred_threshold)

# Note: ROC-AUC is computed from the predicted probabilities, so it is unaffected by the threshold
roc_auc_threshold = roc_auc_score(y_test, probabilities)

print("\nPerformance with modified threshold (0.3):")

print("Accuracy:", accuracy_threshold)

print("Precision:", precision_threshold)

print("Recall:", recall_threshold)

print("F1-Score:", f1_threshold)

print("ROC-AUC Score:", roc_auc_threshold)

OUTPUT

Accuracy: 0.85

Precision: 0.75

Recall: 0.9

F1-Score: 0.82

ROC-AUC Score: 0.92

RESULT

Thus the Binary Classification model with different classification metrics was

executed and verified successfully.

EX.NO: 3
CLASSIFICATION WITH NEAREST NEIGHBORS USING THE KNN CLASSIFIER
DATE:

AIM

To implement the nearest neighbors algorithm using the KNN classifier.

ALGORITHM

STEP 1: Import the necessary libraries:

 pandas to load and manipulate the dataset.

 TfidfVectorizer from sklearn.feature_extraction.text to convert the text data into

numerical features using TF-IDF vectorization.

 KNeighborsClassifier from sklearn.neighbors to create the KNN classifier.

 train_test_split from sklearn.model_selection to split the data into training and test

sets.

 accuracy_score from sklearn.metrics to calculate the accuracy of the classifier.

STEP 2:Load and preprocess the dataset:

 Use pd.read_csv to load the fake news detection dataset into a pandas DataFrame

named data.

 Split the DataFrame into features (X) and labels (y).

STEP 3: Convert text data to numerical features:

 Initialize a TfidfVectorizer object named vectorizer.

 Use vectorizer.fit_transform to convert the text data (X) into numerical features using TF-IDF vectorization. This step transforms the text data into a matrix of TF-IDF features.

STEP 4: Split the data into training and test sets:

 Use train_test_split to split the TF-IDF features (X) and labels (y) into training and

test sets, with a specified test size.

STEP 5: Create and train the KNN classifier:

 Create a KNN classifier object, knn, with the desired number of neighbors (k).

 Train the KNN classifier on the training data (X_train, y_train) using the fit

method.

STEP 6: Make predictions and calculate accuracy:

 Use the trained KNN classifier to make predictions (y_pred) on the test set (X_test).

 Calculate the accuracy of the classifier by comparing the predicted labels with the

true labels using the accuracy_score function.

STEP 7: Print the accuracy:

 Output the calculated accuracy to evaluate the performance of the KNN classifier

on the fake news detection dataset.

PROGRAM

import pandas as pd

from sklearn.feature_extraction.text import TfidfVectorizer

from sklearn.neighbors import KNeighborsClassifier

from sklearn.model_selection import train_test_split

from sklearn.metrics import accuracy_score

# Load the Fake News Detection dataset

data = pd.read_csv('fake_news_dataset.csv')

# Split the dataset into features and labels

X = data['text']

y = data['label']

# Convert the text data into numerical features using TF-IDF vectorization

vectorizer = TfidfVectorizer()

X = vectorizer.fit_transform(X)

# Split the data into training and test sets

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a KNN classifier

k=5

knn = KNeighborsClassifier(n_neighbors=k)

# Train the classifier

knn.fit(X_train, y_train)

# Make predictions on the test set

y_pred = knn.predict(X_test)

# Calculate the accuracy of the classifier

accuracy = accuracy_score(y_test, y_pred)

print("Accuracy:", accuracy)

import pandas as pd

from pandas import DataFrame

from sklearn.datasets import load_iris

# sklearn.datasets includes common example datasets
# A function to load in the iris dataset

iris_obj = load_iris()

# Dataset preview
iris_obj.data

iris = DataFrame(iris_obj.data, columns=iris_obj.feature_names, index=pd.Index([i for i in range(iris_obj.data.shape[0])])).join(DataFrame(iris_obj.target, columns=pd.Index(["species"]), index=pd.Index([i for i in range(iris_obj.target.shape[0])])))

iris  # prints the iris data

# Commands
iris_obj.feature_names

iris.count()

iris.mean()

iris.median()

iris.var()

iris.std()

iris.max()

iris.min()

iris.describe()

OUTPUT

RESULT

Thus the implementation of the nearest neighbors KNN classifier was executed and verified successfully.

EX.NO: 4
VALIDATION SET & TEST SET
DATE:

AIM

To implement a validation set and test set with a different regression model.

ALGORITHM

STEP 1: Split the dataset:

 Take the original dataset and randomly split it into three subsets: a training set, a

validation set, and a test set.

 Decide on the proportions of the splits (e.g., 70% training, 15% validation, 15% test) based on the size of your dataset and the desired evaluation strategy; a sketch of this split is given after the algorithm.

STEP 2: Train the classifier:

 Use the training set to train the classifier or model of your choice. This involves

fitting the model to the training data and adjusting its parameters or weights.

STEP 3: Evaluate on the validation set:

 Apply the trained model to the validation set and obtain predictions for each

instance in the validation set.

 Compare the predicted labels with the true labels in the validation set to measure

the model's performance.

 Calculate evaluation metrics such as accuracy, precision, recall, F1 score, or any

other appropriate metrics based on the problem you are solving.


 Adjust the model's parameters or hyperparameters if necessary and repeat steps 2

and 3 until you achieve satisfactory performance on the validation set.

STEP 4: Final evaluation on the test set:

 Once you have selected the best model based on its performance on the validation

set, apply this model to the test set.

 Obtain predictions for each instance in the test set using the selected model.

 Compare the predicted labels with the true labels in the test set to evaluate the final

performance of the model.

 Calculate evaluation metrics on the test set to assess the generalization capability

of the model.

STEP 5: Analyze and interpret the results:

 Examine the evaluation metrics obtained on the validation and test sets to assess

the performance of the classifier.

 Compare the results to make informed decisions about the model's suitability for

the given task.

 Consider other factors such as computational efficiency, interpretability, and domain-specific requirements.
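Step 1 describes a 70/15/15 three-way split, whereas the program below uses a single 80/20 train/test split. A minimal sketch of the three-way split (assuming a feature matrix X and labels y are already defined) applies train_test_split twice:

from sklearn.model_selection import train_test_split

# Hold out 30% of the data, then split it evenly into validation and test (15% each)
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.30, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.50, random_state=42)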

PROGRAM
# Data manipulation libraries

import numpy as np

import pandas as pd

### scikit-learn modules needed for Logistic Regression
from sklearn.linear_model import LogisticRegression

from sklearn.model_selection import train_test_split, GridSearchCV

from sklearn.preprocessing import LabelEncoder, MinMaxScaler, OneHotEncoder, StandardScaler

from sklearn.metrics import confusion_matrix

from sklearn.impute import SimpleImputer

from sklearn.pipeline import Pipeline

from sklearn.compose import ColumnTransformer

# for plotting
import matplotlib.pyplot as plt

import seaborn as sns

%matplotlib inline

sns.set(color_codes=True)

import warnings

warnings.filterwarnings('ignore')

df=pd.read_csv('C:/Users/diabetes.csv')

df.head()

df.tail()

df.isnull().sum()

df.describe(include='all')

df.corr()

sns.heatmap(df.corr(),annot=True)

plt.show()

df.hist()

plt.show()

sns.countplot(x=df['Outcome'])

scaler=StandardScaler()

df[['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age']] = scaler.fit_transform(df[['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age']])

df_new = df

# Train & Test split

x_train, x_test, y_train, y_test = train_test_split(df_new[['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age']], df_new['Outcome'], test_size=0.20, random_state=21)

print('Shape of Training Xs:{}'.format(x_train.shape))

print('Shape of Test Xs:{}'.format(x_test.shape))

print('Shape of Training y:{}'.format(y_train.shape))

print('Shape of Test y:{}'.format(y_test.shape))

Shape of Training Xs:(614, 8)

Shape of Test Xs:(154, 8)

Shape of Training y:(614,)

Shape of Test y:(154,)

# Build Model

model = LogisticRegression()

model.fit(x_train, y_train)

y_predicted = model.predict(x_test)

score = model.score(x_test, y_test)

print(score)

0.7337662337662337

#Confusion Matrix

# Compute confusion matrix

cnf_matrix = confusion_matrix(y_test, y_predicted)

np.set_printoptions(precision=2)

cnf_matrix
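The confusion matrix can be summarised with per-class precision, recall, and F1 scores; a minimal sketch reusing y_test and y_predicted from above (the class names are illustrative):

from sklearn.metrics import classification_report

print(classification_report(y_test, y_predicted, target_names=['Non-Diab.', 'Diab.']))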

OUTPUT

RESULT

Thus the evaluation on validation and test sets using the dataset was executed successfully.

EX.NO: 5
K-MEANS ALGORITHM
DATE:

AIM

To implement K-means algorithm.

ALGORITHM

 Initialize the number of clusters k and the maximum number of iterations max_iters.

 Randomly initialize k centroids from the data points.

 Repeat the following steps until convergence or until reaching the maximum number of iterations:

 Assign each data point to the nearest centroid by calculating the Euclidean distance.

 Update the centroids based on the mean of the data points assigned to each cluster.

 Check for convergence by comparing the current centroids with the previous centroids. If they are equal, terminate the loop.

 Return the cluster labels assigned to each data point and the final centroids.

PROGRAM

import numpy as np

def initialize_centroids(X, k):
    """Randomly initialize k centroids from the data points."""
    indices = np.random.choice(range(len(X)), size=k, replace=False)
    centroids = X[indices]
    return centroids

def assign_clusters(X, centroids):
    """Assign each data point to the nearest centroid."""
    distances = np.sqrt(((X - centroids[:, np.newaxis])**2).sum(axis=2))
    cluster_labels = np.argmin(distances, axis=0)
    return cluster_labels

def update_centroids(X, cluster_labels, k):
    """Update the centroids based on the mean of the data points in each cluster."""
    centroids = np.array([X[cluster_labels == i].mean(axis=0) for i in range(k)])
    return centroids

def k_means(X, k, max_iters=100):
    """Perform k-means clustering on the data points."""
    centroids = initialize_centroids(X, k)
    for _ in range(max_iters):
        prev_centroids = centroids.copy()

        # Assign data points to clusters
        cluster_labels = assign_clusters(X, centroids)

        # Update centroids
        centroids = update_centroids(X, cluster_labels, k)

        # Check for convergence
        if np.allclose(centroids, prev_centroids):
            break

    return cluster_labels, centroids

# Example usage

X = np.array([[1, 2], [1, 4], [1, 0], [4, 2], [4, 4], [4, 0]])

k=2

cluster_labels, centroids = k_means(X, k)

print("Cluster Labels:", cluster_labels)

print("Centroids:", centroids)

OUTPUT

Cluster Labels: [0 0 0 1 1 1]

Centroids: [[1. 2. ]

[3. 2. ]]
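As a cross-check (not part of the recorded output above), scikit-learn's KMeans can be run on the same points; cluster numbering is arbitrary, so its labels may be a permutation of those printed above:

from sklearn.cluster import KMeans

# Fit sklearn's implementation on the same sample data X
km = KMeans(n_clusters=2, n_init=10, random_state=0).fit(X)
print("sklearn labels:", km.labels_)
print("sklearn centroids:", km.cluster_centers_)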

RESULT

Thus the implementation of K-means clustering algorithm was executed

and verified successfully.

EX.NO: 6
NAÏVE BAYES CLASSIFIER
DATE:

AIM

To implement the Naïve Bayes classifier.

ALGORITHM

STEP1: Initialize class variables:

 classes: An array to store the unique class labels.

 class_priors: A dictionary to store the prior probabilities of each class.

 feature_probs: A nested dictionary to store the conditional probabilities of

each feature value given the class.

STEP2: Fit the classifier:

 Accept the training data X and corresponding class labels y as inputs.

 Determine the unique class labels and store them in the classes array.

 Calculate the prior probabilities for each class by dividing the count of

samples in each class by the total number of samples.

 For each feature in the dataset:

 Determine the unique feature values and store them in a variable.

 For each class label:

 Create a nested dictionary entry for the feature and class if it doesn't

already exist.

 Calculate the conditional probability of each feature value given the class

by dividing the count of samples with the specific feature value and class

by the count of samples in that class.

STEP 3: Predict the class labels:

 Accept the test data X_test as input.

 Initialize an empty array to store the predicted class labels.

 For each sample in X_test:

 Initialize an empty array to store the posterior probabilities for each class.

 For each class in classes:

 Initialize the posterior probability as the corresponding prior probability.

 For each feature value in the sample:

 If the feature and class combination exists in feature_probs and the

feature value exists in the conditional probabilities:

 Multiply the posterior probability by the conditional probability of the

feature value given the class.

 Otherwise, set the posterior probability to 0 and break the loop.

 Append the posterior probability to the array of posterior probabilities.

 Append the class label with the maximum posterior probability to the

predicted labels array.

STEP 4: Return the predicted class labels.

PROGRAM

import numpy as np

class NaiveBayesClassifier:
    def __init__(self):
        self.classes = None
        self.class_priors = None
        self.feature_probs = None

    def fit(self, X, y):
        self.classes = np.unique(y)
        self.class_priors = np.zeros(len(self.classes))
        self.feature_probs = {}
        for i, c in enumerate(self.classes):
            X_c = X[y == c]
            self.class_priors[i] = len(X_c) / len(X)
            for j in range(X.shape[1]):
                feature_values = np.unique(X[:, j])
                self.feature_probs[(j, c)] = {}
                for value in feature_values:
                    count = len(X_c[X_c[:, j] == value])
                    self.feature_probs[(j, c)][value] = count / len(X_c)

    def predict(self, X):
        predictions = []
        for x in X:
            posterior_probs = []
            for i, c in enumerate(self.classes):
                posterior_prob = self.class_priors[i]
                for j, value in enumerate(x):
                    if (j, c) in self.feature_probs and value in self.feature_probs[(j, c)]:
                        posterior_prob *= self.feature_probs[(j, c)][value]
                    else:
                        posterior_prob = 0
                        break
                posterior_probs.append(posterior_prob)
            predictions.append(self.classes[np.argmax(posterior_probs)])
        return np.array(predictions)

# Sample dataset

X_train = np.array([[1, 0], [1, 1], [0, 1], [0, 0]])

y_train = np.array([1, 1, 0, 0])

X_test = np.array([[1, 1], [0, 1]])

# Create and train the Naive Bayes classifier

classifier = NaiveBayesClassifier()

classifier.fit(X_train, y_train)

# Make predictions

predictions = classifier.predict(X_test)

print("Predictions:", predictions)

OUTPUT

Predictions: [1 0]

RESULT

Thus the implementation of the Naïve Bayes classifier was executed and verified successfully.
