Professional Documents
Culture Documents
ML - LAB - 7 - Jupyter Notebook
ML - LAB - 7 - Jupyter Notebook
In [1]:
# Toy "should I eat this?" dataset: three categorical features and a
# binary target, ten samples. Column -> list-of-values, one entry per row.
dataset = {
    'Taste':       ['Salty', 'Spicy', 'Spicy', 'Spicy', 'Spicy', 'Sweet', 'Salty', 'Sweet', 'Spicy', 'Salty'],
    'Temperature': ['Hot', 'Hot', 'Hot', 'Cold', 'Hot', 'Cold', 'Cold', 'Hot', 'Cold', 'Hot'],
    'Texture':     ['Soft', 'Soft', 'Hard', 'Hard', 'Hard', 'Soft', 'Soft', 'Soft', 'Soft', 'Hard'],
    'Eat':         ['No', 'No', 'Yes', 'No', 'Yes', 'Yes', 'No', 'Yes', 'Yes', 'Yes'],
}
In [3]:
# Duplicate of the cell above (re-run as In[3]). In the original the
# 'Taste' list was cut off mid-line (missing the final 'Salty' and the
# closing "],"), which is a SyntaxError — restored to match In[1].
dataset = {'Taste':['Salty','Spicy','Spicy','Spicy','Spicy','Sweet','Salty','Sweet','Spicy','Salty'],
           'Temperature':['Hot','Hot','Hot','Cold','Hot','Cold','Cold','Hot','Cold','Hot'],
           'Texture':['Soft','Soft','Hard','Hard','Hard','Soft','Soft','Soft','Soft','Hard'],
           'Eat':['No','No','Yes','No','Yes','Yes','No','Yes','Yes','Yes']}
localhost:8888/notebooks/Downloads/ML_LAB_7.ipynb 1/7
14/11/2022, 15:37 ML_LAB_7 - Jupyter Notebook
In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
def pre_processing(df):
    """Split a DataFrame into features and target.

    The last column is assumed to be the label; every other column is a
    feature. (Indentation was lost in the extracted source; restored here.)

    Returns:
        (X, y): X is ``df`` without its last column, y is that last column
        as a Series.
    """
    X = df.drop([df.columns[-1]], axis=1)  # all columns except the last
    y = df[df.columns[-1]]                 # the last column is the target
    return X, y
class NaiveBayes:
    """Naive Bayes classifier for categorical features, built from scratch.

    NOTE(review): the extracted source lost the ``def fit`` line and the
    bodies of the ``_calc_*`` helpers and of the inner ``predict`` loop.
    They are reconstructed here from the surviving fragments (attribute
    names, loop variables, ``posterior``/``evidence`` locals); the public
    interface — ``fit(X, y)`` then ``predict(X)`` — is unchanged.
    """

    def __init__(self):
        # Placeholders; all real state is populated by fit().
        self.features = list          # list of feature (column) names
        self.likelihoods = {}         # P(feature_value | class)
        self.class_priors = {}        # P(class)
        self.pred_priors = {}         # P(feature_value) — the "evidence" terms
        self.X_train = np.array
        self.y_train = np.array
        self.train_size = int         # number of training rows
        self.num_feats = int          # number of feature columns

    def fit(self, X, y):
        """Estimate priors and likelihoods from a categorical DataFrame X and labels y."""
        self.features = list(X.columns)
        self.X_train = X
        self.y_train = y
        self.train_size = X.shape[0]
        self.num_feats = X.shape[1]

        # Initialise every (feature value, class) combination to 0 so that
        # the dictionaries have a fixed shape before counting.
        for feature in self.features:
            self.likelihoods[feature] = {}
            self.pred_priors[feature] = {}
            for feat_val in np.unique(self.X_train[feature]):
                self.pred_priors[feature].update({feat_val: 0})
                for outcome in np.unique(self.y_train):
                    self.likelihoods[feature].update({feat_val + '_' + outcome: 0})
                    self.class_priors.update({outcome: 0})

        self._calc_class_prior()
        self._calc_likelihoods()
        self._calc_predictor_prior()

    def _calc_class_prior(self):
        """P(c) = count(c) / N for every class label c."""
        for outcome in np.unique(self.y_train):
            outcome_count = sum(self.y_train == outcome)
            self.class_priors[outcome] = outcome_count / self.train_size

    def _calc_likelihoods(self):
        """P(feature = v | c) = count(v within class c) / count(c)."""
        for feature in self.features:
            for outcome in np.unique(self.y_train):
                outcome_count = sum(self.y_train == outcome)
                # Rows belonging to this class, then count each feature value.
                idx = self.y_train[self.y_train == outcome].index.values.tolist()
                feat_counts = self.X_train[feature][idx].value_counts().to_dict()
                for feat_val, count in feat_counts.items():
                    self.likelihoods[feature][feat_val + '_' + outcome] = count / outcome_count

    def _calc_predictor_prior(self):
        """P(feature = v) = count(v) / N, used as the evidence denominator."""
        for feature in self.features:
            feat_counts = self.X_train[feature].value_counts().to_dict()
            for feat_val, count in feat_counts.items():
                self.pred_priors[feature][feat_val] = count / self.train_size

    def predict(self, X):
        """Return the MAP class for each query row in X (array-like of feature values)."""
        results = []
        X = np.array(X)
        for query in X:
            probs_outcome = {}
            for outcome in np.unique(self.y_train):
                prior = self.class_priors[outcome]
                likelihood = 1
                evidence = 1
                # Naive independence assumption: multiply per-feature terms.
                for feat, feat_val in zip(self.features, query):
                    likelihood *= self.likelihoods[feat][feat_val + '_' + outcome]
                    evidence *= self.pred_priors[feat][feat_val]
                # Evidence is class-independent, so it does not change the
                # argmax — kept to report true posteriors.
                posterior = (likelihood * prior) / evidence
                probs_outcome[outcome] = posterior
            results.append(max(probs_outcome, key=probs_outcome.get))
        return np.array(results)
localhost:8888/notebooks/Downloads/ML_LAB_7.ipynb 3/7
14/11/2022, 15:37 ML_LAB_7 - Jupyter Notebook
if __name__ == "__main__":
#Weather Dataset
print("\ndataset:")
df = pd.DataFrame(dataset)
#print(df)
nb_clf = NaiveBayes()
nb_clf.fit(X, y)
#Query 1:
query = np.array([['Salty','Hot', 'Soft']])
print("Query 1:- {} ---> {}".format(query, nb_clf.predict(query)))
#Query 2:
query = np.array([['Spicy','Hot', 'Soft']])
print("Query 2:- {} ---> {}".format(query, nb_clf.predict(query)))
#Query 3:
query = np.array([['Salty','Hot', 'Hard']])
print("Query 3:- {} ---> {}".format(query, nb_clf.predict(query)))
dataset:
2. Implement Decision tree on IRIS Dataset using SK Learn library functions. Implement methods to
avoid over-fitting of the data.
localhost:8888/notebooks/Downloads/ML_LAB_7.ipynb 4/7
14/11/2022, 15:37 ML_LAB_7 - Jupyter Notebook
In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import tree
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Fit an unconstrained decision tree on the iris data.
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
clf = DecisionTreeClassifier(random_state=0)
clf.fit(X_train, y_train)
y_train_predicted = clf.predict(X_train)
y_test_predicted = clf.predict(X_test)
# The original computed the train accuracy and silently discarded it
# (only a cell's last expression is displayed) — print it so the 1.0
# train score makes the over-fitting visible next to the test score.
print("Train accuracy:", accuracy_score(y_train, y_train_predicted))
accuracy_score(y_test, y_test_predicted)
Out[5]:
0.9736842105263158
In [6]:
# Visualize the fully grown (unpruned) decision tree fitted above.
plt.figure(figsize=(16,8))
tree.plot_tree(clf)
plt.show()
localhost:8888/notebooks/Downloads/ML_LAB_7.ipynb 5/7
14/11/2022, 15:37 ML_LAB_7 - Jupyter Notebook
In [7]:
# Cost-complexity pruning: get the effective alphas at which the tree
# would lose a node, and the corresponding total leaf impurities.
path=clf.cost_complexity_pruning_path(X_train,y_train)
#path variable gives two things ccp_alphas and impurities
ccp_alphas,impurities=path.ccp_alphas,path.impurities
print("ccp alpha wil give list of values :",ccp_alphas)
print("***********************************************************")
print("Impurities in Decision Tree :",impurities)
ccp alpha wil give list of values : [0. 0.00869963 0.01339286 0.03571429 0.26539835 0.33279549]
***********************************************************
In [8]:
Last node in Decision tree is 1 and ccp_alpha for last node is 0.332795493197279
In [9]:
localhost:8888/notebooks/Downloads/ML_LAB_7.ipynb 6/7
14/11/2022, 15:37 ML_LAB_7 - Jupyter Notebook
In [10]:
# Refit with ccp_alpha=0.02 (chosen from the pruning path above) to get
# a smaller tree that is less prone to over-fitting, and plot it.
clf=DecisionTreeClassifier(random_state=0,ccp_alpha=0.02)
clf.fit(X_train,y_train)
plt.figure(figsize=(12,8))
tree.plot_tree(clf,rounded=True,filled=True)
plt.show()
In [11]:
# Test accuracy of the pruned tree — same score as the unpruned tree
# here, achieved with a much simpler model.
accuracy_score(y_test,clf.predict(X_test))
Out[11]:
0.9736842105263158
In [ ]:
localhost:8888/notebooks/Downloads/ML_LAB_7.ipynb 7/7