Professional Documents
Culture Documents
# Author - Jyotiraditya Ghatage # Date - 25th Aug 2021 # Title - Decision Tree (Gini Index)
# Author - Jyotiraditya Ghatage # Date - 25th Aug 2021 # Title - Decision Tree (Gini Index)
ipynb - Colaboratory
# Step 1: Load the libraries
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier # model name with Camel Case
from sklearn import metrics
# Step 2: Load the labelled data.
# Two input features (age, bp) and one output label (diabetes).
df = pd.read_csv("/content/Decision-Tree-Classification-Data.csv")
# Dataset size: 987 samples, indexed 0 to 986.
# Planned split: 80% for training, 20% for testing,
# i.e. 789 training samples and 198 testing samples.
df.head()
age bp diabetes
0 65 65 1
1 45 82 0
2 35 73 1
3 45 90 0
4 50 68 1
df.tail()  # last rows; the final index (986) confirms there are 987 samples
age bp diabetes
982 45 87 0
983 40 83 0
984 40 83 0
985 40 60 1
986 45 82 0
# Separate the features (x: age, bp) from the labels (y: diabetes)
# x -
https://colab.research.google.com/drive/1ZOq3uBmMQ1TtJ6vHMdAH2TgXgp4xVsHs?authuser=1#scrollTo=r5s1AWxcl1qU&printMode=true 1/3
9/10/2021 DMDW_Expt-3_DT.ipynb - Colaboratory
# Feature matrix: every column except the label, i.e. age and bp.
x = df.drop(columns="diabetes")
# Target vector: the diabetes label column.
y = df["diabetes"]
x.head()
age bp
0 65 65
1 45 82
2 35 73
3 45 90
4 50 68
y.head()  # preview the first rows of the label column
0 1
1 0
2 1
3 0
4 1
# Hold out 20% of the samples as a test set so the model is evaluated on
# data it was not trained on; the remaining 80% is used for training.
# test_size is the held-out fraction; random_state seeds the shuffle that
# precedes the split, so the same partition is produced on every run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.20,
    random_state=15,
)
x_train.shape  # training: 789
(789, 2)
x_test.shape  # testing: 198 rows, 2 feature columns
(198, 2)
# Model building / training: fit a decision tree on the training split.
# criterion="gini" is scikit-learn's default; it is spelled out so the code
# matches the "Gini Index" title of this notebook.
# random_state is fixed because the estimator randomly permutes the features
# at each split, so ties between equally good splits could otherwise yield a
# different tree (and a different accuracy) on each run.
model = DecisionTreeClassifier(criterion="gini", random_state=15)
model.fit(x_train, y_train)
min_impurity_decrease=0.0, min_impurity_split=None,
min_samples_leaf=1, min_samples_split=2,
min_weight_fraction_leaf=0.0, presort='deprecated',
random_state=None, splitter='best')
# Model testing: predict a label for every sample in the held-out test set.
y_predict = model.predict(x_test)
https://colab.research.google.com/drive/1ZOq3uBmMQ1TtJ6vHMdAH2TgXgp4xVsHs?authuser=1#scrollTo=r5s1AWxcl1qU&printMode=true 2/3
9/10/2021 DMDW_Expt-3_DT.ipynb - Colaboratory
# Accuracy as a percentage: the share of test samples whose predicted label
# equals the true label, scaled from a 0-1 fraction to 0-100.
accuracy = 100 * metrics.accuracy_score(y_test, y_predict)
print(accuracy)
91.41414141414141
from sklearn.metrics import confusion_matrix

# Confusion matrix of true labels (first argument) against predictions
# (second argument); by scikit-learn's convention rows are true classes and
# columns are predicted classes.
cm = confusion_matrix(y_test, y_predict)
print(cm)
# fmt="d" prints each cell count as a plain integer. seaborn's default
# annotation format (".2g") switches to scientific notation (e.g. "1e+02")
# once a count reaches 100, which makes larger test sets unreadable.
sns.heatmap(cm, annot=True, fmt="d")
[[93 4]
[13 88]]
<matplotlib.axes._subplots.AxesSubplot at 0x7fb2ff5211d0>
https://colab.research.google.com/drive/1ZOq3uBmMQ1TtJ6vHMdAH2TgXgp4xVsHs?authuser=1#scrollTo=r5s1AWxcl1qU&printMode=true 3/3