Download as pdf or txt
Download as pdf or txt
You are on page 1of 3

9/10/2021 DMDW_Expt-3_DT.

ipynb - Colaboratory

# Author - jyotiraditya ghatage


# Date - 25th Aug 2021
# Title -Decision Tree (Gini Index)

# Step 1 :Load the libraries
 
import numpy as np
import pandas as pd
import seaborn as sns
 
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier # model name with Camel Case
from sklearn import metrics

# Step 2 Load labelled data
# (2 input features x: age, bp; 1 output label y: diabetes)
 
# Read the labelled dataset into a DataFrame.
# NOTE(review): the absolute /content/ path is presumably the Colab upload
# directory — confirm the CSV is present there before running.
df = pd.read_csv("/content/Decision-Tree-Classification-Data.csv")

# Understand the dataset: rows are indexed 0 to 986, i.e. 987 samples.
# Planned split: 80% for training, 20% for testing,
# i.e. 987 samples -> 789 (training) + 198 (testing).
 
df.head()

age bp diabetes

0 65 65 1

1 45 82 0

2 35 73 1

3 45 90 0

4 50 68 1

# Show the last rows; the final index (986) confirms the 987-sample count.
df.tail()

age bp diabetes

982 45 87 0

983 40 83 0

984 40 83 0

985 40 60 1

986 45 82 0

# separate features (x: age, bp) from labels (y: diabetes)
# x - 
https://colab.research.google.com/drive/1ZOq3uBmMQ1TtJ6vHMdAH2TgXgp4xVsHs?authuser=1#scrollTo=r5s1AWxcl1qU&printMode=true 1/3
9/10/2021 DMDW_Expt-3_DT.ipynb - Colaboratory
# x   
# Split the frame column-wise: the target column becomes the label vector
# and every remaining column becomes the feature matrix.
y = df["diabetes"]  # labels: diabetes
x = df.drop(columns="diabetes")  # features: age, bp

x.head()

age bp

0 65 65

1 45 82

2 35 73

3 45 90

4 50 68

# Preview the first labels of the diabetes column (the rows shown are 0/1 integers).
y.head()

0 1

1 0

2 1

3 0

4 1

Name: diabetes, dtype: int64

# Hold out 20% of the samples for testing and train on the remaining 80%,
# so the model is scored on rows it did not see while fitting.
# test_size is the fraction of rows placed in the test set; random_state
# seeds the shuffle so the same split is produced on every run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.20,
    random_state=15,
)

x_train.shape  # training rows: 789

(789, 2)

x_test.shape # testing : 198 rows, 2 feature columns (age, bp)

(198, 2)

# Model building/model training/model creation
 
# Gini-impurity decision tree. criterion="gini" is scikit-learn's default and
# is spelled out here because it is the subject of this experiment.
# random_state is fixed (same seed as the train/test split) because the
# estimator permutes features at each split even with splitter="best", so an
# unseeded tree can differ between runs when candidate splits tie.
model = DecisionTreeClassifier(criterion="gini", random_state=15)
model.fit(x_train, y_train)

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',

max_depth=None, max_features=None, max_leaf_nodes=None,

min_impurity_decrease=0.0, min_impurity_split=None,

min_samples_leaf=1, min_samples_split=2,

min_weight_fraction_leaf=0.0, presort='deprecated',

random_state=None, splitter='best')

# Model testing: predict a diabetes label for every held-out test row
y_predict = model.predict(x_test)

https://colab.research.google.com/drive/1ZOq3uBmMQ1TtJ6vHMdAH2TgXgp4xVsHs?authuser=1#scrollTo=r5s1AWxcl1qU&printMode=true 2/3
9/10/2021 DMDW_Expt-3_DT.ipynb - Colaboratory

# Share of test rows predicted correctly, expressed as a percentage
accuracy = 100 * metrics.accuracy_score(y_test, y_predict)

print(accuracy)

91.41414141414141

from sklearn.metrics import confusion_matrix

# Rows are true labels and columns are predicted labels (scikit-learn convention).
cm = confusion_matrix(y_test, y_predict)
print(cm)
# fmt="d" prints the cells as integers; heatmap's default ".2g" format would
# show counts of 100 or more in scientific notation (e.g. 1.2e+02).
sns.heatmap(cm, annot=True, fmt="d")

[[93 4]

[13 88]]

<matplotlib.axes._subplots.AxesSubplot at 0x7fb2ff5211d0>

check 0s completed at 8:32 PM

https://colab.research.google.com/drive/1ZOq3uBmMQ1TtJ6vHMdAH2TgXgp4xVsHs?authuser=1#scrollTo=r5s1AWxcl1qU&printMode=true 3/3

You might also like