Download as pdf or txt
Download as pdf or txt
You are on page 1 of 6

5/2/22, 2:35 PM BECOA122_ML_4.ipynb - Colaboratory

import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_graphviz
from IPython.display import Image

# Load the training table from a CSV file in the working directory.
data = pd.read_csv("data.csv")

data

Age Income Gender MaritialStatus Buys

0 <21 High Male Single No

1 <21 High Male Married No

2 21-35 High Male Single Yes

3 >35 Medium Male Single Yes

4 >35 Low Female Single Yes

5 >35 Low Female Married No

6 21-35 Low Female Married Yes

7 <21 Medium Male Single No

8 <21 Low Female Married Yes

9 >35 Medium Female Single Yes

10 <21 Medium Female Married Yes

11 21-35 Medium Male Married Yes

12 21-35 High Female Single Yes

13 >35 Medium Male Married No

https://colab.research.google.com/drive/1NZTjs-wF9uyC6-4G1sbrdBs0xJHcvYXI#scrollTo=jXFiGt-YJnfi&printMode=true 1/12

5/2/22, 2:35 PM BECOA122_ML_4.ipynb - Colaboratory

# Integer-encode every feature column (all columns except the last one).
# `apply` refits the single encoder on each column in turn.
le = LabelEncoder()
x = data.iloc[:, :-1].apply(le.fit_transform)

# Print the (original label, encoded value) pairs for each feature column.
headings = ["Age:", "\nIncome:", "\nGender:", "\nmaritialStatus:"]
for position, heading in enumerate(headings):
    print(heading, list(zip(data.iloc[:, position], x.iloc[:, position])))

Age: [('<21', 1), ('<21', 1), ('21-35', 0), ('>35', 2), ('>35', 2), ('>35', 2), ('21-35', 0), ('<21', 1), ('<21', 1), ('>35', 2

Income: [('High', 0), ('High', 0), ('High', 0), ('Medium', 2), ('Low', 1), ('Low', 1), ('Low', 1), ('Medium', 2), ('Low', 1), (

Gender: [('Male', 1), ('Male', 1), ('Male', 1), ('Male', 1), ('Female', 0), ('Female', 0), ('Female', 0), ('Male', 1), ('Female

maritialStatus: [('Single', 1), ('Married', 0), ('Single', 1), ('Single', 1), ('Single', 1), ('Married', 0), ('Married', 0), ('

https://colab.research.google.com/drive/1NZTjs-wF9uyC6-4G1sbrdBs0xJHcvYXI#scrollTo=jXFiGt-YJnfi&printMode=true 2/12
5/2/22, 2:35 PM BECOA122_ML_4.ipynb - Colaboratory

Age Income Gender MaritialStatus

0 1 0 1 1

1 1 0 1 0

2 0 0 1 1

3 2 2 1 1

4 2 1 0 1

5 2 1 0 0

6 0 1 0 0

7 1 2 1 1

8 1 1 0 0

9 2 2 0 1

10 1 2 0 0

11 0 2 1 0

12 0 0 0 1

13 2 2 1 0

y=data.iloc[:,-1]

y

0 No

1 No

2 Yes

3 Yes

4 Yes

5 No

6 Yes

7 No

8 Yes

9 Yes

10 Yes

11 Yes

12 Yes

13 No

Name: Buys, dtype: object

# Fit a decision tree with default hyperparameters on the encoded features.
dt=DecisionTreeClassifier()
dt.fit(x,y)

DecisionTreeClassifier()

# Encoded sample to classify, in feature-column order:
# Age=1 ('<21'), Income=1 ('Low'), Gender=0 ('Female'), MaritialStatus=0 ('Married').
query=np.array([1,1,0,0])

# The tree was fitted on a DataFrame, so predict on a one-row DataFrame with
# the same column names; passing a bare array makes scikit-learn emit the
# "X does not have valid feature names" UserWarning.
pred=dt.predict(pd.DataFrame([query], columns=x.columns))

pred[0]

https://colab.research.google.com/drive/1NZTjs-wF9uyC6-4G1sbrdBs0xJHcvYXI#scrollTo=jXFiGt-YJnfi&printMode=true 3/12

5/2/22, 2:35 PM BECOA122_ML_4.ipynb - Colaboratory

/usr/local/lib/python3.7/dist-packages/sklearn/base.py:451: UserWarning: X does not have v


"X does not have valid feature names, but"

'Yes'

# Write the fitted tree to a Graphviz DOT file, render it to PNG with the
# `dot` command-line tool (IPython `!` shell escape), and display the image.
export_graphviz(dt,out_file="data1.dot",feature_names=x.columns,class_names=["No","Yes"])
!dot -Tpng data1.dot -o tree1.png
Image("tree1.png")

https://colab.research.google.com/drive/1NZTjs-wF9uyC6-4G1sbrdBs0xJHcvYXI#scrollTo=jXFiGt-YJnfi&printMode=true 4/12
5/2/22, 2:35 PM BECOA122_ML_4.ipynb - Colaboratory

Decision Tree Classifier on Titanic dataset

import numpy as np

import pandas as pd

from sklearn.preprocessing import LabelEncoder

from sklearn.tree import DecisionTreeClassifier

from sklearn.tree import export_graphviz

from IPython.display import Image

from sklearn.compose import make_column_transformer

from sklearn.model_selection import train_test_split

from  sklearn.metrics  import accuracy_score

from sklearn.metrics import confusion_matrix

import matplotlib.pyplot as plt

import seaborn as sns

# Load the Titanic passenger table from a CSV file in the working directory.
df = pd.read_csv("titanic.csv")

df

https://colab.research.google.com/drive/1NZTjs-wF9uyC6-4G1sbrdBs0xJHcvYXI#scrollTo=jXFiGt-YJnfi&printMode=true 5/12

5/2/22, 2:35 PM BECOA122_ML_4.ipynb - Colaboratory

PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked

0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S

1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 0 PC 17599 71.2833 C85 C

2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S

3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S

4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S

... ... ... ... ... ... ... ... ... ... ... ... ...

886 887 0 2 Montvila, Rev. Juozas male 27.0 0 0 211536 13.0000 NaN S

887 888 1 1 Graham, Miss. Margaret Edith female 19.0 0 0 112053 30.0000 B42 S

888 889 0 3 Johnston, Miss. Catherine Helen "Carrie" female NaN 1 2 W./C. 6607 23.4500 NaN S

889 890 1 1 Behr, Mr. Karl Howell male 26.0 0 0 111369 30.0000 C148 C

890 891 0 3 Dooley, Mr. Patrick male 32.0 0 0 370376 7.7500 NaN Q

df.describe()

https://colab.research.google.com/drive/1NZTjs-wF9uyC6-4G1sbrdBs0xJHcvYXI#scrollTo=jXFiGt-YJnfi&printMode=true 6/12
5/2/22, 2:35 PM BECOA122_ML_4.ipynb - Colaboratory

PassengerId Survived Pclass Age SibSp Parch Fare

count 891.000000 891.000000 891.000000 714.000000 891.000000 891.000000 891.000000

mean 446.000000 0.383838 2.308642 29.699118 0.523008 0.381594 32.204208

std 257.353842 0.486592 0.836071 14.526497 1.102743 0.806057 49.693429

min 1.000000 0.000000 1.000000 0.420000 0.000000 0.000000 0.000000

25% 223.500000 0.000000 2.000000 20.125000 0.000000 0.000000 7.910400

50% 446.000000 0.000000 3.000000 28.000000 0.000000 0.000000 14.454200

75% 668.500000 1.000000 3.000000 38.000000 1.000000 0.000000 31.000000

max 891.000000 1.000000 3.000000 80.000000 8.000000 6.000000 512.329200

drop_elements = ['PassengerId', 'Name', 'Ticket']

df = df.drop(drop_elements, axis = 1)

df.isnull().sum()

Survived 0

Pclass 0

Sex 0

Age 177

SibSp 0

Parch 0

Fare 0

Cabin 687

Embarked 2

dtype: int64

# Fill missing ages with the mean age of passengers sharing the same
# Pclass and Sex.
cols = ['Pclass', 'Sex']

# Per-group mean ages, kept as a table for inspection.
age_class_sex = df.groupby(cols)['Age'].mean().reset_index()

# `transform('mean')` yields each row's group mean aligned on the original
# index, so no reset_index/merge/set_index round trip is needed.
df['Age'] = df['Age'].fillna(df.groupby(cols)['Age'].transform('mean'))

# Missing embarkation ports are filled with 'S'.
df['Embarked'] = df['Embarked'].fillna('S')

# Cabin becomes an indicator: 1 when a cabin value is present, 0 when missing.
# `notna()` tests missingness directly instead of comparing `type(x)` to float.
df['Cabin'] = df['Cabin'].notna().astype(int)

# Map the categorical codes to integers.
df['Embarked'] = df['Embarked'].map({'S': 0, 'C': 1, 'Q': 2}).astype(int)

df['Sex'] = df['Sex'].map({'female': 0, 'male': 1}).astype(int)

# Bucket Fare into four ordinal codes using right-closed intervals:
#   (-inf, 7.91] -> 0, (7.91, 14.454] -> 1, (14.454, 31] -> 2, (31, inf) -> 3
fare_edges = [-np.inf, 7.91, 14.454, 31, np.inf]

df['Fare'] = pd.cut(df['Fare'], bins=fare_edges, labels=False)

df['Fare'] = df['Fare'].astype(int)

# Age bucket 0: ages of 16 or below.
df.loc[ df['Age'] <= 16, 'Age'] = 0

https://colab.research.google.com/drive/1NZTjs-wF9uyC6-4G1sbrdBs0xJHcvYXI#scrollTo=jXFiGt-YJnfi&printMode=true 7/12

5/2/22, 2:35 PM BECOA122_ML_4.ipynb - Colaboratory

# Bucket the remaining ages into ordinal codes:
#   (16, 32] -> 1, (32, 48] -> 2, (48, 64] -> 3, above 64 -> 4
# Values of 16 or below are left untouched by this step.
age = df['Age']
age_conditions = [
    (age > 16) & (age <= 32),
    (age > 32) & (age <= 48),
    (age > 48) & (age <= 64),
    age > 64,
]

df['Age'] = np.select(age_conditions, [1, 2, 3, 4], default=age)

df['Age'] = df['Age'].astype(int)

df

Survived Pclass Sex Age SibSp Parch Fare Cabin Embarked

0 0 3 1 1 1 0 0 0 0

1 1 1 0 2 1 0 3 1 1

2 1 3 0 1 0 0 1 0 0

3 1 1 0 2 1 0 3 1 0

4 0 3 1 2 0 0 1 0 0

... ... ... ... ... ... ... ... ... ...

886 0 2 1 1 0 0 1 0 0

887 1 1 0 1 0 0 2 1 0

888 0 3 0 1 1 2 2 0 0

889 1 1 1 1 0 0 2 1 1

890 0 3 1 1 0 0 0 0 2

891 rows × 9 columns

# Target: the Survived column. Features: every other column.
y = df['Survived']
# Select the feature frame by name so its columns always match the columns
# of `x`, instead of assuming Survived is the first column of `df`.
x_features = df.drop(columns=['Survived'])
x = x_features.values

# Hold out 33% of the rows for testing; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=1)

https://colab.research.google.com/drive/1NZTjs-wF9uyC6-4G1sbrdBs0xJHcvYXI#scrollTo=jXFiGt-YJnfi&printMode=true 8/12
5/2/22, 2:35 PM BECOA122_ML_4.ipynb - Colaboratory

# Train a decision tree with default hyperparameters on the training split.
dt=DecisionTreeClassifier()
dt.fit(x_train,y_train)

DecisionTreeClassifier()

# Predict labels for the held-out rows and print the accuracy score.
y_pred = dt.predict(x_test)

print("Accuracy:",accuracy_score(y_test,y_pred))

Accuracy: 0.7661016949152543

# Side-by-side table of true and predicted labels; show the first 100 rows.
res = pd.DataFrame({'Actual': list(y_test), 'Predicted': list(y_pred)})
res.head(100)

https://colab.research.google.com/drive/1NZTjs-wF9uyC6-4G1sbrdBs0xJHcvYXI#scrollTo=jXFiGt-YJnfi&printMode=true 9/12

5/2/22, 2:35 PM BECOA122_ML_4.ipynb - Colaboratory

Actual Predicted

0 1 1
# Confusion matrix of the true test labels against the predicted labels.
conf_matrix = confusion_matrix(y_test, y_pred)

# Figure and axes for the heatmap.
fig, ax = plt.subplots(figsize=(8,6))

1 0 0
# fmt='d' annotates each cell with its integer count; seaborn's default
# '.2g' format would render a count such as 153 as '1.5e+02'.
# Draw explicitly on the axes created for this figure.
sns.heatmap(conf_matrix, annot=True, fmt='d', cbar=True, ax=ax)

plt.ylabel('True Label')

2 1 1
plt.xlabel('Predicted Label')

3 0 1
plt.title('Confusion Matrix')

conf_matrix

4 1 1

... ... ...

array([[153,  21],
       [ 48,  73]])
95 0 1

96 0 0

97 1 1

98 0 0

99 0 0

100 rows × 2 columns

export_graphviz(dt,out_file="data2.dot",feature_names=x_features.columns,class_names=["Survived","Died"])

!dot -Tpng data2.dot -o tree2.png
Image("tree2.png")

https://colab.research.google.com/drive/1NZTjs-wF9uyC6-4G1sbrdBs0xJHcvYXI#scrollTo=jXFiGt-YJnfi&printMode=true 10/12
5/2/22, 2:35 PM BECOA122_ML_4.ipynb - Colaboratory

https://colab.research.google.com/drive/1NZTjs-wF9uyC6-4G1sbrdBs0xJHcvYXI#scrollTo=jXFiGt-YJnfi&printMode=true 11/12

You might also like