Ml-A4 Code

5/2/22, 2:35 PM BECOA122_ML_4.ipynb - Colaboratory
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_graphviz
from IPython.display import Image
# Load data.csv into a DataFrame; the bare `data` expression makes the notebook
# display it (columns Age, Income, Gender, MaritialStatus, Buys in the output below).
data = pd.read_csv("data.csv")
data
Age Income Gender MaritialStatus Buys
0 <21 High Male Single No
1 <21 High Male Married No
2 21-35 High Male Single Yes
3 >35 Medium Male Single Yes
4 >35 Low Female Single Yes
5 >35 Low Female Married No
6 21-35 Low Female Married Yes
7 <21 Medium Male Single No
8 <21 Low Female Married Yes
9 >35 Medium Female Single Yes
10 <21 Medium Female Married Yes
11 21-35 Medium Male Married Yes
12 21-35 High Female Single Yes
13 >35 Medium Male Married No
https://colab.research.google.com/drive/1NZTjs-wF9uyC6-4G1sbrdBs0xJHcvYXI#scrollTo=jXFiGt-YJnfi&printMode=true 1/12
5/2/22, 2:35 PM BECOA122_ML_4.ipynb - Colaboratory
# Integer-encode every feature column (all columns except the last one).
# The single encoder is re-fitted on each column in turn by `apply`, so after
# this line `le` only remembers the classes of the last column it saw.
le = LabelEncoder()
x = data.iloc[:, :-1].apply(le.fit_transform)

# For each feature, print the original category next to the code it received.
labels = ["Age:", "\nIncome:", "\nGender:", "\nmaritialStatus:"]
for col, label in enumerate(labels):
    print(label, list(zip(data.iloc[:, col], x.iloc[:, col])))
Age: [('<21', 1), ('<21', 1), ('21-35', 0), ('>35', 2), ('>35', 2), ('>35', 2), ('21-35', 0), ('<21', 1), ('<21', 1), ('>35', 2
Income: [('High', 0), ('High', 0), ('High', 0), ('Medium', 2), ('Low', 1), ('Low', 1), ('Low', 1), ('Medium', 2), ('Low', 1), (
Gender: [('Male', 1), ('Male', 1), ('Male', 1), ('Male', 1), ('Female', 0), ('Female', 0), ('Female', 0), ('Male', 1), ('Female
maritialStatus: [('Single', 1), ('Married', 0), ('Single', 1), ('Single', 1), ('Single', 1), ('Married', 0), ('Married', 0), ('
Age Income Gender MaritialStatus
0 1 0 1 1
1 1 0 1 0
2 0 0 1 1
3 2 2 1 1
4 2 1 0 1
5 2 1 0 0
6 0 1 0 0
7 1 2 1 1
8 1 1 0 0
9 2 2 0 1
10 1 2 0 0
11 0 2 1 0
12 0 0 0 1
13 2 2 1 0
y=data.iloc[:,-1]
y
0 No
1 No
2 Yes
3 Yes
4 Yes
5 No
6 Yes
7 No
8 Yes
9 Yes
10 Yes
11 Yes
12 Yes
13 No
Name: Buys, dtype: object
# Fit a decision tree with default hyperparameters on the label-encoded
# features `x` and the labels `y` (the last column of `data`).
dt=DecisionTreeClassifier()
dt.fit(x,y)
DecisionTreeClassifier()
# Encoded query row: Age 1 ('<21'), Income 1 ('Low'), Gender 0 ('Female'),
# MaritialStatus 0 ('Married'), using the same codes as `x`.
# It is built as a one-row DataFrame carrying the training column names: the
# tree was fitted on a DataFrame, and predicting on a bare NumPy array is what
# raised the "X does not have valid feature names" UserWarning seen in the
# recorded output.
query = pd.DataFrame([[1, 1, 0, 0]], columns=x.columns)
pred = dt.predict(query)
pred[0]
/usr/local/lib/python3.7/dist-packages/sklearn/base.py:451: UserWarning: X does not have v

"X does not have valid feature names, but"
'Yes'
export_graphviz(dt,out_file="data1.dot",feature_names=x.columns,class_names=["No","Yes"])
!dot -Tpng data1.dot -o tree1.png
Image("tree1.png")
Decision Tree Classifier on Titanic dataset
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_graphviz
from IPython.display import Image
from sklearn.compose import make_column_transformer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
# Load titanic.csv into a DataFrame; the bare `df` expression makes the
# notebook display it.
df = pd.read_csv("titanic.csv")
df
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked
0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S
1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 0 PC 17599 71.2833 C85 C
2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S
3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S
4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S
... ... ... ... ... ... ... ... ... ... ... ... ...
886 887 0 2 Montvila, Rev. Juozas male 27.0 0 0 211536 13.0000 NaN S
887 888 1 1 Graham, Miss. Margaret Edith female 19.0 0 0 112053 30.0000 B42 S
888 889 0 3 Johnston, Miss. Catherine Helen "Carrie" female NaN 1 2 W./C. 6607 23.4500 NaN S
889 890 1 1 Behr, Mr. Karl Howell male 26.0 0 0 111369 30.0000 C148 C
890 891 0 3 Dooley, Mr. Patrick male 32.0 0 0 370376 7.7500 NaN Q
df.describe()
PassengerId Survived Pclass Age SibSp Parch Fare
count 891.000000 891.000000 891.000000 714.000000 891.000000 891.000000 891.000000
mean 446.000000 0.383838 2.308642 29.699118 0.523008 0.381594 32.204208
std 257.353842 0.486592 0.836071 14.526497 1.102743 0.806057 49.693429
min 1.000000 0.000000 1.000000 0.420000 0.000000 0.000000 0.000000
25% 223.500000 0.000000 2.000000 20.125000 0.000000 0.000000 7.910400
50% 446.000000 0.000000 3.000000 28.000000 0.000000 0.000000 14.454200
75% 668.500000 1.000000 3.000000 38.000000 1.000000 0.000000 31.000000
max 891.000000 1.000000 3.000000 80.000000 8.000000 6.000000 512.329200
drop_elements = ['PassengerId', 'Name', 'Ticket']
df = df.drop(drop_elements, axis = 1)
df.isnull().sum()
Survived 0
Pclass 0
Sex 0
Age 177
SibSp 0
Parch 0
Fare 0
Cabin 687
Embarked 2
dtype: int64
# Impute each missing Age with the mean age of passengers that share the same
# Pclass and Sex. The merge is re-indexed on the original row index so that
# fillna aligns the group means with the right rows.
cols = ['Pclass', 'Sex']
age_class_sex = df.groupby(cols)['Age'].mean().reset_index()
df['Age'] = df['Age'].fillna(df[cols].reset_index().merge(age_class_sex, how='left', on=cols).set_index('index')['Age'])

# Missing embarkation ports are filled with 'S'.
df['Embarked'] = df['Embarked'].fillna('S')

# Reduce Cabin to a flag: 1 when a cabin is recorded, 0 when it is missing.
df['Cabin'] = df['Cabin'].notna().astype(int)

# Replace the categorical strings with integer codes.
df['Embarked'] = df['Embarked'].map({'S': 0, 'C': 1, 'Q': 2}).astype(int)
df['Sex'] = df['Sex'].map({'female': 0, 'male': 1}).astype(int)

# Bucket Fare into four right-closed bins: (-inf, 7.91] -> 0, (7.91, 14.454] -> 1,
# (14.454, 31] -> 2 and (31, inf) -> 3. The cut points are approximately the
# quartiles that df.describe() reports for Fare.
fare_edges = [-np.inf, 7.91, 14.454, 31, np.inf]
df['Fare'] = pd.cut(df['Fare'], bins=fare_edges, labels=False).astype(int)

# Bucket Age into five right-closed, 16-year-wide bins coded 0..4. The original
# cell only assigned codes 0, 1 and 4, so ages in (32, 64] kept their raw value;
# the recorded output (Age == 2 for passengers aged 35 and 38) shows that the
# (32, 48] -> 2 and (48, 64] -> 3 steps were intended.
age_edges = [-np.inf, 16, 32, 48, 64, np.inf]
df['Age'] = pd.cut(df['Age'], bins=age_edges, labels=False).astype(int)
df
Survived Pclass Sex Age SibSp Parch Fare Cabin Embarked
0 0 3 1 1 1 0 0 0 0
1 1 1 0 2 1 0 3 1 1
2 1 3 0 1 0 0 1 0 0
3 1 1 0 2 1 0 3 1 0
4 0 3 1 2 0 0 1 0 0
... ... ... ... ... ... ... ... ... ...
886 0 2 1 1 0 0 1 0 0
887 1 1 0 1 0 0 2 1 0
888 0 3 0 1 1 2 2 0 0
889 1 1 1 1 0 0 2 1 1
890 0 3 1 1 0 0 0 0 2
891 rows × 9 columns
# Target: the Survived column.
y = df['Survived']

# Features: every other column. `x_features` keeps the column names (used later
# to label the exported tree) and `x` is the plain array built from that same
# frame, so names and matrix columns cannot drift apart even if Survived is
# not the first column.
x_features = df.drop(['Survived'], axis=1)
x = x_features.values

# Hold out 33% of the rows for evaluation; random_state fixes the split.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=1)

# Fit a decision tree with default hyperparameters on the training rows.
dt = DecisionTreeClassifier()
dt.fit(x_train, y_train)
DecisionTreeClassifier()
# Predict the held-out rows and report the fraction classified correctly.
y_pred = dt.predict(x_test)
test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)
Accuracy: 0.7661016949152543
# Put each true label next to its prediction (fresh 0..n-1 index) and show the
# first 100 pairs.
res = pd.DataFrame({'Actual': y_test.to_numpy(), 'Predicted': y_pred})
res.head(100)
Actual Predicted
0 1 1
1 0 0
2 1 1
3 0 1
4 1 1
... ... ...
95 0 1
96 0 0
97 1 1
98 0 0
99 0 0
100 rows × 2 columns
conf_matrix = confusion_matrix(y_test, y_pred)
fig, ax = plt.subplots(figsize=(8,6))
sns.heatmap(conf_matrix,annot=True,cbar=True)
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.title('Confusion Matrix')
conf_matrix
array([[153,  21],
       [ 48,  73]])
export_graphviz(dt,out_file="data2.dot",feature_names=x_features.columns,class_names=["Survived","Died"])
!dot -Tpng data2.dot -o tree2.png
Image("tree2.png")

Ml-A4 Code

Uploaded by

Document Information

Original Description:

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

Ml-A4 Code

Uploaded by

Copyright:

Available Formats

5/2/22, 2:35 PM BECOA122_ML_4.

Age Income Gender MaritialStatus Buys

0 <21 High Male Single No

1 <21 High Male Married No

2 21-35 High Male Single Yes

3 >35 Medium Male Single Yes

4 >35 Low Female Single Yes

5 >35 Low Female Married No

6 21-35 Low Female Married Yes

7 <21 Medium Male Single No

8 <21 Low Female Married Yes

9 >35 Medium Female Single Yes

10 <21 Medium Female Married Yes

11 21-35 Medium Male Married Yes

12 21-35 High Female Single Yes

13 >35 Medium Male Married No

5/2/22, 2:35 PM BECOA122_ML_4.ipynb - Colaboratory

Age Income Gender MaritialStatus

Name: Buys, dtype: object

5/2/22, 2:35 PM BECOA122_ML_4.ipynb - Colaboratory

/usr/local/lib/python3.7/dist-packages/sklearn/base.py:451: UserWarning: X does not have v

Decision Tree Classifier on Titanic dataset

5/2/22, 2:35 PM BECOA122_ML_4.ipynb - Colaboratory

Cumings, Mrs. John Bradley

Futrelle, Mrs. Jacques Heath

4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S

Johnston, Miss. Catherine

PassengerId Survived Pclass Age SibSp Parch Fare

count 891.000000 891.000000 891.000000 714.000000 891.000000 891.000000 891.000000

mean 446.000000 0.383838 2.308642 29.699118 0.523008 0.381594 32.204208

std 257.353842 0.486592 0.836071 14.526497 1.102743 0.806057 49.693429

min 1.000000 0.000000 1.000000 0.420000 0.000000 0.000000 0.000000

223.500000 0.000000 2.000000 20.125000 0.000000 0.000000 7.910400

446.000000 0.000000 3.000000 28.000000 0.000000 0.000000 14.454200

5/2/22, 2:35 PM BECOA122_ML_4.ipynb - Colaboratory

Survived Pclass Sex Age SibSp Parch Fare Cabin Embarked

891 rows × 9 columns

5/2/22, 2:35 PM BECOA122_ML_4.ipynb - Colaboratory

100 rows × 2 columns

You might also like