Download as pdf or txt
Download as pdf or txt
You are on page 1of 4

3/9/23, 11:14 AM Decision Tree

Omkar Singh

102

A1

Decision Tree

In [87]: import pandas


# sklearn.tree is imported twice on purpose here: `tree` is used later for
# tree.plot_tree, and DecisionTreeClassifier is used directly in the last cell.
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt
# Load the raw mushroom dataset (categorical columns, single-letter codes)
# and preview the first rows. `df` is the working frame for every later cell.
df = pandas.read_csv('mushrooms.csv')
df.head()

Out[87]:   class cap-shape cap-surface cap-color bruises odor gill-attachment gill-spacing gill-size gill-color ... stalk-surface-above-ring stalk-surface-below-ring

0 p x s n t p f c n k ... s

1 e x s y t a f c b k ... s

2 e b s w t l f c b n ... s

3 p x y w t p f c n n ... s

4 e x s g f n f w b k ... s

5 rows × 23 columns

# NOTE(review): this line was truncated by the PDF/HTML export — the drop list
# is cut off mid-token ('gi...'). Judging from In [89], which shows only the
# columns class/bruises/odor/population/habitat remaining, the list presumably
# continued with the other gill-/stalk-/veil-/ring- columns and ended with
# axis='columns' and inplace=True (or a reassignment to df) — TODO confirm
# against the original notebook.
In [88]: df.drop(['cap-shape','cap-color','cap-surface','gill-attachment','gill-spacing','gi

In [89]: df.head()

Out[89]: class bruises odor population habitat

0 p t p s u

1 e t a n g

2 e t l n m

3 p t p s u

4 e f n a g

In [90]: print(df)

localhost:8888/nbconvert/html/Decision Tree.ipynb?download=false 1/4


3/9/23, 11:14 AM Decision Tree

class bruises odor population habitat


0 p t p s u
1 e t a n g
2 e t l n m
3 p t p s u
4 e f n a g
.. ... ... ... ... ...
598 p t p s g
599 p t p s u
600 e f n a g
601 e t l y g
602 e f n a g

[603 rows x 5 columns]

In [91]: df.describe()

Out[91]: class bruises odor population habitat

count 603 603 603 603 603

unique 2 2 4 5 5

top e t a s g

freq 541 494 221 247 257

In [92]: d = {'p': 0, 'e': 1}


df['class'] = df['class'].map(d)
# edible=e, poisonous=p

d = {'t': 1, 'f': 0}
df['bruises'] = df['bruises'].map(d)
# bruises=t,no=f

d = {'p': 0, 'a': 1, 'l':2, 'n':3 }


df['odor'] = df['odor'].map(d)
# almond=a,anise=l,none=n,pungent=p

d = {'s': 0, 'n': 1, 'a':2, 'v':3, 'y':4 }


df['population'] = df['population'].map(d)
# abundant=a,numerous=n,scattered=s,several=v,solitary=y

d = {'u': 0, 'g': 1, 'm':2, 'p':3, 'd':4 }


df['habitat'] = df['habitat'].map(d)
# grasses=g,meadows=m,paths=p,urban=u,woods=d

print(df)

class bruises odor population habitat


0 0 1 0 0 0
1 1 1 1 1 1
2 1 1 2 1 2
3 0 1 0 0 0
4 1 0 3 2 1
.. ... ... ... ... ...
598 0 1 0 0 1
599 0 1 0 0 0
600 1 0 3 2 1
601 1 1 2 4 1
602 1 0 3 2 1

[603 rows x 5 columns]

localhost:8888/nbconvert/html/Decision Tree.ipynb?download=false 2/4


3/9/23, 11:14 AM Decision Tree

In [94]: from sklearn.model_selection import train_test_split

In [95]: inputs = df.drop('class',axis='columns')


target = df.bruises

In [96]: from sklearn import tree


model = tree.DecisionTreeClassifier()

In [97]: X_train, X_test, y_train, y_test = train_test_split(inputs,target,test_size=0.3)

In [98]: len(X_train)  # training-split size: 70% of the 603 rows (422)

422
Out[98]:

In [99]: len(X_test)  # held-out test-split size: 30% of the 603 rows (181)

181
Out[99]:

In [100… from sklearn import tree


model = tree.DecisionTreeClassifier()

In [101… model.fit(X_train,y_train)

DecisionTreeClassifier()
Out[101]:

# Accuracy on the held-out split.
# NOTE(review): the perfect 1.0 score below is a red flag, not a triumph — the
# feature matrix built in In [95] still contains the 'bruises' target column,
# so the tree can read the answer directly (target leakage). Remove the target
# from `inputs` and re-run to get an honest estimate.
In [105… model.score(X_test,y_test)

1.0
Out[105]:

In [93]: features = ['odor', 'habitat', 'population', 'class']

x = df[features]
y = df['bruises']

dtree = DecisionTreeClassifier()
dtree = dtree.fit(x, y)

tree.plot_tree(dtree, feature_names=features)

Out[93]:
[Text(0.4, 0.8333333333333334, 'odor <= 2.5\ngini = 0.296\nsamples = 603\nvalue = [109, 494]'),
 Text(0.2, 0.5, 'gini = 0.0\nsamples = 490\nvalue = [0, 490]'),
 Text(0.6, 0.5, 'habitat <= 2.5\ngini = 0.068\nsamples = 113\nvalue = [109, 4]'),
 Text(0.4, 0.16666666666666666, 'gini = 0.0\nsamples = 109\nvalue = [109, 0]'),
 Text(0.8, 0.16666666666666666, 'gini = 0.0\nsamples = 4\nvalue = [0, 4]')]

localhost:8888/nbconvert/html/Decision Tree.ipynb?download=false 3/4


3/9/23, 11:14 AM Decision Tree

localhost:8888/nbconvert/html/Decision Tree.ipynb?download=false 4/4

You might also like