Random Forest


Omkar Singh, 102, A1

Random Forest

In [14]: import pandas
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt
df = pandas.read_csv('mushrooms.csv')
df.head()

Out[14]:
  class cap-shape cap-surface cap-color bruises odor gill-attachment gill-spacing gill-size gill-color ...
0   p       x         s          n         t     p         f             c            n         k     ...
1   e       x         s          y         t     a         f             c            b         k     ...
2   e       b         s          w         t     l         f             c            b         n     ...
3   p       x         y          w         t     p         f             c            n         n     ...
4   e       x         s          g         f     n         f             w            b         k     ...

5 rows × 23 columns
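Before dropping columns it can help to confirm the frame loaded as expected: its shape, any missing values, and how many levels each categorical column has. A minimal sketch, assuming it is run in the same notebook right after the cell above:

print(df.shape)           # (number of rows, number of columns)
print(df.isnull().sum())  # missing values per column
print(df.nunique())       # distinct levels in each categorical column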

In [15]: df.drop(['cap-shape','cap-color','cap-surface','gill-attachment','gill-spacing','gill-size',
'gill-color','stalk-shape','stalk-root','stalk-surface-above-ring','stalk-surface-below-ring',
'stalk-color-above-ring','stalk-color-below-ring','veil-type','veil-color','ring-number',
'ring-type','spore-print-color'], axis='columns', inplace=True)

In [16]: df.head()

Out[16]:
   class bruises odor population habitat
0    p      t     p       s         u
1    e      t     a       n         g
2    e      t     l       n         m
3    p      t     p       s         u
4    e      f     n       a         g

In [17]: print(df)

class bruises odor population habitat


0 p t p s u
1 e t a n g
2 e t l n m
3 p t p s u
4 e f n a g
.. ... ... ... ... ...
598 p t p s g
599 p t p s u
600 e f n a g
601 e t l y g
602 e f n a g

[603 rows x 5 columns]

In [18]: df.describe()

Out[18]: class bruises odor population habitat

count 603 603 603 603 603

unique 2 2 4 5 5

top e t a s g

freq 541 494 221 247 257
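describe() reports 603 rows, two class labels, and 'e' as the most frequent value with a count of 541. The exact edible/poisonous split can be printed directly; a small sketch using the frame from the cells above:

print(df['class'].value_counts())  # rows per label: e (edible) vs p (poisonous)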

In [19]: d = {'p': 0, 'e': 1}
df['class'] = df['class'].map(d)
# edible=e, poisonous=p

d = {'t': 1, 'f': 0}
df['bruises'] = df['bruises'].map(d)
# bruises=t, no=f

d = {'p': 0, 'a': 1, 'l': 2, 'n': 3}
df['odor'] = df['odor'].map(d)
# almond=a, anise=l, none=n, pungent=p

d = {'s': 0, 'n': 1, 'a': 2, 'v': 3, 'y': 4}
df['population'] = df['population'].map(d)
# abundant=a, numerous=n, scattered=s, several=v, solitary=y

d = {'u': 0, 'g': 1, 'm': 2, 'p': 3, 'd': 4}
df['habitat'] = df['habitat'].map(d)
# grasses=g, meadows=m, paths=p, urban=u, woods=d

print(df)

class bruises odor population habitat


0 0 1 0 0 0
1 1 1 1 1 1
2 1 1 2 1 2
3 0 1 0 0 0
4 1 0 3 2 1
.. ... ... ... ... ...
598 0 1 0 0 1
599 0 1 0 0 0
600 1 0 3 2 1
601 1 1 2 4 1
602 1 0 3 2 1

[603 rows x 5 columns]
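Mapping every column with a hand-written dictionary is explicit, but the same kind of integer encoding can be produced more compactly. A hedged sketch using pandas.factorize on a fresh copy of the raw file (note: factorize assigns codes in order of first appearance, so the exact integers may differ from the manual dictionaries above):

raw = pandas.read_csv('mushrooms.csv')[['class', 'bruises', 'odor', 'population', 'habitat']]
for col in raw.columns:
    codes, levels = pandas.factorize(raw[col])  # integer codes plus the original letter labels
    raw[col] = codes
print(raw.head())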


In [20]: from sklearn.model_selection import train_test_split

In [55]: inputs = df.drop('class', axis='columns')  # features: bruises, odor, population, habitat
target = df.habitat  # target: the habitat column (note it is also still present in inputs)

In [56]: X_train, X_test, y_train, y_test = train_test_split(inputs,target,test_size=0.25)
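train_test_split shuffles the rows, so each run gives a different 452/151 split. Passing random_state makes the split reproducible, and stratify keeps the habitat proportions similar in the train and test halves; a hedged variant of the call above:

X_train, X_test, y_train, y_test = train_test_split(
    inputs, target, test_size=0.25, random_state=0, stratify=target)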

In [57]: len(X_train)

Out[57]: 452

In [58]: X_train.head()

Out[58]: bruises odor population habitat

473 0 3 4 0

598 1 0 0 1

72 1 2 3 4

467 1 2 1 1

378 1 2 1 1

In [59]: len(X_test)

Out[59]: 151

In [60]: X_test.head()

Out[60]: bruises odor population habitat

543 1 1 0 1

456 1 1 1 1

454 1 2 1 2

338 0 3 3 0

444 1 2 1 2

In [61]: from sklearn.ensemble import RandomForestClassifier

In [62]: rfc = RandomForestClassifier(random_state=0)

In [63]: rfc.fit(X_train, y_train)

Out[63]: RandomForestClassifier(random_state=0)

In [64]: y_pred = rfc.predict(X_test)
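predict returns one habitat label per test row; comparing those predictions with y_test gives the accuracy of this first forest. A short sketch using accuracy_score:

from sklearn.metrics import accuracy_score
print(accuracy_score(y_test, y_pred))  # fraction of the 151 test rows predicted correctly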

In [65]: from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier(n_estimators=20)
model.fit(X_train, y_train)

Out[65]: RandomForestClassifier(n_estimators=20)


In [66]: model.score(X_test, y_test)

Out[66]: 1.0
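A score of 1.0 is not surprising here, because the habitat column being predicted is also one of the input features (see Out[58]), so the trees can read the answer directly. The forest's feature importances make that visible; a minimal sketch:

importances = pandas.Series(model.feature_importances_, index=X_train.columns)
print(importances.sort_values(ascending=False))  # habitat should dominate the ranking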

In [67]: y_predicted = model.predict(X_test)

In [68]: from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_predicted)
cm

Out[68]: array([[23,  0,  0,  0,  0],
                [ 0, 66,  0,  0,  0],
                [ 0,  0, 34,  0,  0],
                [ 0,  0,  0,  9,  0],
                [ 0,  0,  0,  0, 19]], dtype=int64)
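The matrix is diagonal, i.e. all 151 test rows land on their true habitat. Per-class precision, recall and F1 can be printed with classification_report; a short sketch:

from sklearn.metrics import classification_report
print(classification_report(y_test, y_predicted))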

In [72]: import matplotlib.pyplot as plt
import seaborn as sn
plt.figure(figsize=(7,5))
sn.heatmap(cm, annot=True)
plt.xlabel('Predicted')
plt.ylabel('Original')

Out[72]: Text(58.222222222222214, 0.5, 'Original')

[Seaborn heatmap of the confusion matrix: predicted habitat on the x-axis, original habitat on the y-axis]
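seaborn's heatmap annotates each cell with its raw count. As an alternative, recent scikit-learn versions can draw the same matrix directly from the predictions; a sketch assuming scikit-learn 1.0 or newer, where ConfusionMatrixDisplay.from_predictions is available:

from sklearn.metrics import ConfusionMatrixDisplay
ConfusionMatrixDisplay.from_predictions(y_test, y_predicted)
plt.show()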
