
from sklearn.datasets import load_iris
import pandas as pd

# Load the iris data into a DataFrame and append the class label as 'target'
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df['target'] = iris.target
print(df.shape)
print(df.head(3))

(150, 5)
   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  target
0                5.1               3.5                1.4               0.2       0
1                4.9               3.0                1.4               0.2       0
2                4.7               3.2                1.3               0.2       0
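
The iris data ships with 50 samples per class; a quick balance check (a minimal sketch, not part of the original notebook) using standard pandas:

# Count how many rows belong to each of the three classes
print(df['target'].value_counts())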

labels = iris.target_names
feature_names = iris.feature_names
print('Number of Classes: {}'.format(len(labels)))
print(labels)
print('Number of Features: {}'.format(len(feature_names)))
print(feature_names)

Number of Classes: 3
['setosa' 'versicolor' 'virginica']
Number of Features: 4
['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']

from sklearn.model_selection import train_test_split


X = df[feature_names]
Y = df['target']
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, random_state=0, train_size=0.7, test_size=0.3)
print(X_train.shape, Y_train.shape)
print(X_test.shape, Y_test.shape)
print(X_train.head(3))
print(X_test.head(3))

(105, 4) (105,)
(45, 4) (45,)
     sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
60                  5.0               2.0                3.5               1.0
116                 6.5               3.0                5.5               1.8
144                 6.7               3.3                5.7               2.5
     sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
114                 5.8               2.8                5.1               2.4
62                  6.0               2.2                4.0               1.0
33                  5.5               4.2                1.4               0.2
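
The split above is purely random; an alternative (a sketch, not used for the results below) is to pass stratify=Y so both sets keep the 50/50/50 class proportions:

# Stratified 70/30 split -- preserves the class ratio in train and test
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, random_state=0, train_size=0.7, test_size=0.3, stratify=Y)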

from sklearn.preprocessing import StandardScaler


scaler = StandardScaler()
# Fit the scaler on the training data only, then apply the same
# transformation to the test data (avoids fitting on the test set)
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
print(X_train[0:3,:])

[[-1.02366372 -2.37846268 -0.18295039 -0.29318114]
 [ 0.69517462 -0.10190314  0.93066067  0.7372463 ]
 [ 0.92435306  0.58106472  1.04202177  1.63887031]]
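
Decision trees do not strictly require feature scaling, but if scaling is kept, a Pipeline (a sketch, not part of the original notebook) guarantees the scaler is fit on the training split only and re-applied unchanged to the raw, unscaled test split:

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier

# Chain scaling and the classifier; fit() would fit both steps on the
# (unscaled) training data, and predict()/score() reuse the fitted scaler
pipe = Pipeline([
    ('scale', StandardScaler()),
    ('tree', DecisionTreeClassifier(max_depth=2, random_state=0)),
])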

from sklearn.tree import DecisionTreeClassifier


clf = DecisionTreeClassifier(max_depth = 2, random_state = 0)
clf.fit(X_train, Y_train)

DecisionTreeClassifier(max_depth=2, random_state=0)

print('Input: ' + str(X_test[0:9]))


print('Output: ' + str(clf.predict(X_test[0:9,:])))
print('True value: ' + str(Y_test[0:9].values))

Input: [[ 0.10652036 -0.6802299   0.90797473  1.77175914]
 [ 0.39703042 -2.10396689  0.24507283 -0.18650096]
 [-0.32924474  2.64182309 -1.32178623 -1.30550673]
 [ 2.28534586 -0.4429404   1.63114045  0.93250481]
 [-1.05551991  0.7435071  -1.26152242 -1.30550673]
 [ 0.83279553  0.5062176   1.45034902  1.91163486]
 [-1.05551991  0.9807966  -1.38205004 -1.16563101]
 [ 1.41381566  0.0316386   0.66691949  0.51287764]
 [ 1.55907069 -0.6802299   0.7271833   0.37300192]]
Output: [2 1 0 2 0 2 0 2 2]
True value: [2 1 0 2 0 2 0 1 1]
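
Besides hard labels, the fitted tree exposes per-class probabilities (the class proportions in the reached leaf) and a convenience accuracy method; a minimal sketch using standard estimator methods:

# Leaf class proportions for the first three test samples, then mean accuracy
print(clf.predict_proba(X_test[0:3, :]))
print(clf.score(X_test, Y_test))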

from sklearn.metrics import confusion_matrix


Y_predict = clf.predict(X_test)
Y_true = Y_test.values
print(confusion_matrix(Y_true, Y_predict))

[[16 0 0]
[ 0 13 5]
[ 0 0 11]]
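
The raw matrix can also be drawn with class labels on the axes; a minimal sketch, assuming scikit-learn >= 1.0 where ConfusionMatrixDisplay gained from_predictions:

from sklearn.metrics import ConfusionMatrixDisplay

# Plot the same confusion matrix with the species names as tick labels
ConfusionMatrixDisplay.from_predictions(Y_true, Y_predict,
                                        display_labels=labels)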

from sklearn.metrics import accuracy_score, f1_score


accuracy = accuracy_score(Y_true, Y_predict)
f1 = f1_score(Y_true, Y_predict, average=None)
print(accuracy, f1)

0.8888888888888888 [1. 0.83870968 0.81481481]
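
The same per-class precision, recall, and F1 figures can also be printed as one table with classification_report; a minimal sketch:

from sklearn.metrics import classification_report

# Per-class precision/recall/F1 plus overall accuracy, labelled by species
print(classification_report(Y_true, Y_predict, target_names=labels))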

#Visualizing using sklearn built-in tools
#requires scikit-learn version >= 0.20
import sklearn
if sklearn.__version__ < '0.20.0':
    raise Exception('Error: required scikit-learn version >= 0.20')
from sklearn import tree
text_representation = tree.export_text(clf)
print(text_representation)
tree.plot_tree(clf,
               feature_names=feature_names,
               class_names=labels,
               filled=True);

|--- feature_3 <= -0.62
|   |--- class: 0
|--- feature_3 > -0.62
|   |--- feature_2 <= 0.62
|   |   |--- class: 1
|   |--- feature_2 > 0.62
|   |   |--- class: 2
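
The same structure can be summarised numerically with feature_importances_; since the depth-2 tree above splits only on the two petal features, only those should receive non-zero weight (a minimal sketch):

# Impurity-based importance of each input feature, indexed by name
print(pd.Series(clf.feature_importances_, index=feature_names))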

#using graphviz
#install the graphviz and python-graphviz packages
import graphviz
from sklearn import tree

dot_data = tree.export_graphviz(clf, out_file=None,
                                feature_names=iris.feature_names,
                                class_names=iris.target_names,
                                filled=True, rounded=True,
                                special_characters=True)
graph = graphviz.Source(dot_data)
#graph.render("iris")
graph
