Download as pdf or txt
Download as pdf or txt
You are on page 1 of 2

3/12/24, 6:46 PM MlYtLec8.2.ipynb - Colaboratory

keyboard_arrow_down Question from the video


from sklearn.datasets import load_digits # sklearn has some predefined datasets
import matplotlib.pyplot as plt
%matplotlib inline

dig = load_digits()
dir(dig) # dir() shows all the attributes of the object

output ['DESCR', 'data', 'feature_names', 'frame', 'images', 'target', 'target_names']

dig.data[0] # the value of each instance is 1D array with 64 values (8x8)

array([ 0., 0., 5., 13., 9., 1., 0., 0., 0., 0., 13., 15., 10.,
15., 5., 0., 0., 3., 15., 2., 0., 11., 8., 0., 0., 4.,
12., 0., 0., 8., 8., 0., 0., 5., 8., 0., 0., 9., 8.,
0., 0., 4., 11., 0., 1., 12., 7., 0., 0., 2., 14., 5.,
10., 12., 0., 0., 0., 0., 6., 13., 10., 0., 0., 0.])

# Display the first five digit images, one figure per sample.
for i in range(5):
    # matshow is used to represent an array as a matrix (8x8 matrix)
    plt.matshow(dig.images[i])

from sklearn.model_selection import train_test_split


from sklearn.linear_model import LogisticRegression
# Classifier for the digits data, built with all-default settings.
lr = LogisticRegression()

# Hold out 20% of the samples for testing. No random_state is passed, so the
# split (and therefore the score and confusion matrix) can differ between runs.
X_train, X_test, y_train, y_test = train_test_split(dig.data, dig.target, test_size=0.2)

# NOTE: with the default settings this fit stops at the lbfgs iteration limit
# and emits the ConvergenceWarning recorded below; the warning itself suggests
# raising max_iter or scaling the data.
lr.fit(X_train, y_train)

/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
n_iter_i = _check_optimize_result(
▾ LogisticRegression
LogisticRegression()

lr.predict(dig.data[0:5])

array([0, 1, 2, 3, 4])

lr.score(X_test, y_test)

0.9694444444444444

keyboard_arrow_down Confusion Matrix


from sklearn.metrics import confusion_matrix
# First argument is the true labels, second the predictions on the held-out set:
# rows of cm are the true digit, columns are the predicted digit.
cm = confusion_matrix(y_test, lr.predict(X_test))
cm  # bare expression so the notebook displays the matrix

array([[36, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[ 0, 30, 1, 0, 1, 0, 0, 0, 0, 0],
[ 0, 0, 35, 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 2, 31, 0, 1, 0, 0, 0, 0],
[ 0, 0, 0, 0, 33, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 42, 0, 1, 0, 1],
[ 0, 1, 0, 0, 0, 0, 40, 0, 0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 31, 0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 35, 0],
[ 0, 0, 0, 1, 0, 1, 0, 0, 1, 36]])

https://colab.research.google.com/drive/1sFsAjOuU9oqkLgQZg3JOntcAwkZf_Nok#scrollTo=n54rzhT2zU8I&printMode=true 1/2
3/12/24, 6:46 PM MlYtLec8.2.ipynb - Colaboratory
import seaborn as sn
plt.figure(figsize = (10,7)) # figsize is the (width, height) of the whole figure in inches, not of each heatmap cell
sn.heatmap(cm, annot=True) # annot=True writes each count inside its cell
# Columns of cm are predictions (x axis); rows are the true labels (y axis).
plt.xlabel("Predicted")
plt.ylabel("True Value")

Text(95.72222222222221, 0.5, 'True Value')

keyboard_arrow_down Exercise
from sklearn.datasets import load_iris
ir = load_iris()

dir(ir)

['DESCR',
'data',
'data_module',
'feature_names',
'filename',
'frame',
'target',
'target_names']

ir.feature_names

['sepal length (cm)',


'sepal width (cm)',
'petal length (cm)',
'petal width (cm)']

ir.target_names

array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

ir.data[0]

array([5.1, 3.5, 1.4, 0.2])

from sklearn.model_selection import train_test_split


from sklearn.linear_model import LogisticRegression
# Separate classifier for the iris exercise, again with default settings.
lr1 = LogisticRegression()

# Hold out 20% of the iris samples for testing. This rebinds X_train, X_test,
# y_train and y_test, replacing the digits split created earlier in the notebook.
X_train, X_test, y_train, y_test = train_test_split(ir.data, ir.target, test_size=0.2)

# Fit on the iris training portion only.
lr1.fit(X_train, y_train)

▾ LogisticRegression
LogisticRegression()

lr1.predict(X_test)

array([2, 0, 1, 0, 2, 2, 0, 1, 2, 1, 2, 0, 0, 2, 2, 1, 0, 2, 2, 0, 0, 2,
2, 1, 1, 1, 0, 0, 0, 1])

lr1.score(X_test, y_test)

0.9666666666666667

https://colab.research.google.com/drive/1sFsAjOuU9oqkLgQZg3JOntcAwkZf_Nok#scrollTo=n54rzhT2zU8I&printMode=true 2/2

You might also like