Download as pdf or txt
Download as pdf or txt
You are on page 1of 11

10/2/22, 11:51 PM saurabh_verma_9919102005.ipynb - Colaboratory

#SAURABH VERMA 9919102005 E1

QUESTION 1: Write a program to train a model that represents a linear relationship between the independent and dependent variables (linear regression on the Boston housing dataset).

import numpy as np
import matplotlib.pyplot as plt 

import pandas as pd  
import seaborn as sns 

# Load the Boston housing dataset bundled with scikit-learn.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 — this cell only runs on older releases; confirm the pinned version.
from sklearn.datasets import load_boston
boston_dataset = load_boston()
print(boston_dataset.keys())

dict_keys(['data', 'target', 'feature_names', 'DESCR', 'filename'])

# Wrap the raw feature matrix in a DataFrame with the dataset's column names,
# then preview the first rows.
boston = pd.DataFrame(
    data=boston_dataset.data,
    columns=boston_dataset.feature_names,
)
boston.head()

CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX PTRATIO B LSTAT

0 0.00632 18.0 2.31 0.0 0.538 6.575 65.2 4.0900 1.0 296.0 15.3 396.90

1 0.02731 0.0 7.07 0.0 0.469 6.421 78.9 4.9671 2.0 242.0 17.8 396.90

2 0.02729 0.0 7.07 0.0 0.469 7.185 61.1 4.9671 2.0 242.0 17.8 392.83

3 0.03237 0.0 2.18 0.0 0.458 6.998 45.8 6.0622 3.0 222.0 18.7 394.63

4 0.06905 0.0 2.18 0.0 0.458 7.147 54.2 6.0622 3.0 222.0 18.7 396.90

# Attach the regression target under the conventional 'MEDV' column name.
boston['MEDV'] = boston_dataset.target

# Predictors: only the LSTAT and RM columns; copy so X is independent of boston.
X = boston[['LSTAT', 'RM']].copy()

Y = boston['MEDV']

from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; a fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=5
)

# Sanity-check the split sizes (80/20 of 506 rows).
for split in (X_train, X_test, Y_train, Y_test):
    print(split.shape)

(404, 2)

(102, 2)

(404,)

(102,)

https://colab.research.google.com/drive/18Difbs9q2yl9CWl7PZ3TJsR04WHFAmH2#scrollTo=90326baf&printMode=true 1/8
10/2/22, 11:51 PM saurabh_verma_9919102005.ipynb - Colaboratory

from sklearn.linear_model import LinearRegression

from sklearn.metrics import mean_squared_error

# Ordinary least-squares regression on the two selected features.
lin_model = LinearRegression()

lin_model.fit(X_train, Y_train)

LinearRegression()

# Evaluate the fitted model on the training split.
y_train_predict = lin_model.predict(X_train)

# Root-mean-squared error of the training predictions.
rmse = np.sqrt(mean_squared_error(Y_train, y_train_predict))

# Fix: r2_score was used without being imported in the original cell,
# which raises NameError at this line.
from sklearn.metrics import r2_score
r2 = r2_score(Y_train, y_train_predict)

Q2. Write a program to train logistic regression as a classifier model on iris dataset.

import matplotlib.pyplot as plt

import seaborn as sns

from sklearn.linear_model import LogisticRegression

from sklearn.metrics import classification_report

from sklearn.metrics import accuracy_score

from sklearn.model_selection import train_test_split

# Load the iris dataset shipped with seaborn and preview the first rows.
data = sns.load_dataset("iris")

data.head()

sepal_length sepal_width petal_length petal_width species

0 5.1 3.5 1.4 0.2 setosa

1 4.9 3.0 1.4 0.2 setosa

2 4.7 3.2 1.3 0.2 setosa

3 4.6 3.1 1.5 0.2 setosa

4 5.0 3.6 1.4 0.2 setosa

# X = feature values: every column except the trailing species label.
X = data.iloc[:, :-1]

# y = target values: the species column itself.
y = data.iloc[:, -1]

# Axis labels for the exploratory scatter plot assembled below.
plt.xlabel('Features')
plt.ylabel('Species')

pltX = data['sepal_length']
pltY = data['species']

https://colab.research.google.com/drive/18Difbs9q2yl9CWl7PZ3TJsR04WHFAmH2#scrollTo=90326baf&printMode=true 2/8
10/2/22, 11:51 PM saurabh_verma_9919102005.ipynb - Colaboratory

# Overlay one scatter per feature against the species label, each feature in
# its own color, then render the combined figure.
for col, color in [('sepal_length', 'blue'),
                   ('sepal_width', 'green'),
                   ('petal_length', 'red'),
                   ('petal_width', 'black')]:
    pltX = data.loc[:, col]
    pltY = data.loc[:, 'species']
    plt.scatter(pltX, pltY, color=color, label=col)

plt.legend(loc=4, prop={'size': 8})

plt.show()

# 80/20 split with a fixed seed for reproducibility.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fix: the default max_iter=100 was too few for the lbfgs solver on this data —
# the original run emitted ConvergenceWarning ("TOTAL NO. of ITERATIONS REACHED
# LIMIT"). A larger iteration cap lets the solver converge.
model = LogisticRegression(max_iter=1000)

model.fit(x_train, y_train)

/usr/local/lib/python3.7/dist-packages/sklearn/linear_model/_logistic.py:818: ConvergenceWarning:
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:

https://scikit-learn.org/stable/modules/preprocessing.html

Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression

extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG,

LogisticRegression()

# Predict species labels for the held-out test samples.
predictions = model.predict(x_test)

print(predictions)

https://colab.research.google.com/drive/18Difbs9q2yl9CWl7PZ3TJsR04WHFAmH2#scrollTo=90326baf&printMode=true 3/8
10/2/22, 11:51 PM saurabh_verma_9919102005.ipynb - Colaboratory

print()

# Per-class precision/recall/F1 followed by overall accuracy on the test set.
print( classification_report(y_test, predictions) )

print( accuracy_score(y_test, predictions))

['versicolor' 'setosa' 'virginica' 'versicolor' 'versicolor' 'setosa'

'versicolor' 'virginica' 'versicolor' 'versicolor' 'virginica' 'setosa'

'setosa' 'setosa' 'setosa' 'versicolor' 'virginica' 'versicolor'

'versicolor' 'virginica' 'setosa' 'virginica' 'setosa' 'virginica'

'virginica' 'virginica' 'virginica' 'virginica' 'setosa' 'setosa']

precision recall f1-score support

setosa 1.00 1.00 1.00 10

versicolor 1.00 1.00 1.00 9

virginica 1.00 1.00 1.00 11

accuracy 1.00 30

macro avg 1.00 1.00 1.00 30

weighted avg 1.00 1.00 1.00 30

1.0

Q5. Write a program to train a Naïve bayes classifier for continuous features, for ex Iris data.

import numpy as np

import matplotlib.pyplot as plt

import pandas as pd

# NOTE(review): the source URL was truncated in this export after ".../mast";
# completed to the repository's master-branch iris CSV — confirm against the
# original notebook before relying on it.
dataset = pd.read_csv(
    'https://raw.githubusercontent.com/mk-gurucharan/Classification/master/IrisDataset.csv'
)
# First four columns are the numeric measurements; the label is 'species'.
X = dataset.iloc[:, :4].values

y = dataset['species'].values

dataset.head(5)

sepal_length sepal_width petal_length petal_width species

0 5.1 3.5 1.4 0.2 setosa

1 4.9 3.0 1.4 0.2 setosa

2 4.7 3.2 1.3 0.2 setosa

3 4.6 3.1 1.5 0.2 setosa

4 5.0 3.6 1.4 0.2 setosa

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# 80/20 train/test split. No random_state is given, so a different split is
# drawn on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Standardize features: fit the scaler on the training data only, then apply
# the same transformation to the test set (avoids information leakage).
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

https://colab.research.google.com/drive/18Difbs9q2yl9CWl7PZ3TJsR04WHFAmH2#scrollTo=90326baf&printMode=true 4/8
10/2/22, 11:51 PM saurabh_verma_9919102005.ipynb - Colaboratory

from sklearn.naive_bayes import GaussianNB

# Gaussian Naive Bayes models each feature with a per-class normal
# distribution — appropriate for the continuous iris measurements.
classifier = GaussianNB()

classifier.fit(X_train, y_train)

GaussianNB()

# Predict labels for the scaled test features; echoed below.
y_pred = classifier.predict(X_test) 

y_pred

array(['virginica', 'virginica', 'virginica', 'setosa', 'setosa',

'setosa', 'virginica', 'setosa', 'versicolor', 'versicolor',

'virginica', 'versicolor', 'setosa', 'setosa', 'virginica',

'versicolor', 'versicolor', 'versicolor', 'versicolor',

'versicolor', 'setosa', 'virginica', 'versicolor', 'setosa',

'setosa', 'versicolor', 'virginica', 'virginica', 'setosa',

'virginica'], dtype='<U10')

# Tabulate the ground-truth labels next to the classifier's predictions.
comparison = {'Real Values': y_test, 'Predicted Values': y_pred}
df = pd.DataFrame(comparison)

df

https://colab.research.google.com/drive/18Difbs9q2yl9CWl7PZ3TJsR04WHFAmH2#scrollTo=90326baf&printMode=true 5/8
10/2/22, 11:51 PM saurabh_verma_9919102005.ipynb - Colaboratory

Real Values Predicted Values

0 virginica virginica

1 virginica virginica

2 virginica virginica

3 setosa setosa

4 setosa setosa

5 setosa setosa

6 virginica virginica

7 setosa setosa

8 versicolor versicolor

9 versicolor versicolor

10 virginica virginica
Q6. Write a program to train a Naïve bayes classifier for discrete features.

11 versicolor versicolor

12 setosa setosa
from sklearn.datasets import fetch_20newsgroups

13 setosa setosa
# Fetch the 20 Newsgroups dataset (all categories, training subset by default).
data = fetch_20newsgroups()

14 virginica virginica
# Inspect the available category names (echoed below).
data.target_names

15 versicolor versicolor
16 versicolor versicolor
17 versicolor versicolor
18 versicolor versicolor
19 versicolor versicolor
20 setosa setosa
21 virginica virginica
22 versicolor versicolor
23 setosa setosa
24 setosa setosa
25 versicolor versicolor
26 versicolor virginica
27 virginica virginica

['alt.atheism',
 'comp.graphics',
 'comp.os.ms-windows.misc',
 'comp.sys.ibm.pc.hardware',
 'comp.sys.mac.hardware',
 'comp.windows.x',
 'misc.forsale',
 'rec.autos',
 'rec.motorcycles',
 'rec.sport.baseball',
 'rec.sport.hockey',
 'sci.crypt',
 'sci.electronics',
 'sci.med',
 'sci.space',
 'soc.religion.christian',
 'talk.politics.guns',
 'talk.politics.mideast',
 'talk.politics.misc',
 'talk.religion.misc']
# Five categories to restrict the classification task to.
# NOTE(review): this line was truncated in the export after 'rec.aut';
# completed to 'rec.autos' — the only target name with that prefix.
categories = ['talk.politics.misc', 'talk.religion.misc', 'sci.med',
              'sci.space', 'rec.autos']
28 setosa setosa

# Training split restricted to the five chosen categories.
train = fetch_20newsgroups(subset='train', categories=categories)

29 virginica virginica
# Held-out test split over the same categories.
test = fetch_20newsgroups(subset='test', categories=categories)

# Peek at one raw training document (echoed below).
print(train.data[1])

https://colab.research.google.com/drive/18Difbs9q2yl9CWl7PZ3TJsR04WHFAmH2#scrollTo=90326baf&printMode=true 6/8
10/2/22, 11:51 PM saurabh_verma_9919102005.ipynb - Colaboratory

From: kxgst1@pitt.edu (Kenneth Gilbert)

Subject: Re: Can't Breathe

Article-I.D.: blue.7936

Lines: 23

X-Newsreader: TIN [version 1.1 PL8]

David Nye (nyeda@cnsvax.uwec.edu) wrote:

: [reply to ron.roth@rose.com (ron roth)]

: >While you're right that the S vertebrae are attached to each other,

: >the sacrum, to my knowledge, *can* be adjusted either directly, or

: >by applying pressure on the pubic bone...

: Ron, you're an endless source of misinformation! There ARE no sacral

: vertebrae. There is a bone called the sacrum at the end of the spine.

: It is a single, solid bone except in a few patients who have a

: lumbarized S1 as a normal variant. How do you adjust a solid bone,

: break it? No, don't tell me, I don't want to know.

Oh come now, surely you know he only meant to measure the flow of

electromagnetic energy about the sacrum and then adjust these flows

with a crystal of chromium applied to the right great toe. Don't

you know anything?

--

=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-|-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

= Kenneth Gilbert __|__ University of Pittsburgh =

= General Internal Medicine | "...dammit, not a programmer! =

=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-|-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

from sklearn.feature_extraction.text import TfidfVectorizer

from sklearn.naive_bayes import MultinomialNB

from sklearn.pipeline import make_pipeline

# TF-IDF bag-of-words features feeding a multinomial Naive Bayes classifier;
# alpha=1 is standard Laplace smoothing.
model = make_pipeline(TfidfVectorizer(), MultinomialNB(alpha=1))

model.fit(train.data, train.target)

labels = model.predict(test.data)

from sklearn.metrics import confusion_matrix

import seaborn as sns

import matplotlib.pyplot as plt

# Row-normalized confusion matrix: each true class sums to 100%.
conf_mat = confusion_matrix(test.target, labels, normalize="true")

# Fix: the heatmap call was truncated in this export after
# "xticklabels=train.target_na"; reconstructed with matching tick labels on
# both axes (transposed matrix: x = true label, y = predicted label).
sns.heatmap(conf_mat.T, annot=True, fmt=".0%", cmap="cividis",
            xticklabels=train.target_names, yticklabels=train.target_names)
plt.xlabel("True label")

plt.ylabel("Predicted label")

https://colab.research.google.com/drive/18Difbs9q2yl9CWl7PZ3TJsR04WHFAmH2#scrollTo=90326baf&printMode=true 7/8
10/2/22, 11:51 PM saurabh_verma_9919102005.ipynb - Colaboratory

Text(32.99999999999999, 0.5, 'Predicted label')

Colab paid products


-
Cancel contracts here

https://colab.research.google.com/drive/18Difbs9q2yl9CWl7PZ3TJsR04WHFAmH2#scrollTo=90326baf&printMode=true 8/8

You might also like