Professional Documents
Culture Documents
Saurabh Verma 9919102005
Saurabh Verma 9919102005
ipynb - Colaboratory
#SAURABH VERMA 9919102005 E1
QUESTION 1: Write a program to train a model that represents a linear relationship between features of the Boston housing data (LSTAT, RM) and the median house value (MEDV).
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2.
# Reconstruct the same dataset from the original CMU source, as recommended
# by the scikit-learn deprecation notice. The raw file stores each record on
# two physical lines, hence the even/odd row interleaving below.
data_url = "http://lib.stat.cmu.edu/datasets/boston"
raw_df = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)
feature_names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
                 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT']
boston = pd.DataFrame(
    np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]]),
    columns=feature_names,
)
print(boston.columns)
boston.head()

# MEDV (median home value, in $1000s) is the regression target.
boston['MEDV'] = raw_df.values[1::2, 2]

# Use the two features most correlated with MEDV.
X = pd.DataFrame(np.c_[boston['LSTAT'], boston['RM']], columns=['LSTAT', 'RM'])
Y = boston['MEDV']
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; pinning random_state makes the
# split reproducible across runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=5
)

# Sanity-check the split sizes (404 train / 102 test for the 506-row dataset).
for part in (X_train, X_test, Y_train, Y_test):
    print(part.shape)
(404, 2)
(102, 2)
(404,)
(102,)
https://colab.research.google.com/drive/18Difbs9q2yl9CWl7PZ3TJsR04WHFAmH2#scrollTo=90326baf&printMode=true 1/8
10/2/22, 11:51 PM saurabh_verma_9919102005.ipynb - Colaboratory
from sklearn.linear_model import LinearRegression
# BUG FIX: r2_score was used below without being imported (NameError).
from sklearn.metrics import mean_squared_error, r2_score

# Ordinary least squares on the two selected features.
lin_model = LinearRegression()
lin_model.fit(X_train, Y_train)

# Evaluate on the training split: RMSE (same units as MEDV) and R^2.
y_train_predict = lin_model.predict(X_train)
rmse = np.sqrt(mean_squared_error(Y_train, y_train_predict))
r2 = r2_score(Y_train, y_train_predict)
Q2. Write a program to train logistic regression as a classifier model on iris dataset.
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Iris dataset bundled with seaborn: four numeric columns plus 'species'.
data = sns.load_dataset("iris")
data.head()

X = data.iloc[:, :-1]  # feature values: every column except the last
y = data.iloc[:, -1]   # target values: the last column ('species')

# Scatter plot of each feature against the species label.
plt.xlabel('Features')
plt.ylabel('Species')
pltX = data.loc[:, 'sepal_length']
pltY = data.loc[:, 'species']
https://colab.research.google.com/drive/18Difbs9q2yl9CWl7PZ3TJsR04WHFAmH2#scrollTo=90326baf&printMode=true 2/8
10/2/22, 11:51 PM saurabh_verma_9919102005.ipynb - Colaboratory
plt.scatter(pltX, pltY, color='blue', label='sepal_length')
pltX = data.loc[:, 'sepal_width']
pltY = data.loc[:, 'species']
plt.scatter(pltX, pltY, color='green', label='sepal_width')
pltX = data.loc[:, 'petal_length']
pltY = data.loc[:, 'species']
plt.scatter(pltX, pltY, color='red', label='petal_length')
pltX = data.loc[:, 'petal_width']
pltY = data.loc[:, 'species']
plt.scatter(pltX, pltY, color='black', label='petal_width')
plt.legend(loc=4, prop={'size': 8})
plt.show()

# 80/20 train/test split, reproducible via random_state.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# BUG FIX: the default max_iter=100 raised a ConvergenceWarning here
# (lbfgs hit the iteration limit); raise the cap so the solver converges.
model = LogisticRegression(max_iter=1000)
model.fit(x_train, y_train)
/usr/local/lib/python3.7/dist-packages/sklearn/linear_model/_logistic.py:818: Converg
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG,
LogisticRegression()
# Predict species for the held-out 20% and show the raw label array.
predictions = model.predict(x_test)
print(predictions)
https://colab.research.google.com/drive/18Difbs9q2yl9CWl7PZ3TJsR04WHFAmH2#scrollTo=90326baf&printMode=true 3/8
10/2/22, 11:51 PM saurabh_verma_9919102005.ipynb - Colaboratory
print()
# Per-class precision/recall/F1, then the overall accuracy on the test split.
print(classification_report(y_test, predictions))
print(accuracy_score(y_test, predictions))
accuracy 1.00 30
1.0
Q5. Write a program to train a Naïve bayes classifier for continuous features, for ex Iris data.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# NOTE(review): the URL was truncated in the PDF export at ".../mast"; this is
# the standard mk-gurucharan Classification repo path -- confirm against the
# original notebook.
dataset = pd.read_csv('https://raw.githubusercontent.com/mk-gurucharan/Classification/master/IrisDataset.csv')
X = dataset.iloc[:, :4].values       # four numeric measurement columns
y = dataset['species'].values        # class labels
dataset.head(5)

from sklearn.model_selection import train_test_split
# No random_state here, so the split (and results) differ on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

from sklearn.preprocessing import StandardScaler
# Fit the scaler on the training fold only, then apply it to the test fold,
# to avoid leaking test statistics into training.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
https://colab.research.google.com/drive/18Difbs9q2yl9CWl7PZ3TJsR04WHFAmH2#scrollTo=90326baf&printMode=true 4/8
10/2/22, 11:51 PM saurabh_verma_9919102005.ipynb - Colaboratory
from sklearn.naive_bayes import GaussianNB

# Gaussian Naive Bayes: suitable for continuous features such as the
# iris measurements.
classifier = GaussianNB()
classifier.fit(X_train, y_train)

# Predicted species for the scaled test fold.
y_pred = classifier.predict(X_test)
y_pred
'virginica'], dtype='<U10')
# Side-by-side table of ground-truth vs predicted species for inspection.
df = pd.DataFrame({'Real Values':y_test, 'Predicted Values':y_pred})
df
https://colab.research.google.com/drive/18Difbs9q2yl9CWl7PZ3TJsR04WHFAmH2#scrollTo=90326baf&printMode=true 5/8
10/2/22, 11:51 PM saurabh_verma_9919102005.ipynb - Colaboratory
0 virginica virginica
1 virginica virginica
2 virginica virginica
3 setosa setosa
4 setosa setosa
5 setosa setosa
6 virginica virginica
7 setosa setosa
8 versicolor versicolor
9 versicolor versicolor
10 virginica virginica
# Q6. Write a program to train a Naive Bayes classifier for discrete features.
# (The PDF export interleaved this cell with the Q5 output table; reconstructed
# below as a coherent cell.)
from sklearn.datasets import fetch_20newsgroups

# Full 20-newsgroups corpus; list the available category names.
data = fetch_20newsgroups()
data.target_names

# NOTE(review): the category list was truncated in the export at "'rec.aut";
# 'rec.autos' is the only target name matching that prefix -- confirm against
# the original notebook.
categories = ['talk.politics.misc', 'talk.religion.misc', 'sci.med', 'sci.space', 'rec.autos']
train = fetch_20newsgroups(subset='train', categories=categories)
test = fetch_20newsgroups(subset='test', categories=categories)
print(train.data[1])
https://colab.research.google.com/drive/18Difbs9q2yl9CWl7PZ3TJsR04WHFAmH2#scrollTo=90326baf&printMode=true 6/8
10/2/22, 11:51 PM saurabh_verma_9919102005.ipynb - Colaboratory
Article-I.D.: blue.7936
Lines: 23
: >While you're right that the S vertebrae are attached to each other,
: vertebrae. There is a bone called the sacrum at the end of the spine.
Oh come now, surely you know he only meant to measure the flow of
electromagnetic energy about the sacrum and then adjust these flows
--
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-|-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-|-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline

# TF-IDF turns the raw documents into sparse discrete-ish features;
# MultinomialNB with alpha=1 applies Laplace smoothing.
model = make_pipeline(TfidfVectorizer(), MultinomialNB(alpha=1))
model.fit(train.data, train.target)
labels = model.predict(test.data)

from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Confusion matrix normalized over true rows; the transpose puts predicted
# labels on the y-axis, matching the axis labels below.
conf_mat = confusion_matrix(test.target, labels, normalize="true")
# NOTE(review): the heatmap call was truncated in the export after
# "xticklabels=train.target_na"; completed with target_names on both axes.
sns.heatmap(conf_mat.T, annot=True, fmt=".0%", cmap="cividis",
            xticklabels=train.target_names, yticklabels=train.target_names)
plt.xlabel("True label")
plt.ylabel("Predicted label")
https://colab.research.google.com/drive/18Difbs9q2yl9CWl7PZ3TJsR04WHFAmH2#scrollTo=90326baf&printMode=true 7/8
10/2/22, 11:51 PM saurabh_verma_9919102005.ipynb - Colaboratory
https://colab.research.google.com/drive/18Difbs9q2yl9CWl7PZ3TJsR04WHFAmH2#scrollTo=90326baf&printMode=true 8/8