
School of Computer Science and Engineering

CSE3045 - Mathematical Modelling for Data Science


Semester: Fall 2022-23
Slot: L21+L22

LAB Activity-3

Faculty: Dr. Arup Ghosh

1) Implement the Stochastic Gradient Descent algorithm for Linear Regression using Python and test it on some randomly generated datasets.
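The walkthrough below tests on the Boston Housing dataset; for the randomly generated datasets mentioned in the task, a minimal sketch with NumPy could look like the following (the sample size, true weights, bias, and noise level are illustrative assumptions, not part of the original code):

import numpy as np

# Hypothetical random linear dataset: y = X.w + b + noise
rng = np.random.default_rng(42)                  # seeded for reproducibility
n_samples, n_features = 500, 3                   # assumed sizes
X_rand = rng.normal(size=(n_samples, n_features))
true_w = rng.uniform(-5, 5, size=n_features)     # assumed ground-truth weights
true_b = 2.0                                     # assumed ground-truth bias
y_rand = X_rand @ true_w + true_b + rng.normal(scale=0.5, size=n_samples)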

Code
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from prettytable import PrettyTable
from sklearn.datasets import load_boston  # note: removed in scikit-learn 1.2; this lab assumes an older version
from sklearn import preprocessing
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
print("DONE")

# Linear Regression on the Boston Housing data
boston = load_boston()
boston_data = pd.DataFrame(boston.data, columns=boston.feature_names)
X = boston.data
Y = boston.target
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.3)

# standardizing the data (the scaler is fit on the training split only)
scaler = preprocessing.StandardScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

# Adding the 'price' column to the training data
train_data = pd.DataFrame(x_train)
train_data['price'] = y_train
train_data.head(3)

# Mini-batch SGD regressor implemented from scratch
def GradientDescentRegressor(train_data, learning_rate=0.001, n_itr=1000, k=10):
    w_cur = np.zeros(shape=(1, train_data.shape[1] - 1))
    b_cur = 0
    cur_itr = 1
    while cur_itr <= n_itr:
        w_old = w_cur
        b_old = b_cur
        w_temp = np.zeros(shape=(1, train_data.shape[1] - 1))
        b_temp = 0
        temp = train_data.sample(k)  # random mini-batch of k rows
        y = np.array(temp['price'])
        x = np.array(temp.drop('price', axis=1))
        # accumulate the gradient of the mean squared loss over the mini-batch
        for i in range(k):
            w_temp += x[i] * (y[i] - (np.dot(w_old, x[i]) + b_old)) * (-2 / k)
            b_temp += (y[i] - (np.dot(w_old, x[i]) + b_old)) * (-2 / k)
        w_cur = w_old - learning_rate * w_temp
        b_cur = b_old - learning_rate * b_temp
        if (w_old == w_cur).all():  # stop early if the weights no longer change
            break
        cur_itr += 1
    return w_cur, b_cur

def predict(x, w, b):
    y_pred = []
    for i in range(len(x)):
        y = (np.dot(w, x[i]) + b).item()  # np.asscalar is deprecated; .item() replaces it
        y_pred.append(y)
    return np.array(y_pred)

def plot_(y_actual, y_pred):
    # scatter plot of actual vs. predicted target values
    plt.scatter(y_actual, y_pred)
    plt.grid()
    plt.title('Scatter plot between actual y and predicted y')
    plt.xlabel('Actual y')
    plt.ylabel('Predicted y')
    plt.show()

w, b = GradientDescentRegressor(train_data, learning_rate=0.001, n_itr=1000)
y_pred_sgd = predict(x_test, w, b)

plot_(y_test, y_pred_sgd)
print('Mean Squared Error :', mean_squared_error(y_test, y_pred_sgd))
OUTPUT:

learning_rate=0.001, n_itr=1000

Mean Squared Error : 37.646704089438025

On changing the learning rate to 0.01 (i.e., 1%), the error drops noticeably:

learning_rate=0.01, n_itr=1000

Mean Squared Error : 23.285739328791426
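This second result comes from rerunning the same pipeline with the larger step size; a minimal sketch of that rerun, where only the learning_rate argument changes:

# rerun the custom regressor with a 10x larger learning rate
w, b = GradientDescentRegressor(train_data, learning_rate=0.01, n_itr=1000)
y_pred_sgd = predict(x_test, w, b)
plot_(y_test, y_pred_sgd)
print('Mean Squared Error :', mean_squared_error(y_test, y_pred_sgd))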

[Screenshot of code and output]


Implementing Stochastic Gradient Descent using the scikit-learn library:

# scikit-learn SGD regressor
n_iter = 100
clf_ = SGDRegressor(max_iter=n_iter)
clf_.fit(x_train, y_train)
y_pred_sksgd = clf_.predict(x_test)
plt.scatter(y_test,y_pred_sksgd)
plt.grid()
plt.xlabel('Actual y')
plt.ylabel('Predicted y')
plt.title('Scatter plot of actual y vs. predicted y')
plt.show()

print('Mean Squared Error :',mean_squared_error(y_test, y_pred_sksgd))

# scikit-learn SGD regressor's learned weight vector
sklearn_w = clf_.coef_
sklearn_w
Comparing Both Methods:
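One way to set the two learned weight vectors side by side is the already-imported PrettyTable; a minimal sketch (the table layout here is an assumption, not the original output):

# Hypothetical side-by-side view of the two learned weight vectors
table = PrettyTable(['Feature', 'Custom SGD weight', 'sklearn SGD weight'])
for name, w_custom, w_sk in zip(boston_data.columns, w.flatten(), sklearn_w):
    table.add_row([name, round(w_custom, 4), round(w_sk, 4)])
print(table)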
