Download as docx, pdf, or txt
Download as docx, pdf, or txt
You are on page 1 of 7

Experiment No.: 1
Apply Linear Regression for the given salary_experience dataset.
Program:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error,r2_score,mean_absolute_error
import matplotlib.pyplot as plt
import seaborn as sns

# Experiment 1: simple linear regression of Salary on YearsExperience.

# Load the dataset and inspect it. The bare expressions below only display
# something when run in a notebook/REPL; in a plain script they are no-ops.
df=pd.read_csv('Salary_Data.csv')
df.head()
df.dtypes
df.describe()

# Bar chart of salary against years of experience.
fig=plt.figure(figsize=(12,5))
sns.barplot(x='YearsExperience',y='Salary',data=df)

# Features: every column except the last (kept 2-D, as scikit-learn expects).
# Target: the column at index 1.
x=df.iloc[:, :-1].values
y=df.iloc[:,1].values

# Hold out one third of the rows for testing. No random_state is given, so the
# split (and therefore every number printed below) changes from run to run.
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=1/3)
reg=LinearRegression()
reg.fit(x_train,y_train)

# Fitted line: y = m * x + b.
m=reg.coef_
b=reg.intercept_
print("Slope= ",m,"intercept= ",b)

# Predicted salary for 2 years of experience (shown only in a notebook/REPL).
reg.predict([[2]])
y_pred=reg.predict(x_test)

# Training points (red) with the fitted regression line (green).
plt.scatter(x_train,y_train,color="red")
plt.plot(x_train,reg.predict(x_train),color="green")
plt.title("Salary VS Experience (Training Set)")
plt.xlabel("Years of Experience")
plt.ylabel("Salary")
plt.show()

# Test points (red) against the same fitted line; the line is drawn over the
# training x values, which traces the identical straight line.
plt.scatter(x_test,y_test,color="red")
plt.plot(x_train,reg.predict(x_train),color="green")
plt.title("Salary VS Experience (Test Set)")
plt.xlabel("Years of Experience")
plt.ylabel("Salary")
plt.show()

# Error metrics on the held-out test set.
print("MAE : ",mean_absolute_error(y_test,y_pred))
print('Mean Squared Error ',mean_squared_error(y_test,y_pred))
# RMSE is the square root of the MSE (not of the MAE).
print("Root Mean Squared Error : ",np.sqrt(mean_squared_error(y_test,y_pred)))
print("r2 score ",r2_score(y_test,y_pred)*100)

Outputs:

YearsExperience float64
Salary float64
dtype: object
Slope= [9167.20090168] intercept= 26945.819306778125
array([45280.22111013])

MAE : 5187.450071786506
Mean Squared Error 35606781.265705414
Root Mean Squared Error : 72.02395484688762
r2 score 94.69806002252082
Experiment No.:2
Apply Linear Regression for Boston House Pricing Dataset.
Program:
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score
import matplotlib.pyplot as plt
import seaborn as sns

# Experiment 2: multiple linear regression on the Boston house-price data.

# NOTE: datasets.load_boston() was deprecated in scikit-learn 1.0 and removed
# in 1.2, so this script needs scikit-learn < 1.2 to run as written.
boston=datasets.load_boston()
print(boston.feature_names)

# Build a DataFrame of the features and append the target as the last column.
# The bare expressions below only display output in a notebook/REPL.
df=pd.DataFrame(boston.data,columns=boston.feature_names)
df["target"]=boston.target
df.head()
df.dtypes
df.describe()

# Bar chart of the target against CRIM.
fig=plt.figure(figsize=(12,5))
sns.barplot(x='CRIM',y='target',data=df)

# Residual plot of target regressed on TAX. x/y are passed by keyword because
# recent seaborn releases no longer accept them positionally.
fig = plt.figure(figsize=(10,7))
sns.residplot(x=df.TAX, y=df.target, color='magenta')
plt.title('Residual PLOT', size=24)
# TAX is on the horizontal axis; the vertical axis shows the target residuals.
plt.xlabel('TAX', size=18)
plt.ylabel('Target', size=18)

# Features: every column except the last ("target"); target: the "target" column.
x=df.iloc[:, :-1].values
y=df['target'].values

# 80/20 train/test split with a fixed seed so the results are reproducible.
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=0)
reg=LinearRegression()
model=reg.fit(x_train,y_train)
# One coefficient per feature, plus the intercept.
m=reg.coef_
b=reg.intercept_
print("Slope = ",m,"Intercept = ",b)

# Error metrics on the held-out test set.
y_pred=reg.predict(x_test)
print("MAE : ",mean_absolute_error(y_test,y_pred))
print('Mean Squared Error ',mean_squared_error(y_test,y_pred))
# RMSE is the square root of the MSE (not of the MAE).
print("Root Mean Squared Error : ",np.sqrt(mean_squared_error(y_test,y_pred)))
print("r2 score ",r2_score(y_test,y_pred)*100)

# Column 0 of the training features is CRIM (first entry of feature_names).
df2=pd.DataFrame(x_train)
xy=df2[0]
# Actual targets (red) and model predictions (green), both against CRIM only.
# The model uses all features, so its predictions do not form a single line
# over CRIM; they are drawn as points rather than plotting the whole feature
# matrix, which would draw one tangled line per feature column.
plt.scatter(xy,y_train,color="red")
plt.scatter(xy,reg.predict(x_train),color="green")
plt.title("CRIM VS TARGET(Training Set)")
plt.xlabel("CRIM")
plt.ylabel("Target")
plt.show()

Outputs:
['CRIM' 'ZN' 'INDUS' 'CHAS' 'NOX' 'RM' 'AGE' 'DIS' 'RAD' 'TAX' 'PTRATIO' 'B' 'LSTAT']

CRIM float64
ZN float64
INDUS float64
CHAS float64
NOX float64
RM float64
AGE float64
DIS float64
RAD float64
TAX float64
PTRATIO float64
B float64
LSTAT float64
target float64
dtype: object
Slope = [-1.18410318e-01 4.47550643e-02 5.85674689e-03 2.34230117e+00
-1.61634024e+01 3.70135143e+00 -3.04553661e-03 -1.38664542e+00
2.43784171e-01 -1.09856157e-02 -1.04699133e+00 8.22014729e-03
-4.93642452e-01] Intercept = 38.138692713392714

MAE : 3.842810589450491
Mean Squared Error 33.4507089676911
Root Mean Squared Error : 1.9603087995136101
r2 score 58.92011519186442

You might also like