Professional Documents
Culture Documents
Importing Necessary Libraries
dataset = calories_consumed.csv
In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
In [2]:
wg_cc=pd.read_csv("C:\\Users\\Raja\\Downloads\\assignments\\simple linear\\calories_consumed.csv")
EDA
In [3]:
wg_cc.head()
Out[3]:
wg cc
0 108 1500
1 200 2300
2 900 3400
3 200 2200
4 300 2500
https://onlinefreetutorial.com/wp-content/uploads/2020/03/simple-linear.html 1/29
1/16/23, 5:38 PM simple linear
In [4]:
wg_cc.corr()
Out[4]:
wg cc
wg 1.000000 0.946991
cc 0.946991 1.000000
In [5]:
Out[5]:
In [6]:
wg_cc.describe()
Out[6]:
wg cc
https://onlinefreetutorial.com/wp-content/uploads/2020/03/simple-linear.html 2/29
1/16/23, 5:38 PM simple linear
In [7]:
In [8]:
model=smf.ols("wg~cc",data=wg_cc).fit()
In [9]:
model.params
Out[9]:
Intercept -625.752356
cc 0.420157
dtype: float64
https://onlinefreetutorial.com/wp-content/uploads/2020/03/simple-linear.html 3/29
1/16/23, 5:38 PM simple linear
In [10]:
model.summary()
C:\Users\Raja\Anaconda33\lib\site-packages\scipy\stats\stats.py:1394: User
Warning: kurtosistest only valid for n>=20 ... continuing anyway, n=14
"anyway, n=%i" % int(n))
Out[10]:
Df Model: 1
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 8.28e+03. This might indicate that there are
strong multicollinearity or other numerical problems.
In [11]:
Out[11]:
0 1
cc 0.330506 0.509807
In [12]:
https://onlinefreetutorial.com/wp-content/uploads/2020/03/simple-linear.html 4/29
1/16/23, 5:38 PM simple linear
In [13]:
Out[13]:
In [14]:
pred.corr(wg_cc.wg) # 0.947
Out[14]:
0.9469910088554457
In [15]:
model1 = smf.ols('wg~np.log(cc)',data=wg_cc).fit()
In [16]:
model1.params
Out[16]:
Intercept -6955.650125
np.log(cc) 948.371723
dtype: float64
https://onlinefreetutorial.com/wp-content/uploads/2020/03/simple-linear.html 5/29
1/16/23, 5:38 PM simple linear
In [17]:
model1.summary()
C:\Users\Raja\Anaconda33\lib\site-packages\scipy\stats\stats.py:1394: User
Warning: kurtosistest only valid for n>=20 ... continuing anyway, n=14
"anyway, n=%i" % int(n))
Out[17]:
Df Model: 1
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
In [18]:
model1.conf_int(0.01)
Out[18]:
0 1
In [19]:
pred1 = model1.predict(wg_cc)
https://onlinefreetutorial.com/wp-content/uploads/2020/03/simple-linear.html 6/29
1/16/23, 5:38 PM simple linear
In [20]:
pred1.corr(wg_cc.wg)
Out[20]:
0.8987252805287711
The model with the higher R-squared value is better, i.e. model (R-squared = 0.897) is
better than model1 (R-squared = 0.808). The better model's R-squared is above 0.85, which indicates a good relationship.
====================================================================================
dataset = delivery_time.csv
In [21]:
dt_st=pd.read_csv("C:\\Users\\Raja\\Downloads\\assignments\\simple linear\\delivery_time.csv")
EDA
In [22]:
dt_st.head()
Out[22]:
dt st
0 21.00 10
1 13.50 4
2 19.75 6
3 24.00 9
4 29.00 10
https://onlinefreetutorial.com/wp-content/uploads/2020/03/simple-linear.html 7/29
1/16/23, 5:38 PM simple linear
In [23]:
dt_st.corr()
Out[23]:
dt st
dt 1.000000 0.825997
st 0.825997 1.000000
In [24]:
Out[24]:
Text(0,0.5,'Delivery time')
https://onlinefreetutorial.com/wp-content/uploads/2020/03/simple-linear.html 8/29
1/16/23, 5:38 PM simple linear
In [25]:
plt.boxplot(dt_st.dt)
Out[25]:
In [26]:
plt.hist(dt_st.dt, bins=5)
Out[26]:
In [27]:
model2=smf.ols("dt~st",data=dt_st).fit()
https://onlinefreetutorial.com/wp-content/uploads/2020/03/simple-linear.html 9/29
1/16/23, 5:38 PM simple linear
In [28]:
model2.params
Out[28]:
Intercept 6.582734
st 1.649020
dtype: float64
In [29]:
model2.summary()
Out[29]:
Df Model: 1
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
In [30]:
model3=smf.ols("dt~np.log(st)",data=dt_st).fit()
https://onlinefreetutorial.com/wp-content/uploads/2020/03/simple-linear.html 10/29
1/16/23, 5:38 PM simple linear
In [31]:
model3.params
Out[31]:
Intercept 1.159684
np.log(st) 9.043413
dtype: float64
In [32]:
model3.summary()
Out[32]:
Df Model: 1
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
In [33]:
Out[33]:
0 1
st 1.108673 2.189367
https://onlinefreetutorial.com/wp-content/uploads/2020/03/simple-linear.html 11/29
1/16/23, 5:38 PM simple linear
In [34]:
Out[34]:
0 1
In [35]:
In [36]:
In [37]:
Out[37]:
Text(0,0.5,'Delivery time')
https://onlinefreetutorial.com/wp-content/uploads/2020/03/simple-linear.html 12/29
1/16/23, 5:38 PM simple linear
In [38]:
Out[38]:
Text(0,0.5,'Delivery time')
====================================================================================
dataset = emp_data.csv
In [39]:
sal_churn=pd.read_csv("C:\\Users\\Raja\\Downloads\\assignments\\simple linear\\emp_data.csv")
https://onlinefreetutorial.com/wp-content/uploads/2020/03/simple-linear.html 13/29
1/16/23, 5:38 PM simple linear
EDA
In [40]:
sal_churn.head()
Out[40]:
Salary_hike Churn_out_rate
0 1580 92
1 1600 85
2 1610 80
3 1640 75
4 1660 72
In [41]:
sal_churn.corr()
Out[41]:
Salary_hike Churn_out_rate
In [42]:
Out[42]:
Text(0,0.5,'Churn_out_rate')
https://onlinefreetutorial.com/wp-content/uploads/2020/03/simple-linear.html 14/29
1/16/23, 5:38 PM simple linear
In [43]:
plt.hist(sal_churn.Salary_hike)
Out[43]:
(array([2., 1., 2., 1., 1., 1., 0., 1., 0., 1.]),
array([1580., 1609., 1638., 1667., 1696., 1725., 1754., 1783., 1812.,
1841., 1870.]),
<a list of 10 Patch objects>)
In [44]:
plt.hist(sal_churn.Churn_out_rate)
Out[44]:
(array([2., 1., 1., 2., 1., 0., 1., 1., 0., 1.]),
array([60. , 63.2, 66.4, 69.6, 72.8, 76. , 79.2, 82.4, 85.6, 88.8, 92.
]),
<a list of 10 Patch objects>)
https://onlinefreetutorial.com/wp-content/uploads/2020/03/simple-linear.html 15/29
1/16/23, 5:38 PM simple linear
In [45]:
sal_churn.describe()
Out[45]:
Salary_hike Churn_out_rate
In [46]:
model4=smf.ols("Churn_out_rate~Salary_hike",data=sal_churn).fit()
https://onlinefreetutorial.com/wp-content/uploads/2020/03/simple-linear.html 16/29
1/16/23, 5:38 PM simple linear
In [47]:
model4.summary()
C:\Users\Raja\Anaconda33\lib\site-packages\scipy\stats\stats.py:1394: User
Warning: kurtosistest only valid for n>=20 ... continuing anyway, n=10
"anyway, n=%i" % int(n))
Out[47]:
Df Model: 1
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 3.27e+04. This might indicate that there are
strong multicollinearity or other numerical problems.
In [48]:
model4.params
Out[48]:
Intercept 244.364911
Salary_hike -0.101543
dtype: float64
https://onlinefreetutorial.com/wp-content/uploads/2020/03/simple-linear.html 17/29
1/16/23, 5:38 PM simple linear
In [49]:
Out[49]:
0 1
In [50]:
In [51]:
model5=smf.ols("Churn_out_rate~np.log(Salary_hike)",data=sal_churn).fit()
https://onlinefreetutorial.com/wp-content/uploads/2020/03/simple-linear.html 18/29
1/16/23, 5:38 PM simple linear
In [52]:
model5.summary()
C:\Users\Raja\Anaconda33\lib\site-packages\scipy\stats\stats.py:1394: User
Warning: kurtosistest only valid for n>=20 ... continuing anyway, n=10
"anyway, n=%i" % int(n))
Out[52]:
Df Model: 1
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.1e+03. This might indicate that there are
strong multicollinearity or other numerical problems.
In [53]:
model5.params
Out[53]:
Intercept 1381.456193
np.log(Salary_hike) -176.109735
dtype: float64
https://onlinefreetutorial.com/wp-content/uploads/2020/03/simple-linear.html 19/29
1/16/23, 5:38 PM simple linear
In [54]:
Out[54]:
0 1
In [55]:
In [56]:
Out[56]:
Text(0,0.5,'Churn_out_rate')
The model with the highest R-squared value is the log-transformed model, model5.
====================================================================================
dataset = Salary_Data.csv
https://onlinefreetutorial.com/wp-content/uploads/2020/03/simple-linear.html 20/29
1/16/23, 5:38 PM simple linear
In [57]:
sal_hike=pd.read_csv("C:\\Users\\Raja\\Downloads\\assignments\\simple linear\\Salary_Data.csv")
In [58]:
sal_hike.head()
Out[58]:
YearsExperience Salary
0 1.1 39343.0
1 1.3 46205.0
2 1.5 37731.0
3 2.0 43525.0
4 2.2 39891.0
In [59]:
sal_hike.shape
Out[59]:
(30, 2)
In [60]:
sal_hike.describe()
Out[60]:
YearsExperience Salary
https://onlinefreetutorial.com/wp-content/uploads/2020/03/simple-linear.html 21/29
1/16/23, 5:38 PM simple linear
In [61]:
plt.boxplot(sal_hike.YearsExperience)
Out[61]:
In [62]:
plt.boxplot(sal_hike.Salary)
Out[62]:
https://onlinefreetutorial.com/wp-content/uploads/2020/03/simple-linear.html 22/29
1/16/23, 5:38 PM simple linear
In [63]:
sal_hike.corr()
Out[63]:
YearsExperience Salary
In [64]:
plt.hist(sal_hike.Salary, bins=20)
Out[64]:
(array([3., 1., 1., 1., 5., 2., 3., 1., 0., 0., 2., 0., 1., 1., 1., 1.,
2.,
2., 1., 2.]),
array([ 37731., 41964., 46197., 50430., 54663., 58896., 63129.,
67362., 71595., 75828., 80061., 84294., 88527., 92760.,
96993., 101226., 105459., 109692., 113925., 118158., 122391.]),
<a list of 20 Patch objects>)
https://onlinefreetutorial.com/wp-content/uploads/2020/03/simple-linear.html 23/29
1/16/23, 5:38 PM simple linear
In [65]:
Out[65]:
Text(0,0.5,'Salary')
In [66]:
model6=smf.ols("Salary~YearsExperience",data=sal_hike).fit()
https://onlinefreetutorial.com/wp-content/uploads/2020/03/simple-linear.html 24/29
1/16/23, 5:38 PM simple linear
In [67]:
model6.summary()
Out[67]:
Df Model: 1
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
In [68]:
model7=smf.ols("Salary~np.log(YearsExperience)",data=sal_hike).fit()
https://onlinefreetutorial.com/wp-content/uploads/2020/03/simple-linear.html 25/29
1/16/23, 5:38 PM simple linear
In [69]:
model7.summary()
Out[69]:
Df Model: 1
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
In [70]:
model8=smf.ols("Salary~np.exp(YearsExperience)",data=sal_hike).fit()
https://onlinefreetutorial.com/wp-content/uploads/2020/03/simple-linear.html 26/29
1/16/23, 5:38 PM simple linear
In [71]:
model8.summary()
Out[71]:
Df Model: 1
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.05e+04. This might indicate that there are
strong multicollinearity or other numerical problems.
In [72]:
model6.params
Out[72]:
Intercept 25792.200199
YearsExperience 9449.962321
dtype: float64
In [73]:
model7.params
Out[73]:
Intercept 14927.97177
np.log(YearsExperience) 40581.98796
dtype: float64
https://onlinefreetutorial.com/wp-content/uploads/2020/03/simple-linear.html 27/29
1/16/23, 5:38 PM simple linear
In [74]:
Out[74]:
0 1
In [75]:
In [76]:
Out[76]:
Text(0,0.5,'Salary')
In [77]:
https://onlinefreetutorial.com/wp-content/uploads/2020/03/simple-linear.html 28/29
1/16/23, 5:38 PM simple linear
In [78]:
Out[78]:
Text(0,0.5,'Salary')
https://onlinefreetutorial.com/wp-content/uploads/2020/03/simple-linear.html 29/29