Professional Documents
Culture Documents
Linear Regression Explained
In [1]:
#Importing libraries
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import linear_model
In [2]:
# Known houses as (area, price) pairs, unzipped into two parallel lists.
area, price = (
    list(column)
    for column in zip(
        (2600, 550000),
        (3000, 565000),
        (3200, 610000),
        (3600, 680000),
        (4000, 725000),
    )
)
In [3]:
dict={"Area":area,"Price":price}
In [4]:
pro=pd.DataFrame(dict)
pro.head()
0 2600 550000
1 3000 565000
2 3200 610000
3 3600 680000
4 4000 725000
In [5]:
# Write the table to CSV; index=False leaves the row index out of the file.
# NOTE(review): absolute, user-specific path - this cell only runs on a machine
# where that folder exists; the read cell below uses the same path.
pro.to_csv(
    path_or_buf=r"C:\Users\Chromsy\Downloads\homeprices.csv",
    index=False,
)
Equation of a line
$y = mx + b$
$m$ = slope of the line
$b$ = y-intercept of the line
$\text{Price} = m \cdot \text{Area} + b$
where Price is the dependent variable and Area is the independent variable
In [6]:
# Load the saved house-price table back from disk and preview the first rows.
df = pd.read_csv(
    filepath_or_buffer=r"C:\Users\Chromsy\Downloads\homeprices.csv",
)
df.head()
0 2600 550000
1 3000 565000
2 3200 610000
3 3600 680000
4 4000 725000
In [7]:
# Goal: predict the price of houses with areas of 3300 and 5000 square feet
In [8]:
# Scatter plot of the given (area, price) observations.
plt.scatter(df['Area'], df['Price'], marker='+', color='g', s=100)
# Label both axes so the figure can be read on its own
# (the x-axis was previously unlabeled).
plt.xlabel('Area(sq ft)', fontsize=20)
plt.ylabel('Price(USD$)', fontsize=20)
plt.show()
In [9]:
# Fit a linear regression of Price on Area.
# The feature is selected with double brackets so it is passed as a
# one-column DataFrame; the target is passed as a Series.
reg = linear_model.LinearRegression()
reg.fit(X=df[['Area']], y=df['Price'])
Out[9]: LinearRegression()
In [10]:
# Predicted price for a 3300 square foot house.
# The model was fitted on a DataFrame with an 'Area' column, so the query is
# passed as a DataFrame with the same column name; a bare nested list makes
# recent scikit-learn versions warn about missing feature names.
reg.predict(pd.DataFrame({'Area': [3300]}))
Out[10]: array([628715.75342466])
In [11]:
# Predicted price for a 5000 square foot house.
# The model was fitted on a DataFrame with an 'Area' column, so the query is
# passed as a DataFrame with the same column name; a bare nested list makes
# recent scikit-learn versions warn about missing feature names.
reg.predict(pd.DataFrame({'Area': [5000]}))
Out[11]: array([859554.79452055])
In [12]:
# Slope m of the fitted line y = m*area + b.
# The recorded output shows coef_ as a one-element array (one coefficient
# for the single feature).
m = reg.coef_
m  # trailing expression so the cell displays the value; an assignment alone shows nothing
Out[12]: array([135.78767123])
In [13]:
# Y-intercept b of the fitted line y = m*area + b.
b = reg.intercept_
b  # trailing expression so the cell displays the value; an assignment alone shows nothing
Out[13]: 180616.43835616432
In [14]:
# Check by hand: evaluate the line equation for a 3300 square foot house
# using the fitted slope m and intercept b.
b + 3300 * m
Out[14]: array([628715.75342466])
In [15]:
# Areas of several houses collected in a one-column DataFrame,
# so their prices can all be predicted in a single call.
Area = pd.DataFrame(
    {
        "Area": [
            1000, 1500, 2300, 3540, 4120, 4560, 5490,
            3460, 4750, 2300, 9000, 8600, 7100,
        ]
    }
)
Area
Out[15]: Area
0 1000
1 1500
2 2300
3 3540
4 4120
5 4560
6 5490
7 3460
8 4750
9 2300
10 9000
11 8600
12 7100
In [16]:
# Predict prices for every area in the DataFrame in one call.
# Only the 'Area' feature column is passed to the model: a later cell adds a
# 'Price' column to this same frame, and passing the whole frame would then
# make a re-run of this cell fail with a feature mismatch.
h = reg.predict(Area[['Area']])
h  # trailing expression so the cell displays the predicted prices
1144708.90410959])
In [17]:
# Store the predicted prices next to their areas as a 'Price' column,
# then display the combined table.
Area.loc[:, 'Price'] = h
Area
0 1000 3.164041e+05
1 1500 3.842979e+05
2 2300 4.929281e+05
3 3540 6.613048e+05
4 4120 7.400616e+05
5 4560 7.998082e+05
6 5490 9.260908e+05
7 3460 6.504418e+05
8 4750 8.256079e+05
9 2300 4.929281e+05
10 9000 1.402705e+06
11 8600 1.348390e+06
12 7100 1.144709e+06
In [18]:
# Regression line together with the given data (+) and the predicted points (*).
plt.figure(figsize=(15, 10))
# Given observations.
plt.scatter(df['Area'], df['Price'], marker='+', color='g', s=100)
# Fitted line drawn through the predicted points.
plt.plot(Area['Area'], h)
# Predicted prices for the new areas.
plt.scatter(Area['Area'], h, marker='*', color='red', s=100)
# Label both axes so the figure can be read on its own
# (the x-axis was previously unlabeled).
plt.xlabel('Area(sq ft)', fontsize=20)
plt.ylabel('Price(USD$)', fontsize=20)
plt.show()