Professional Documents
Culture Documents
Analysis of Factors Behind Rise of Home Prices Across US
Analysis of Factors Behind Rise of Home Prices Across US
In [3]: df.head()  # preview the first five rows of the loaded frame
1987-
0 63.965 241857 6.0 99.902813 9.2040 70.163085 1690.0
01-01
1987-
1 64.424 242005 6.2 99.875864 9.0825 70.289205 1689.0
02-01
1987-
2 64.735 242166 6.0 99.869734 9.0350 70.321678 1704.0
03-01
1987-
3 65.132 242338 6.0 99.882087 9.8325 70.499062 1601.0
04-01
1987-
4 65.565 242516 6.7 99.910371 10.5960 70.808308 1500.0
05-01
In [4]: df.info()  # per-column dtype and non-null count; reveals which columns have gaps
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 425 entries, 0 to 424
Data columns (total 13 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Date 425 non-null object
1 house_price_index 425 non-null float64
2 population 425 non-null int64
3 house_supply 425 non-null float64
4 gdp 422 non-null float64
5 mortgage_rate 422 non-null float64
6 employment_rate 422 non-null float64
7 permit_new 422 non-null float64
8 ppi_res 422 non-null float64
9 m3 422 non-null float64
10 cci 422 non-null float64
11 delinquency_rate 374 non-null float64
12 hcai 285 non-null float64
dtypes: float64(11), int64(1), object(1)
memory usage: 43.3+ KB
In [5]: df.shape  # (rows, columns) of the frame
(425, 13)
Out[5]:
In [6]: df.describe()  # summary statistics (count, mean, std, quartiles) for the numeric columns
Loading [MathJax]/extensions/Safe.js
Out[6]: house_price_index population house_supply gdp mortgage_rate employment_rate permit_n
In [8]: df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d')
# The raw Date strings are dash-separated: df.head() prints 1987-01-01 while
# df.info() still reports the column as object, so the format has to use '-'.
# NOTE(review): the previous '%Y/%m/%d' presumably only worked because older
# pandas releases did not check the separator for ISO-like formats; pandas 2.x
# matches the format strictly and raises ValueError. Confirm against the
# pinned pandas version.
1987-
0 63.965 241857 6.0 99.902813 9.2040 70.163085 1690
01-01
1987-
1 64.424 242005 6.2 99.875864 9.0825 70.289205 1689
02-01
1987-
2 64.735 242166 6.0 99.869734 9.0350 70.321678 1704
03-01
1987-
3 65.132 242338 6.0 99.882087 9.8325 70.499062 1601
04-01
1987-
4 65.565 242516 6.7 99.910371 10.5960 70.808308 1500
05-01
1987-
5 66.073 242706 6.9 99.954091 10.5375 70.585708 1522
06-01
1987-
6 66.508 242908 6.7 100.016021 10.2780 70.793518 1516
07-01
1987-
7 66.939 243118 6.8 100.098404 10.3300 70.999089 1511
08-01
1987-
8 67.331 243335 6.8 100.197931 10.8875 70.915448 1514
09-01
1987-
9 67.738 243543 6.8 100.299312 11.2600 71.106111 1447
10-01
In [7]: df.isnull()  # element-wise True/False mask of missing values for the whole frame
Loading [MathJax]/extensions/Safe.js
Out[7]: Date house_price_index population house_supply gdp mortgage_rate employment_rate permit_new p
In [10]: df.isnull().sum()  # number of missing values in each column
Date 0
Out[10]:
house_price_index 0
population 0
house_supply 0
gdp 3
mortgage_rate 3
employment_rate 3
permit_new 3
ppi_res 3
m3 3
cci 3
delinquency_rate 51
hcai 140
DAY 0
MONTH 0
YEAR 0
dtype: int64
In [11]: df['delinquency_rate'] = df['delinquency_rate'].fillna(df['delinquency_rate'].median())
# Fill the gaps in both columns with that column's median and assign the result
# back to the frame. Calling fillna(inplace=True) on a df[...] selection is
# chained in-place mutation: recent pandas warns about it, and with
# copy-on-write enabled it updates a temporary object and leaves df unchanged.
df['hcai'] = df['hcai'].fillna(df['hcai'].median())
In [13]: df.isnull().sum()  # re-count missing values per column after the fills above
Loading [MathJax]/extensions/Safe.js
Date 0
Out[13]:
house_price_index 0
population 0
house_supply 0
gdp 0
mortgage_rate 0
employment_rate 0
permit_new 0
ppi_res 0
m3 0
cci 0
delinquency_rate 0
hcai 0
DAY 0
MONTH 0
YEAR 0
dtype: int64
In [14]: df.columns  # list the column labels of the frame
Data Visualization
In [16]: for column in cols:
sns.distplot(cols[column])
plt.show()
Loading [MathJax]/extensions/Safe.js
Loading [MathJax]/extensions/Safe.js
Loading [MathJax]/extensions/Safe.js
Loading [MathJax]/extensions/Safe.js
Loading [MathJax]/extensions/Safe.js
Loading [MathJax]/extensions/Safe.js
In [17]: plt_1 = plt.figure(figsize=(10,10))
plt.xticks(rotation=90)
# Annotated correlation heatmap of the numeric columns only. df still carries
# the Date column at this point, and pandas >= 2.0 no longer drops non-numeric
# columns from corr() by default, so a bare df.corr() fails there. Selecting
# the numeric dtypes explicitly keeps the float/int columns that older pandas
# correlated implicitly and works on both old and new versions.
sns.heatmap(df.select_dtypes(include='number').corr(),annot=True)
<AxesSubplot:>
Out[17]:
Loading [MathJax]/extensions/Safe.js
In [18]: data_set = df.loc[:, ['DAY', 'house_price_index']]
# Scatter the house price index against the DAY column, drawn in purple.
sns.scatterplot(data=data_set, x='DAY', y='house_price_index', color='purple')
<AxesSubplot:xlabel='DAY', ylabel='house_price_index'>
Out[18]:
Loading [MathJax]/extensions/Safe.js
In [19]: data_set = df.loc[:, ['YEAR', 'house_price_index']]
# Scatter the house price index against the YEAR column, drawn in orange.
sns.scatterplot(data=data_set, x='YEAR', y='house_price_index', color='orange')
<AxesSubplot:xlabel='YEAR', ylabel='house_price_index'>
Out[19]:
<AxesSubplot:xlabel='MONTH', ylabel='house_price_index'>
Out[20]:
Loading [MathJax]/extensions/Safe.js
Analysis on a sample
In [21]: df1=df.sample(20, random_state=42)
# Draw 20 random rows for the small-sample plots that follow. The fixed
# random_state makes the draw repeatable: without a seed every fresh run picks
# different rows, so the sample-based figures could never be reproduced.
df1
Loading [MathJax]/extensions/Safe.js
Out[21]: Date house_price_index population house_supply gdp mortgage_rate employment_rate permit_n
2003-
193 129.355 289606 4.5 98.375113 5.8425 71.520526 185
02-01
2017-
370 195.866 327699 4.8 100.113491 3.9220 70.280019 129
11-01
1988-
15 69.977 244528 6.4 100.590991 10.2020 71.529216 142
04-01
1996-
108 81.835 268258 6.4 99.256632 7.0300 72.337947 138
01-01
2017-
365 190.522 326743 5.3 99.752263 3.9040 70.103544 134
06-01
2004-
208 148.185 292872 3.8 99.683189 6.2700 71.131627 215
05-01
2002-
186 122.888 288051 4.2 98.940396 6.4850 71.748101 173
07-01
2000-
160 103.677 281996 4.4 101.736906 8.5150 74.079531 154
05-01
1996-
116 83.258 270433 5.2 99.628047 8.2300 73.168792 139
09-01
1996-
112 82.611 269247 5.9 99.524263 8.0700 72.710103 145
05-01
1989-
33 76.283 248174 6.9 101.280035 9.9475 72.569040 136
10-01
2002-
188 124.780 288554 3.9 98.726530 6.0925 72.076044 180
09-01
2021-
418 276.429 332598 6.2 100.094880 3.0675 70.587317 172
11-01
1993-
82 78.149 261550 4.8 99.680219 7.1550 71.460550 135
11-01
2021-
414 263.349 332192 6.0 99.493405 2.8680 69.664968 165
07-01
1988-
20 72.240 245579 6.5 100.831870 10.4800 71.812109 143
09-01
1990-
39 77.278 249436 8.3 101.290583 10.3700 72.323204 113
04-01
2009-
272 148.023 307826 7.8 98.070172 5.0575 66.881083 60
09-01
1994-
91 79.782 263871 6.1 100.317324 8.5125 72.097205 137
08-01
2013-
319 156.973 317397 5.5 99.928057 4.4560 67.419391 96
08-01
In [22]: plt.figure(figsize=(10,8))  # start a new 10x8 figure; the calls below draw on it implicitly
sns.stripplot(data=df1,x='MONTH',y='house_supply')  # house_supply of the sampled rows (df1), one strip per MONTH value
plt.grid()  # called with no arguments, this toggles grid visibility on the current axes
Loading [MathJax]/extensions/Safe.js
In [23]: sns.lineplot(data=df1,x='gdp',y='mortgage_rate')  # mortgage_rate against gdp for the sampled rows in df1
plt.grid()  # called with no arguments, this toggles grid visibility on the current axes
plt.show()  # render the figure
Loading [MathJax]/extensions/Safe.js
In [24]: sns.barplot(data=df1,x='house_supply',y='employment_rate')  # employment_rate bars, one per distinct house_supply value in df1
plt.grid()  # called with no arguments, this toggles grid visibility on the current axes
plt.show()  # render the figure
Splitting
In [25]: X = df.drop(columns=['house_price_index', 'Date', 'DAY', 'MONTH', 'YEAR']).values
# Feature matrix: every column except the target and the Date/DAY/MONTH/YEAR
# columns, as a NumPy array. The bare name below displays it.
X
Loading [MathJax]/extensions/Safe.js
array([[2.41857000e+05, 6.00000000e+00, 9.99028134e+01, ...,
Out[25]:
1.00462400e+02, 2.47000000e+00, 5.95800000e+00],
[2.42005000e+05, 6.20000000e+00, 9.98758644e+01, ...,
1.00494500e+02, 2.47000000e+00, 5.95800000e+00],
[2.42166000e+05, 6.00000000e+00, 9.98697339e+01, ...,
1.00572000e+02, 2.47000000e+00, 5.95800000e+00],
...,
[3.32640000e+05, 5.60000000e+00, 1.00120622e+02, ...,
9.77348900e+01, 2.33000000e+00, 5.95800000e+00],
[3.32684000e+05, 5.70000000e+00, 1.00091744e+02, ...,
9.74946700e+01, 2.13000000e+00, 5.95800000e+00],
[3.32750000e+05, 6.00000000e+00, 1.00034014e+02, ...,
9.71899600e+01, 2.13000000e+00, 5.95800000e+00]])
In [26]: y = df.loc[:, 'house_price_index'].values
# Target vector: the house price index as a NumPy array. The bare name below
# displays it.
y
Loading [MathJax]/extensions/Safe.js
array([ 63.965, 64.424, 64.735, 65.132, 65.565, 66.073, 66.508,
Out[26]:
66.939, 67.331, 67.738, 68.107, 68.506, 68.859, 69.263,
69.639, 69.977, 70.426, 70.888, 71.354, 71.799, 72.24 ,
72.636, 73.072, 73.465, 73.947, 74.383, 74.778, 75.085,
75.306, 75.48 , 75.658, 75.836, 76.057, 76.283, 76.521,
76.705, 76.897, 77.053, 77.201, 77.278, 77.298, 77.258,
77.138, 77.009, 76.85 , 76.7 , 76.37 , 76.185, 75.916,
75.735, 75.57 , 75.567, 75.765, 75.993, 76.083, 76.11 ,
76.194, 76.075, 76.014, 76.056, 76.086, 76.155, 76.276,
76.346, 76.399, 76.332, 76.264, 76.23 , 76.239, 76.377,
76.559, 76.674, 76.784, 76.838, 76.868, 76.937, 77.037,
77.243, 77.429, 77.613, 77.795, 77.942, 78.149, 78.327,
78.592, 78.727, 78.857, 78.988, 79.223, 79.424, 79.596,
79.782, 79.919, 80.065, 80.15 , 80.297, 80.427, 80.529,
80.599, 80.661, 80.705, 80.786, 80.937, 81.111, 81.307,
81.483, 81.62 , 81.737, 81.835, 81.954, 82.194, 82.422,
82.611, 82.753, 82.929, 83.087, 83.258, 83.378, 83.553,
83.722, 83.956, 84.181, 84.453, 84.624, 84.862, 85.081,
85.333, 85.574, 85.851, 86.148, 86.633, 87.094, 87.616,
88.004, 88.443, 88.879, 89.365, 89.845, 90.311, 90.786,
91.26 , 91.719, 92.201, 92.713, 93.209, 93.672, 94.218,
94.785, 95.345, 95.976, 96.593, 97.221, 97.864, 98.524,
99.155, 99.846, 100.552, 101.339, 102.127, 102.922, 103.677,
104.424, 105.054, 105.767, 106.537, 107.382, 108.302, 109.14 ,
109.846, 110.5 , 111.108, 111.651, 112.163, 112.796, 113.491,
114.166, 114.811, 115.308, 115.855, 116.453, 117.143, 117.844,
118.687, 119.611, 120.724, 121.813, 122.888, 123.831, 124.78 ,
125.734, 126.669, 127.622, 128.461, 129.355, 130.148, 130.884,
131.735, 132.649, 133.776, 134.968, 136.294, 137.532, 138.794,
140.18 , 141.646, 143.192, 145.059, 146.592, 148.185, 149.85 ,
151.338, 152.633, 154.179, 155.751, 157.527, 159.33 , 161.288,
163.344, 165.812, 167.501, 169.351, 171.19 , 172.86 , 174.44 ,
176.437, 178.027, 179.681, 180.91 , 182.32 , 183.287, 184.364,
184.329, 184.156, 183.507, 183.067, 182.593, 182.799, 183.2 ,
183.611, 184.141, 184.518, 184.599, 184.15 , 183.011, 181.601,
180.254, 179.111, 178.117, 177.558, 176.624, 175.147, 174.341,
173.133, 171.542, 170.054, 168.337, 166.658, 165.017, 163.567,
161.989, 160.309, 158.329, 156.142, 153.618, 151.504, 150.012,
148.659, 147.949, 147.696, 148.09 , 148.409, 148.276, 148.023,
147.848, 148.133, 147.929, 147.395, 145.632, 145.86 , 146.401,
146.391, 145.717, 144.988, 143.912, 143.015, 142.525, 142.169,
142.061, 141.526, 140.356, 139.987, 140.011, 139.904, 139.86 ,
139.727, 139.306, 138.667, 137.954, 137.154, 136.676, 136.607,
136.529, 137.903, 139.155, 140.156, 141.029, 141.667, 142.277,
142.907, 143.6 , 144.585, 145.501, 146.835, 147.784, 149.965,
151.521, 152.854, 154.194, 155.606, 156.973, 158.234, 159.247,
160.075, 160.997, 161.948, 162.53 , 163.093, 163.4 , 163.666,
164.04 , 164.574, 165.219, 165.909, 166.646, 167.339, 168.058,
168.663, 169.138, 169.812, 170.312, 170.894, 171.437, 172.131,
172.948, 173.84 , 174.802, 175.747, 176.553, 177.302, 177.667,
178.193, 178.8 , 179.46 , 180.057, 180.848, 181.868, 182.837,
183.751, 184.74 , 185.69 , 186.793, 187.313, 188.032, 188.818,
189.707, 190.522, 191.499, 192.723, 193.786, 194.778, 195.866,
197.044, 198.201, 199.184, 200.038, 200.876, 201.646, 202.345,
203.015, 203.783, 204.366, 205.017, 205.464, 205.867, 206.266,
206.712, 207.202, 207.941, 208.58 , 208.947, 209.496, 210.231,
210.911, 211.598, 212.446, 213.434, 214.49 , 215.549, 216.602,
217.464, 217.689, 218.139, 219.702, 222.539, 225.793, 229.403,
232.673, 235.7 , 238.784, 241.845, 245.796, 250.094, 254.556,
259.249, 263.349, 267.028, 270.258, 273.154, 276.429, 280.19 ,
284.767, 290.371])
0.9880181997171065
Out[28]:
In [29]: print(model.predict([[269527,6.0,99.582313,8.3200,72.829141,1429.0,135.5,3.7225,100.7675
[85.9362418]
Loading [MathJax]/extensions/Safe.js