Import: Sys - Executable - M Pip Install

You might also like

Download as pdf or txt
Download as pdf or txt
You are on page 1of 23

9/29/2020 L20-2055-Assignment1-Part2

In [1]: # If you want to install any missing packages, then uncomment the lines given
below and run them
# to ensure that you have all the dependencies you need to run the notebook.
import sys
!{sys.executable} -m pip install 'missingno'

Collecting missingno
Downloading missingno-0.4.2-py3-none-any.whl (9.7 kB)
Collecting seaborn
Downloading seaborn-0.11.0-py3-none-any.whl (283 kB)
|████████████████████████████████| 283 kB 4.4 MB/s eta 0:00:01
Requirement already satisfied: scipy in /srv/conda/envs/notebook/lib/python3.
6/site-packages (from missingno) (1.5.2)
Requirement already satisfied: numpy in /srv/conda/envs/notebook/lib/python3.
6/site-packages (from missingno) (1.19.1)
Requirement already satisfied: matplotlib in /srv/conda/envs/notebook/lib/pyt
hon3.6/site-packages (from missingno) (3.3.2)
Requirement already satisfied: pandas>=0.23 in /srv/conda/envs/notebook/lib/p
ython3.6/site-packages (from seaborn->missingno) (1.1.2)
Requirement already satisfied: python-dateutil>=2.1 in /srv/conda/envs/notebo
ok/lib/python3.6/site-packages (from matplotlib->missingno) (2.8.1)
Requirement already satisfied: kiwisolver>=1.0.1 in /srv/conda/envs/notebook/
lib/python3.6/site-packages (from matplotlib->missingno) (1.2.0)
Requirement already satisfied: pillow>=6.2.0 in /srv/conda/envs/notebook/lib/
python3.6/site-packages (from matplotlib->missingno) (7.2.0)
Requirement already satisfied: certifi>=2020.06.20 in /srv/conda/envs/noteboo
k/lib/python3.6/site-packages (from matplotlib->missingno) (2020.6.20)
Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.3 in /s
rv/conda/envs/notebook/lib/python3.6/site-packages (from matplotlib->missingn
o) (2.4.7)
Requirement already satisfied: cycler>=0.10 in /srv/conda/envs/notebook/lib/p
ython3.6/site-packages/cycler-0.10.0-py3.6.egg (from matplotlib->missingno)
(0.10.0)
Requirement already satisfied: pytz>=2017.2 in /srv/conda/envs/notebook/lib/p
ython3.6/site-packages (from pandas>=0.23->seaborn->missingno) (2020.1)
Requirement already satisfied: six>=1.5 in /srv/conda/envs/notebook/lib/pytho
n3.6/site-packages (from python-dateutil>=2.1->matplotlib->missingno) (1.15.
0)
Installing collected packages: seaborn, missingno
Successfully installed missingno-0.4.2 seaborn-0.11.0

In [2]: # Libraries
import pandas as pd
import numpy as np
import missingno as mno
import seaborn as sns
import matplotlib.pyplot as plt

https://hub.gke2.mybinder.org/user/ipython-ipython-in-depth-3yryl251/nbconvert/html/L20-2055-Assignment1-Part2.ipynb?download=false 1/23
9/29/2020 L20-2055-Assignment1-Part2

In [3]: # Load training data


data = pd.read_csv('train.csv')
data = data.drop(columns=['Unnamed: 0'])
data.head()

Out[3]:
surface_area agricultural_land forest_area armed_forces_total urban_pop_major_cities urb

0 120540.0 2.632839e+06 5.417843e+06 1379000.0 55.747169

1 752610.0 2.403039e+07 4.957554e+07 16500.0 16.890687

2 1396.0 3.000000e+03 8.000000e+01 NaN 18.390090

3 2758812.0 1.228845e+08 1.050943e+08 1518650.0 50.966885

4 340.0 1.100000e+04 1.699000e+04 NaN 5.311885

In [4]: # Dimensions of training data


data.shape

Out[4]: (362, 16)

In [5]: # Explore columns


data.columns

Out[5]: Index(['surface_area', 'agricultural_land', 'forest_area',


'armed_forces_total', 'urban_pop_major_cities',
'urban_pop_minor_cities', 'national_income', 'inflation_annual',
'inflation_monthly', 'inflation_weekly', 'mobile_subscriptions',
'internet_users', 'secure_internet_servers_total',
'improved_sanitation', 'women_parliament_seats_rate',
'life_expectancy'],
dtype='object')

https://hub.gke2.mybinder.org/user/ipython-ipython-in-depth-3yryl251/nbconvert/html/L20-2055-Assignment1-Part2.ipynb?download=false 2/23
9/29/2020 L20-2055-Assignment1-Part2

In [6]: # Description
data.describe()

Out[6]:
surface_area agricultural_land forest_area armed_forces_total urban_pop_major_cities

count 3.620000e+02 3.580000e+02 3.570000e+02 3.180000e+02 360.000000

mean 4.021884e+06 1.594881e+08 1.204151e+08 9.849864e+05 27.659456

std 1.234491e+07 4.964143e+08 3.796623e+08 2.994686e+06 20.512885

min 3.030000e+01 3.000000e+02 0.000000e+00 5.000000e+01 0.091444

25% 2.783000e+04 1.054198e+06 4.951445e+05 1.218000e+04 10.624625

50% 2.037745e+05 5.360256e+06 3.928535e+06 5.352500e+04 24.459439

75% 1.081610e+06 4.221935e+07 2.241297e+07 2.598000e+05 38.587177

max 1.343253e+08 5.067600e+09 4.132117e+09 2.720662e+07 92.409069

In [7]: # Check Datatypes


data.dtypes

Out[7]: surface_area float64


agricultural_land float64
forest_area float64
armed_forces_total float64
urban_pop_major_cities float64
urban_pop_minor_cities float64
national_income object
inflation_annual float64
inflation_monthly float64
inflation_weekly float64
mobile_subscriptions object
internet_users object
secure_internet_servers_total float64
improved_sanitation object
women_parliament_seats_rate object
life_expectancy float64
dtype: object

https://hub.gke2.mybinder.org/user/ipython-ipython-in-depth-3yryl251/nbconvert/html/L20-2055-Assignment1-Part2.ipynb?download=false 3/23
9/29/2020 L20-2055-Assignment1-Part2

In [8]: data.head

https://hub.gke2.mybinder.org/user/ipython-ipython-in-depth-3yryl251/nbconvert/html/L20-2055-Assignment1-Part2.ipynb?download=false 4/23
9/29/2020 L20-2055-Assignment1-Part2

Out[8]: <bound method NDFrame.head of surface_area agricultural_land forest_a


rea armed_forces_total \
0 120540.0 2.632839e+06 5.417843e+06 1379000.0
1 752610.0 2.403039e+07 4.957554e+07 16500.0
2 1396.0 3.000000e+03 8.000000e+01 NaN
3 2758812.0 1.228845e+08 1.050943e+08 1518650.0
4 340.0 1.100000e+04 1.699000e+04 NaN
.. ... ... ... ...
357 243610.0 1.736999e+07 3.131596e+06 159150.0
358 9831510.0 4.357576e+08 3.326943e+08 1433150.0
359 176220.0 1.446148e+07 1.811858e+06 25450.0
360 330972.0 1.160670e+07 1.549346e+07 522000.0
361 350.0 4.000000e+03 1.782000e+04 NaN

urban_pop_major_cities urban_pop_minor_cities national_income \


0 55.747169 4.688831 unknown
1 16.890687 23.136313 very low
2 18.390090 23.139910 unknown
3 50.966885 24.522427 high
4 5.311885 30.271115 unknown
.. ... ... ...
357 42.818424 39.273576 high
358 22.948363 58.328637 very high
359 92.409069 2.573931 medium high
360 27.411815 4.897185 low
361 32.291719 62.772281 unknown

inflation_annual inflation_monthly inflation_weekly \


0 NaN NaN NaN
1 NaN 0.581473 NaN
2 NaN NaN NaN
3 1.374906 NaN NaN
4 -0.044229 NaN NaN
.. ... ... ...
357 2.554547 NaN NaN
358 1.464833 NaN NaN
359 NaN 0.714595 NaN
360 NaN 0.549355 NaN
361 NaN NaN NaN

mobile_subscriptions internet_users \
0 less than 1 per person 0 per 1000 people
1 less than 1 per person 154 per 1000 people
2 more than 1 per person 90 per 100 people
3 more than 1 per person 76 per 100 people
4 more than 1 per person 350 per 1000 people
.. ... ...
357 more than 1 per person 90 per 100 people
358 less than 1 per person 84 per 100 people
359 more than 1 per person 58 per 100 people
360 more than 1 per person 44 per 100 people
361 unknown 45 per 100 people

secure_internet_servers_total improved_sanitation \
0 NaN high access
1 2.623624e+06 low access
2 1.656589e+09 no info
https://hub.gke2.mybinder.org/user/ipython-ipython-in-depth-3yryl251/nbconvert/html/L20-2055-Assignment1-Part2.ipynb?download=false 5/23
9/29/2020 L20-2055-Assignment1-Part2

3 6.625072e+08 very high access


4 2.832808e+07 very high access
.. ... ...
357 1.193275e+09 very high access
358 1.304449e+09 very high access
359 7.511805e+07 very high access
360 8.159726e+06 medium access
361 4.010044e+08 very high access

women_parliament_seats_rate life_expectancy
0 [0%-25%) 69.494195
1 [0%-25%) 59.237366
2 unknown 81.300000
3 [25%-50%) 81.373197
4 [25%-50%) 73.193561
.. ... ...
357 [0%-25%) 80.956098
358 [0%-25%) 78.841463
359 [0%-25%) 76.836195
360 [0%-25%) 75.756488
361 unknown 79.624390

[362 rows x 16 columns]>

In [9]: # Check for any null or missing values


data.isnull().values.any()

Out[9]: True

In [10]: # Check missing values in each column of training data


data.isnull().sum()

Out[10]: surface_area 0
agricultural_land 4
forest_area 5
armed_forces_total 44
urban_pop_major_cities 2
urban_pop_minor_cities 2
national_income 0
inflation_annual 216
inflation_monthly 206
inflation_weekly 342
mobile_subscriptions 0
internet_users 0
secure_internet_servers_total 10
improved_sanitation 0
women_parliament_seats_rate 0
life_expectancy 0
dtype: int64

https://hub.gke2.mybinder.org/user/ipython-ipython-in-depth-3yryl251/nbconvert/html/L20-2055-Assignment1-Part2.ipynb?download=false 6/23
9/29/2020 L20-2055-Assignment1-Part2

In [11]: # print(data['mobile_subscriptions'].dtype)

col_list = [c for c in data.columns if data[c].dtype == 'object']


col_list

Out[11]: ['national_income',
'mobile_subscriptions',
'internet_users',
'improved_sanitation',
'women_parliament_seats_rate']

In [12]: data['women_parliament_seats_rate'].unique().tolist()

Out[12]: ['[0%-25%)', 'unknown', '[25%-50%)', '[50%-75%)']

In [13]: # Make a copy of data


train_data = data.copy()

In [14]: # train_data.loc[train_data['national_income'] == 'unknown', 'national_incom


e'] = -1
# train_data['national_income']

In [15]: # Change data types of columns


for c in col_list:
if c != 'internet_users':
train_data[c] = train_data[c].astype('category')
train_data[c] = train_data[c].cat.codes

In [16]: # Compare Actual and Encoded labels


for c in col_list:
if c != 'internet_users':
print('column:', c)
print(data[c].unique().tolist())
print(train_data[c].unique().tolist(), '\n')

column: national_income
['unknown', 'very low', 'high', 'medium low', 'medium high', 'low', 'very hig
h']
[4, 6, 0, 3, 2, 1, 5]

column: mobile_subscriptions
['less than 1 per person', 'more than 1 per person', 'more than 2 per perso
n', 'unknown', 'more than 3 per person']
[0, 1, 2, 4, 3]

column: improved_sanitation
['high access', 'low access', 'no info', 'very high access', 'medium access',
'very low access']
[0, 1, 3, 4, 2, 5]

column: women_parliament_seats_rate
['[0%-25%)', 'unknown', '[25%-50%)', '[50%-75%)']
[0, 3, 1, 2]

https://hub.gke2.mybinder.org/user/ipython-ipython-in-depth-3yryl251/nbconvert/html/L20-2055-Assignment1-Part2.ipynb?download=false 7/23
9/29/2020 L20-2055-Assignment1-Part2

In [17]: train_data['internet_users'].head()
Out[17]: 0 0 per 1000 people
1 154 per 1000 people
2 90 per 100 people
3 76 per 100 people
4 350 per 1000 people
Name: internet_users, dtype: object

In [18]: # Convert unknown in internet_uses to -1


train_data.loc[train_data['internet_users'] == 'unknown', 'internet_users'] =
-1

In [19]: train_data.loc[train_data['internet_users'].str.split(' ').str[2] == '1000',


'internet_users'] = train_data['internet_users'].str.split(' ')
.str[0].astype(float)

In [20]: train_data.loc[train_data['internet_users'].str.split(' ').str[2] == '100',


'internet_users'] = train_data['internet_users'].str.split(' ')
.str[0].astype(float) * 10

In [21]: train_data['internet_users'] = train_data['internet_users'].astype(float)

In [22]: train_data.isnull().sum()

Out[22]: surface_area 0
agricultural_land 4
forest_area 5
armed_forces_total 44
urban_pop_major_cities 2
urban_pop_minor_cities 2
national_income 0
inflation_annual 216
inflation_monthly 206
inflation_weekly 342
mobile_subscriptions 0
internet_users 0
secure_internet_servers_total 10
improved_sanitation 0
women_parliament_seats_rate 0
life_expectancy 0
dtype: int64

In [23]: train_data = train_data.drop(columns=['inflation_monthly', 'inflation_weekly'


])

https://hub.gke2.mybinder.org/user/ipython-ipython-in-depth-3yryl251/nbconvert/html/L20-2055-Assignment1-Part2.ipynb?download=false 8/23
9/29/2020 L20-2055-Assignment1-Part2

In [24]: # Deal with Missing Values


columns = ['agricultural_land', 'forest_area', 'armed_forces_total',
'urban_pop_minor_cities', 'urban_pop_major_cities', 'secure_interne
t_servers_total']

for c in columns:
missing_vals = train_data[c].isnull().sum()
observed_vals = train_data.loc[train_data[c].notnull(), c]
train_data.loc[train_data[c].isnull(), c] = observed_vals.mean()
# df.loc[df[c].isnull(), c] = np.random.choice(observed_vals, missing_val
s, replace = True)

In [25]: train_data.isnull().sum()

Out[25]: surface_area 0
agricultural_land 0
forest_area 0
armed_forces_total 0
urban_pop_major_cities 0
urban_pop_minor_cities 0
national_income 0
inflation_annual 216
mobile_subscriptions 0
internet_users 0
secure_internet_servers_total 0
improved_sanitation 0
women_parliament_seats_rate 0
life_expectancy 0
dtype: int64

In [26]: from sklearn import linear_model

# Regression to impute missing values in inflation_annual


# params = [c for c in train_data.columns if c != 'inflation_annual']
X = train_data[train_data['inflation_annual'].notnull()]
y = train_data.loc[train_data['inflation_annual'].notnull(), 'inflation_annua
l']
X = X.drop(columns=['inflation_annual'])
print(X.shape, y.shape)

(146, 13) (146,)

In [27]: model = linear_model.LinearRegression()


model.fit(X, y)

Out[27]: LinearRegression()

In [28]: X_miss = train_data[train_data['inflation_annual'].isnull()]


X_miss = X_miss.drop(columns = ['inflation_annual'])
print(X_miss.shape)

(216, 13)

https://hub.gke2.mybinder.org/user/ipython-ipython-in-depth-3yryl251/nbconvert/html/L20-2055-Assignment1-Part2.ipynb?download=false 9/23
9/29/2020 L20-2055-Assignment1-Part2

In [29]: train_data.loc[train_data['inflation_annual'].isnull(), 'inflation_annual'] =


model.predict(X_miss)

In [30]: train_data.isnull().sum()

Out[30]: surface_area 0
agricultural_land 0
forest_area 0
armed_forces_total 0
urban_pop_major_cities 0
urban_pop_minor_cities 0
national_income 0
inflation_annual 0
mobile_subscriptions 0
internet_users 0
secure_internet_servers_total 0
improved_sanitation 0
women_parliament_seats_rate 0
life_expectancy 0
dtype: int64

https://hub.gke2.mybinder.org/user/ipython-ipython-in-depth-3yryl251/nbconvert/html/L20-2055-Assignment1-Part2.ipynb?download=false 10/23
9/29/2020 L20-2055-Assignment1-Part2

In [31]: # plot joint plots


cols = train_data.columns.values
for c in cols:
if c != "life_expectancy":
sns.jointplot(x=c, y="life_expectancy", data=train_data, kind = 'reg',
height = 5)
plt.show()

https://hub.gke2.mybinder.org/user/ipython-ipython-in-depth-3yryl251/nbconvert/html/L20-2055-Assignment1-Part2.ipynb?download=false 11/23
9/29/2020 L20-2055-Assignment1-Part2

https://hub.gke2.mybinder.org/user/ipython-ipython-in-depth-3yryl251/nbconvert/html/L20-2055-Assignment1-Part2.ipynb?download=false 12/23
9/29/2020 L20-2055-Assignment1-Part2

https://hub.gke2.mybinder.org/user/ipython-ipython-in-depth-3yryl251/nbconvert/html/L20-2055-Assignment1-Part2.ipynb?download=false 13/23
9/29/2020 L20-2055-Assignment1-Part2

https://hub.gke2.mybinder.org/user/ipython-ipython-in-depth-3yryl251/nbconvert/html/L20-2055-Assignment1-Part2.ipynb?download=false 14/23
9/29/2020 L20-2055-Assignment1-Part2

https://hub.gke2.mybinder.org/user/ipython-ipython-in-depth-3yryl251/nbconvert/html/L20-2055-Assignment1-Part2.ipynb?download=false 15/23
9/29/2020 L20-2055-Assignment1-Part2

https://hub.gke2.mybinder.org/user/ipython-ipython-in-depth-3yryl251/nbconvert/html/L20-2055-Assignment1-Part2.ipynb?download=false 16/23
9/29/2020 L20-2055-Assignment1-Part2

https://hub.gke2.mybinder.org/user/ipython-ipython-in-depth-3yryl251/nbconvert/html/L20-2055-Assignment1-Part2.ipynb?download=false 17/23
9/29/2020 L20-2055-Assignment1-Part2

https://hub.gke2.mybinder.org/user/ipython-ipython-in-depth-3yryl251/nbconvert/html/L20-2055-Assignment1-Part2.ipynb?download=false 18/23
9/29/2020 L20-2055-Assignment1-Part2

In [32]: # Correlation heatmap


corr = train_data.corr()
plt.figure(figsize=(12, 10))

sns.heatmap(corr[(corr >= 0.5) | (corr <= -0.4)],


cmap='viridis', vmax=1.0, vmin=-1.0, linewidths=0.1,
annot=True, annot_kws={"size": 8}, square=True)

Out[32]: <AxesSubplot:>

https://hub.gke2.mybinder.org/user/ipython-ipython-in-depth-3yryl251/nbconvert/html/L20-2055-Assignment1-Part2.ipynb?download=false 19/23
9/29/2020 L20-2055-Assignment1-Part2

In [33]: # distribution plot of target variable (life expectancy)


plt.figure(figsize = (10, 6))
plt.title('Life Expectancy Distribuition')
sns.distplot(train_data['life_expectancy'])
plt.show()

/srv/conda/envs/notebook/lib/python3.6/site-packages/seaborn/distributions.p
y:2551: FutureWarning: `distplot` is a deprecated function and will be remove
d in a future version. Please adapt your code to use either `displot` (a figu
re-level function with similar flexibility) or `histplot` (an axes-level func
tion for histograms).
warnings.warn(msg, FutureWarning)

In [34]: # We have already removed inflation monthly and weekly.


# Fetures agricultural_land and forest_area are highly correlated with surface
_area.
# Remove agricultural_land and forest_area and keep surface_area only

train_data = train_data.drop(columns=['agricultural_land', 'forest_area'])


train_data.shape

Out[34]: (362, 12)

https://hub.gke2.mybinder.org/user/ipython-ipython-in-depth-3yryl251/nbconvert/html/L20-2055-Assignment1-Part2.ipynb?download=false 20/23
9/29/2020 L20-2055-Assignment1-Part2

In [35]: # Standardize data


from sklearn.preprocessing import StandardScaler

# Columns not to be standardized. These are columns with categorical data,


# also we don't standardize our target vraiable
cols = ['national_income', 'mobile_subscriptions', 'life_expectancy',
'improved_sanitation', 'women_parliament_seats_rate']

# Pick remaining columns and standardize them


columns = [c for c in train_data.columns if c not in cols]
scaler = StandardScaler()
scaler.fit(train_data[columns])
train_data[columns] = scaler.transform(train_data[columns])

In [36]: # Train Test Split

from sklearn.model_selection import train_test_split


y = train_data['life_expectancy']
X = train_data.drop(columns=['life_expectancy'])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
print("Training Set Dimensions:", X_train.shape)
print("Validation Set Dimensions:", X_test.shape)

Training Set Dimensions: (289, 11)


Validation Set Dimensions: (73, 11)

In [37]: # Train Random Forest Regressor


from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

randomf = RandomForestRegressor(n_estimators=200)
randomf.fit(X_train, y_train)

Out[37]: RandomForestRegressor(n_estimators=200)

In [38]: # Measure mean absolute error for training and validation sets
print('Mean Absolute Error for Training Set:', mean_absolute_error(y_train, ra
ndomf.predict(X_train)))
print('Mean Absolute Error for Test Set:', mean_absolute_error(y_test, randomf
.predict(X_test)))

Mean Absolute Error for Training Set: 0.9679729920224893


Mean Absolute Error for Test Set: 2.9123488644917677

https://hub.gke2.mybinder.org/user/ipython-ipython-in-depth-3yryl251/nbconvert/html/L20-2055-Assignment1-Part2.ipynb?download=false 21/23
9/29/2020 L20-2055-Assignment1-Part2

In [39]: # important features for random forest regressor


for name, importance in zip(X.columns, randomf.feature_importances_):
print('feature:', name, "=", importance)

importances = randomf.feature_importances_
indices = np.argsort(importances)
features = X.columns
plt.title('Feature Importances')
plt.barh(range(len(indices)), importances[indices], color='b', align='center')
plt.yticks(range(len(indices)), [features[i] for i in indices])
plt.xlabel('Relative Importance')
plt.show()

feature: surface_area = 0.037917725812982736


feature: armed_forces_total = 0.036362704142151404
feature: urban_pop_major_cities = 0.021111749158996815
feature: urban_pop_minor_cities = 0.027657999378785325
feature: national_income = 0.02616630094421569
feature: inflation_annual = 0.05156030026452149
feature: mobile_subscriptions = 0.00924431438871956
feature: internet_users = 0.6460833768912998
feature: secure_internet_servers_total = 0.11232880056916572
feature: improved_sanitation = 0.024492698534246463
feature: women_parliament_seats_rate = 0.00707402991491494

https://hub.gke2.mybinder.org/user/ipython-ipython-in-depth-3yryl251/nbconvert/html/L20-2055-Assignment1-Part2.ipynb?download=false 22/23
9/29/2020 L20-2055-Assignment1-Part2

In [ ]: # parameter grid for fine-tuning random forest regressor


# you can take a look at scikit-learn official documentation
# where you'll find the parameters of random forest regressor
from sklearn.model_selection import GridSearchCV
params = {'max_depth': [5, 10, 12],
'max_features': [2, 4, 6],
'min_samples_leaf': [3, 4, 5],
'min_samples_split': [8, 10, 12],
'n_estimators': [100, 200, 300]}

k = 5
score_param = 'neg_mean_absolute_error'
rf = RandomForestRegressor()
rf_grid = GridSearchCV(estimator=rf, param_grid=params, scoring=score_param, n
_jobs=-1, cv=k, verbose=True)
rf_grid.fit(X_train, y_train)

Fitting 5 folds for each of 243 candidates, totalling 1215 fits

[Parallel(n_jobs=-1)]: Using backend SequentialBackend with 1 concurrent work


ers.

In [ ]: # get best model (trained on best set of params) from grid search cv
best_model = rf_grid.best_estimator_

# get best model predictions for training and validation sets


predict_train = best_model.predict(X_train)
predict_test = best_model.predict(X_test)

# print mean absolute error for predictions taken from the best model
print('Mean Absolute Error for Training Set:', metrics.mean_absolute_error(y_t
rain, predict_train))
print('Mean Absolute Error for Validation Set:', metrics.mean_absolute_error(y
_test, predict_test))

In [ ]:

https://hub.gke2.mybinder.org/user/ipython-ipython-in-depth-3yryl251/nbconvert/html/L20-2055-Assignment1-Part2.ipynb?download=false 23/23

You might also like