HW 7 Rstudio

You might also like

Download as pdf or txt
Download as pdf or txt
You are on page 1 of 1

HW 10/28 NICOLE NIEZABYTOWSKI 002 EDUARDO

3A.

install.packages("ISLR",repos = "http://cran.us.r-project.org")

##

## The downloaded binary packages are in

## /var/folders/s_/49bkwsl502146l1nl7hqgzy00000gn/T//Rtmp0A3Fi3/downloaded_packages

install.packages("leaps",repos = "http://cran.us.r-project.org")

##

## The downloaded binary packages are in

## /var/folders/s_/49bkwsl502146l1nl7hqgzy00000gn/T//Rtmp0A3Fi3/downloaded_packages

install.packages("glmnet",repos = "http://cran.us.r-project.org")

##

## The downloaded binary packages are in

## /var/folders/s_/49bkwsl502146l1nl7hqgzy00000gn/T//Rtmp0A3Fi3/downloaded_packages

install.packages("caret",repos = "http://cran.us.r-project.org")

##

## The downloaded binary packages are in

## /var/folders/s_/49bkwsl502146l1nl7hqgzy00000gn/T//Rtmp0A3Fi3/downloaded_packages

library(caret)

## Loading required package: ggplot2

## Loading required package: lattice

library(glmnet)

## Loading required package: Matrix

## Loaded glmnet 4.1-4

library(ISLR)

library(leaps)

# Reproducible 70/30 train/test split of the `College` data frame.
set.seed(3)

# seq_len() instead of 1:nrow(): on an empty data frame 1:0 would yield
# c(1, 0), whereas seq_len(0) is an empty index vector.
n_obs <- nrow(College)
train_index <- sample(seq_len(n_obs), round(n_obs * 0.7))

# Rows drawn above form the training set; report its share of the data.
train <- College[train_index, ]
nrow(train) / n_obs

## [1] 0.7001287

# Every row that was not drawn goes to the test set.
test <- College[-train_index, ]
nrow(test) / n_obs

## [1] 0.2998713

3B.

# Ordinary least squares on the training split: Apps regressed on every other
# column of `train`.
model_linear <- lm(Apps ~ ., data = train)

# Print the residual summary, coefficient table and overall fit statistics.
summary(model_linear)

##

## Call:

## lm(formula = Apps ~ ., data = train)

##

## Residuals:

## Min 1Q Median 3Q Max

## -3371.9 -437.6 -63.3 350.2 6233.4

##

## Coefficients:

## Estimate Std. Error t value Pr(>|t|)

## (Intercept) -605.81244 493.25798 -1.228 0.219927

## PrivateYes -584.35163 164.18527 -3.559 0.000406 ***

## Accept 1.32861 0.06124 21.696 < 2e-16 ***

## Enroll -0.24166 0.21699 -1.114 0.265908

## Top10perc 52.92817 6.99182 7.570 1.68e-13 ***

## Top25perc -17.86210 5.44140 -3.283 0.001097 **

## F.Undergrad 0.04199 0.03837 1.094 0.274342

## P.Undergrad 0.04110 0.03435 1.197 0.231996

## Outstate -0.07531 0.02325 -3.239 0.001274 **

## Room.Board 0.17973 0.05972 3.010 0.002742 **

## Books -0.09909 0.26773 -0.370 0.711436

## Personal 0.02642 0.07653 0.345 0.730076

## PhD -9.93047 5.51335 -1.801 0.072249 .

## Terminal -5.68887 6.08867 -0.934 0.350559

## S.F.Ratio 23.09251 15.20844 1.518 0.129514

## perc.alumni -6.52534 5.04819 -1.293 0.196713

## Expend 0.12521 0.01797 6.966 9.77e-12 ***

## Grad.Rate 10.65431 3.52288 3.024 0.002614 **

## ---

## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

##

## Residual standard error: 1042 on 526 degrees of freedom

## Multiple R-squared: 0.9215, Adjusted R-squared: 0.9189

## F-statistic: 363 on 17 and 526 DF, p-value: < 2.2e-16

# Score the OLS fit on the held-out rows and report the test-set mean squared
# error.
ols_pred <- predict(model_linear, newdata = test)
ols_mse <- mean((test$Apps - ols_pred)^2)
ols_mse

## [1] 1413287

3C.

# Forward stepwise selection of predictors for Apps.
# nvmax = 19 is above the 17 candidate variables, so every model size is kept.
# NOTE(review): this is fitted on the full `College` data rather than the
# `train` split used for the linear model -- confirm that is intended.
regfit.fwd <- regsubsets(
  Apps ~ .,
  data = College,
  nvmax = 19,
  method = "forward"
)

# Show which variables enter the model at each size.
summary(regfit.fwd)

## Subset selection object

## Call: regsubsets.formula(Apps ~ ., data = College, nvmax = 19, method = "forward")

## 17 Variables (and intercept)

## Forced in Forced out

## PrivateYes FALSE FALSE

## Accept FALSE FALSE

## Enroll FALSE FALSE

## Top10perc FALSE FALSE

## Top25perc FALSE FALSE

## F.Undergrad FALSE FALSE

## P.Undergrad FALSE FALSE

## Outstate FALSE FALSE

## Room.Board FALSE FALSE

## Books FALSE FALSE

## Personal FALSE FALSE

## PhD FALSE FALSE

## Terminal FALSE FALSE

## S.F.Ratio FALSE FALSE

## perc.alumni FALSE FALSE

## Expend FALSE FALSE

## Grad.Rate FALSE FALSE

## 1 subsets of each size up to 17

## Selection Algorithm: forward

## PrivateYes Accept Enroll Top10perc Top25perc F.Undergrad P.Undergrad

## 1 ( 1 ) " " "*" " " " " " " " " " "

## 2 ( 1 ) " " "*" " " "*" " " " " " "

## 3 ( 1 ) " " "*" " " "*" " " " " " "

## 4 ( 1 ) " " "*" " " "*" " " " " " "

## 5 ( 1 ) " " "*" "*" "*" " " " " " "

## 6 ( 1 ) " " "*" "*" "*" " " " " " "

## 7 ( 1 ) " " "*" "*" "*" "*" " " " "

## 8 ( 1 ) "*" "*" "*" "*" "*" " " " "

## 9 ( 1 ) "*" "*" "*" "*" "*" " " " "

## 10 ( 1 ) "*" "*" "*" "*" "*" " " " "

## 11 ( 1 ) "*" "*" "*" "*" "*" "*" " "

## 12 ( 1 ) "*" "*" "*" "*" "*" "*" "*"

## 13 ( 1 ) "*" "*" "*" "*" "*" "*" "*"

## 14 ( 1 ) "*" "*" "*" "*" "*" "*" "*"

## 15 ( 1 ) "*" "*" "*" "*" "*" "*" "*"

## 16 ( 1 ) "*" "*" "*" "*" "*" "*" "*"

## 17 ( 1 ) "*" "*" "*" "*" "*" "*" "*"

## Outstate Room.Board Books Personal PhD Terminal S.F.Ratio perc.alumni

## 1 ( 1 ) " " " " " " " " " " " " " " " "

## 2 ( 1 ) " " " " " " " " " " " " " " " "

## 3 ( 1 ) " " " " " " " " " " " " " " " "

## 4 ( 1 ) "*" " " " " " " " " " " " " " "

## 5 ( 1 ) "*" " " " " " " " " " " " " " "

## 6 ( 1 ) "*" "*" " " " " " " " " " " " "

## 7 ( 1 ) "*" "*" " " " " " " " " " " " "

## 8 ( 1 ) "*" "*" " " " " " " " " " " " "

## 9 ( 1 ) "*" "*" " " " " "*" " " " " " "

## 10 ( 1 ) "*" "*" " " " " "*" " " " " " "

## 11 ( 1 ) "*" "*" " " " " "*" " " " " " "

## 12 ( 1 ) "*" "*" " " " " "*" " " " " " "

## 13 ( 1 ) "*" "*" " " " " "*" " " "*" " "

## 14 ( 1 ) "*" "*" " " " " "*" "*" "*" " "

## 15 ( 1 ) "*" "*" " " "*" "*" "*" "*" " "

## 16 ( 1 ) "*" "*" "*" "*" "*" "*" "*" " "

## 17 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*"

## Expend Grad.Rate

## 1 ( 1 ) " " " "

## 2 ( 1 ) " " " "

## 3 ( 1 ) "*" " "

## 4 ( 1 ) "*" " "

## 5 ( 1 ) "*" " "

## 6 ( 1 ) "*" " "

## 7 ( 1 ) "*" " "

## 8 ( 1 ) "*" " "

## 9 ( 1 ) "*" " "

## 10 ( 1 ) "*" "*"

## 11 ( 1 ) "*" "*"

## 12 ( 1 ) "*" "*"

## 13 ( 1 ) "*" "*"

## 14 ( 1 ) "*" "*"

## 15 ( 1 ) "*" "*"

## 16 ( 1 ) "*" "*"

## 17 ( 1 ) "*" "*"

3D.

# Backward stepwise selection of predictors for Apps.
# nvmax = 19 is above the 17 candidate variables, so every model size is kept.
# NOTE(review): this is fitted on the full `College` data rather than the
# `train` split used for the linear model -- confirm that is intended.
regfit.bwd <- regsubsets(
  Apps ~ .,
  data = College,
  nvmax = 19,
  method = "backward"
)

# Show which variables remain in the model at each size.
summary(regfit.bwd)

## Subset selection object

## Call: regsubsets.formula(Apps ~ ., data = College, nvmax = 19, method = "backward")

## 17 Variables (and intercept)

## Forced in Forced out

## PrivateYes FALSE FALSE

## Accept FALSE FALSE

## Enroll FALSE FALSE

## Top10perc FALSE FALSE

## Top25perc FALSE FALSE

## F.Undergrad FALSE FALSE

## P.Undergrad FALSE FALSE

## Outstate FALSE FALSE

## Room.Board FALSE FALSE

## Books FALSE FALSE

## Personal FALSE FALSE

## PhD FALSE FALSE

## Terminal FALSE FALSE

## S.F.Ratio FALSE FALSE

## perc.alumni FALSE FALSE

## Expend FALSE FALSE

## Grad.Rate FALSE FALSE

## 1 subsets of each size up to 17

## Selection Algorithm: backward

## PrivateYes Accept Enroll Top10perc Top25perc F.Undergrad P.Undergrad

## 1 ( 1 ) " " "*" " " " " " " " " " "

## 2 ( 1 ) " " "*" " " "*" " " " " " "

## 3 ( 1 ) " " "*" " " "*" " " " " " "

## 4 ( 1 ) " " "*" " " "*" " " " " " "

## 5 ( 1 ) " " "*" "*" "*" " " " " " "

## 6 ( 1 ) " " "*" "*" "*" " " " " " "

## 7 ( 1 ) "*" "*" "*" "*" " " " " " "

## 8 ( 1 ) "*" "*" "*" "*" " " " " " "

## 9 ( 1 ) "*" "*" "*" "*" "*" " " " "

## 10 ( 1 ) "*" "*" "*" "*" "*" " " " "

## 11 ( 1 ) "*" "*" "*" "*" "*" "*" " "

## 12 ( 1 ) "*" "*" "*" "*" "*" "*" "*"

## 13 ( 1 ) "*" "*" "*" "*" "*" "*" "*"

## 14 ( 1 ) "*" "*" "*" "*" "*" "*" "*"

## 15 ( 1 ) "*" "*" "*" "*" "*" "*" "*"

## 16 ( 1 ) "*" "*" "*" "*" "*" "*" "*"

## 17 ( 1 ) "*" "*" "*" "*" "*" "*" "*"

## Outstate Room.Board Books Personal PhD Terminal S.F.Ratio perc.alumni

## 1 ( 1 ) " " " " " " " " " " " " " " " "

## 2 ( 1 ) " " " " " " " " " " " " " " " "

## 3 ( 1 ) " " " " " " " " " " " " " " " "

## 4 ( 1 ) "*" " " " " " " " " " " " " " "

## 5 ( 1 ) "*" " " " " " " " " " " " " " "

## 6 ( 1 ) "*" "*" " " " " " " " " " " " "

## 7 ( 1 ) "*" "*" " " " " " " " " " " " "

## 8 ( 1 ) "*" "*" " " " " "*" " " " " " "

## 9 ( 1 ) "*" "*" " " " " "*" " " " " " "

## 10 ( 1 ) "*" "*" " " " " "*" " " " " " "

## 11 ( 1 ) "*" "*" " " " " "*" " " " " " "

## 12 ( 1 ) "*" "*" " " " " "*" " " " " " "

## 13 ( 1 ) "*" "*" " " " " "*" " " "*" " "

## 14 ( 1 ) "*" "*" " " " " "*" "*" "*" " "

## 15 ( 1 ) "*" "*" " " "*" "*" "*" "*" " "

## 16 ( 1 ) "*" "*" "*" "*" "*" "*" "*" " "

## 17 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*"

## Expend Grad.Rate

## 1 ( 1 ) " " " "

## 2 ( 1 ) " " " "

## 3 ( 1 ) "*" " "

## 4 ( 1 ) "*" " "

## 5 ( 1 ) "*" " "

## 6 ( 1 ) "*" " "

## 7 ( 1 ) "*" " "

## 8 ( 1 ) "*" " "

## 9 ( 1 ) "*" " "

## 10 ( 1 ) "*" "*"

## 11 ( 1 ) "*" "*"

## 12 ( 1 ) "*" "*"

## 13 ( 1 ) "*" "*"

## 14 ( 1 ) "*" "*"

## 15 ( 1 ) "*" "*"

## 16 ( 1 ) "*" "*"

## 17 ( 1 ) "*" "*"

3E.

library(dplyr)

##

## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':

##

## filter, lag

## The following objects are masked from 'package:base':

##

## intersect, setdiff, setequal, union

# Build numeric design matrices for glmnet.
# The dummy-variable encoder is created once, from the training data only, and
# then applied to both splits so that train and test are guaranteed to share
# the same columns in the same order (the original re-created the encoder on
# the test set). fullRank = FALSE keeps one indicator column per factor level.
dummy_encoder <- dummyVars(Apps ~ ., data = train, fullRank = FALSE)

train_mat <- as.matrix(predict(dummy_encoder, newdata = train))
test_mat <- as.matrix(predict(dummy_encoder, newdata = test))

# 5-fold cross-validation for ridge regression (alpha = 0) over a fixed,
# log-spaced lambda grid running from 10^2 down to 10^-2.
set.seed(3)  # seed the RNG so the cross-validation run is reproducible

# `length.out` is written in full instead of relying on partial matching of
# `length` in seq().
model_ridge <- cv.glmnet(
  x = train_mat,
  y = train$Apps,
  alpha = 0,
  lambda = 10^seq(2, -2, length.out = 100),
  standardize = TRUE,
  nfolds = 5
)

# Cross-validated MSE versus lambda for the ridge fit, on a log10 x-axis.
# The blue reference lines mark lambda.min and the smallest CV error.
ridge_cv_curve <- data.frame(
  lambda = model_ridge$lambda,
  cv_mse = model_ridge$cvm
)
ridge_axis_ticks <- c(0.01, 0.1, 1, 10, 100)

ggplot(ridge_cv_curve, aes(x = lambda, y = cv_mse)) +
  geom_point() +
  geom_line() +
  geom_vline(xintercept = model_ridge$lambda.min, col = "deepskyblue3") +
  geom_hline(yintercept = min(model_ridge$cvm), col = "deepskyblue3") +
  scale_x_continuous(
    trans = "log10",
    breaks = ridge_axis_ticks,
    labels = ridge_axis_ticks
  ) +
  scale_y_continuous(labels = scales::comma_format())

# Refit ridge (alpha = 0) on the full training matrix over the lambda grid
# 10^2 .. 10^-2, then score the test set at the lambda chosen by
# cross-validation (model_ridge$lambda.min).
# `length.out` is written in full instead of relying on partial matching of
# `length` in seq().
model_ridge_best <- glmnet(
  x = train_mat,
  y = train$Apps,
  alpha = 0,
  lambda = 10^seq(2, -2, length.out = 100)
)

ridge_pred <- predict(
  model_ridge_best,
  s = model_ridge$lambda.min,
  newx = test_mat
)

# Test-set mean squared error; the outer parentheses print the value.
(ridge_mse <- mean((ridge_pred - test$Apps)^2))

## [1] 1545921

3F.

# 5-fold cross-validation for the lasso (alpha = 1) over a fixed, log-spaced
# lambda grid running from 10^2 down to 10^-2, with a tight convergence
# threshold (thresh = 1e-12).
set.seed(4)  # seed the RNG so the cross-validation run is reproducible

# `length.out` is written in full instead of relying on partial matching of
# `length` in seq().
model_lasso <- cv.glmnet(
  x = train_mat,
  y = train$Apps,
  alpha = 1,
  lambda = 10^seq(2, -2, length.out = 100),
  standardize = TRUE,
  nfolds = 5,
  thresh = 1e-12
)

# Cross-validated MSE versus lambda for the lasso fit, on a log10 x-axis.
# Points and line are coloured by the number of non-zero coefficients; the
# reference lines mark lambda.min and the smallest CV error.
lasso_cv_curve <- data.frame(
  lambda = model_lasso$lambda,
  cv_mse = model_lasso$cvm,
  nonzero_coeff = model_lasso$nzero
)
lasso_axis_ticks <- c(0.01, 0.1, 1, 10, 100)

ggplot(lasso_cv_curve, aes(x = lambda, y = cv_mse, col = nonzero_coeff)) +
  geom_point() +
  geom_line() +
  geom_vline(xintercept = model_lasso$lambda.min) +
  geom_hline(yintercept = min(model_lasso$cvm)) +
  scale_x_continuous(
    trans = "log10",
    breaks = lasso_axis_ticks,
    labels = lasso_axis_ticks
  )

# Refit the lasso (alpha = 1) on the full training matrix, then score the test
# set at the lambda chosen by cross-validation (model_lasso$lambda.min).
# `length.out` is written in full instead of relying on partial matching of
# `length` in seq().
# NOTE(review): this grid runs from 10^2 down to 10^-5 and uses the default
# `thresh`, whereas the cross-validated lasso fit used 10^2 .. 10^-2 with
# thresh = 1e-12, so lambda.min need not lie on this path's grid. Left as-is
# because the reported numbers depend on it -- confirm the mismatch is intended.
model_lasso_best <- glmnet(
  x = train_mat,
  y = train$Apps,
  alpha = 1,
  lambda = 10^seq(2, -5, length.out = 100)
)

lasso_pred <- predict(
  model_lasso_best,
  s = model_lasso$lambda.min,
  newx = test_mat
)

# Test-set mean squared error; the outer parentheses print the value.
(lasso_mse <- mean((lasso_pred - test$Apps)^2))

## [1] 1444668

# Extract the lasso coefficients at the CV-selected lambda and print them
# rounded to three decimals.
lasso_coef <- coef(model_lasso_best, s = model_lasso$lambda.min)
round(lasso_coef, digits = 3)

## 19 x 1 sparse Matrix of class "dgCMatrix"

## s1

## (Intercept) -1302.682

## Private.No 568.744

## Private.Yes 0.000

## Accept 1.281

## Enroll .

## Top10perc 46.089

## Top25perc -12.664

## F.Undergrad 0.018

## P.Undergrad 0.035

## Outstate -0.063

## Room.Board 0.164

## Books -0.004

## Personal 0.010

## PhD -8.092

## Terminal -5.653

## S.F.Ratio 18.699

## perc.alumni -7.071

## Expend 0.120

## Grad.Rate 9.274

3G.

# Ridge performed the worst on the test set (MSE 1,545,921); OLS (MSE 1,413,287) and the lasso (MSE 1,444,668) performed very similarly.

install.packages("tinytex",repos = "http://cran.us.r-project.org")

##

## The downloaded binary packages are in

## /var/folders/s_/49bkwsl502146l1nl7hqgzy00000gn/T//Rtmp0A3Fi3/downloaded_packages

You might also like