# Logistic Regression - Default
# Drop the first column (the serial number): R already numbers the rows itself,
# so the column carries no extra information.
Default <- Default[, -1, drop = FALSE]
# Now we fit the model. The function glm() performs logistic regression on the
# training set.
# The dependent variable is default.
# The dependent variable is on the LHS of the ~ sign; the independent variables
# are on the RHS of the ~ sign.
# The family = binomial option tells R to fit a logistic model, among the
# general class of models covered by glm().
# Individual predictors
# Logistic regression of default on balance, fitted only on the rows selected
# by `train`.
glm.fit.balance <- glm(
  default ~ balance,
  data = Default,
  subset = train,
  family = binomial
)
summary(glm.fit.balance)

# Fitted probabilities for the training rows. Without `newdata`, predict()
# returns one value per observation used in the fit; type = "response" puts
# them on the probability scale instead of the log-odds scale.
# NOTE(review): glm.probs was used below but never assigned in this script;
# it is taken from glm.fit.balance because that is the only model fitted
# here - confirm this is the intended model.
glm.probs <- predict(glm.fit.balance, type = "response")

# Print the estimated probabilities for the first ten observations.
# head() does not pad with NA when fewer than ten values are available.
head(glm.probs, 10)
# ROC curves ----
library(pROC)

# Training data: observed default status of the training rows against the
# probabilities stored in glm.probs.
r <- roc(
  response = Default[train, ][["default"]],
  predictor = glm.probs
)
plot(r)
auc(r)

# Validation data: same curve and AUC for the held-out set.
r.valid <- roc(
  response = valid[["default"]],
  predictor = glm.probs.valid
)
plot(r.valid)
auc(r.valid)
# Lift chart ----
# Install gains only when it is missing, so that re-running the script does not
# download and reinstall the package every time.
if (!requireNamespace("gains", quietly = TRUE)) {
  install.packages("gains")
}
library(gains)
# Training data
# Create a numeric 1-0 response (1 when default is "Yes", 0 otherwise).
train.y <- ifelse(Default[train, ]$default == "Yes", 1, 0)

# Request as many groups as there are training observations.
gain.train <- gains(train.y, glm.probs, groups = length(train.y))

# Gain curve: cumulative share of the response, rescaled by the total number
# of 1s, against the cumulative number of cases. The leading 0s anchor the
# curve at the origin.
plot(
  c(0, gain.train$cume.pct.of.total * sum(train.y)) ~
    c(0, gain.train$cume.obs),
  xlab = "No. of cases",
  ylab = "Cumulative",
  main = "Gain Chart",
  type = "l"
)

# Reference line from the origin to (number of cases, total number of 1s).
lines(c(0, sum(train.y)) ~ c(0, length(train.y)), col = "gray", lty = 2)
# Validation data
# Create a numeric 1-0 response (1 when default is "Yes", 0 otherwise).
valid.y <- ifelse(valid$default == "Yes", 1, 0)

# Request as many groups as there are validation observations.
gain.valid <- gains(valid.y, glm.probs.valid, groups = nrow(valid))

# Gain curve: cumulative share of the response, rescaled by the total number
# of 1s, against the cumulative number of cases. The leading 0s anchor the
# curve at the origin.
plot(
  c(0, gain.valid$cume.pct.of.total * sum(valid.y)) ~
    c(0, gain.valid$cume.obs),
  xlab = "No. of cases",
  ylab = "Cumulative",
  main = "Gain Chart",
  type = "l"
)

# Reference line from the origin to (number of cases, total number of 1s).
lines(c(0, sum(valid.y)) ~ c(0, nrow(valid)), col = "gray", lty = 2)