# Regression Analysis Script
# STT061-M14
# Problem 1: Encode the Anscombe data set using Excel (Save the file)
# Problem 2: Export the xls file into a csv file (find the Export command).
# NOTE(review): `homedir` is not defined in the visible part of this script;
# it must already exist when this line runs.
setwd(homedir)

# Descriptive statistics for the Anscombe data.
# NOTE(review): `Anscombe` is presumably the data frame imported from the csv
# file (columns X1..X4, Y1..Y4); the import step is not visible here.
# The headings for Problems 4, 6, 7, 8 and 10 were missing and are inferred
# from the surrounding numbering -- confirm against the worksheet.

# Problem 4: Use R command to compute the mean of X1,X2,X3,X4
with(Anscombe, mean(X1))
with(Anscombe, mean(X2))
with(Anscombe, mean(X3))
with(Anscombe, mean(X4))

# Problem 5: Use R command to compute the mean of Y1,Y2,Y3,Y4
with(Anscombe, mean(Y1))
with(Anscombe, mean(Y2))
with(Anscombe, mean(Y3))
with(Anscombe, mean(Y4))

# Problem 6: Use R command to compute the variance of Y1,Y2,Y3,Y4
with(Anscombe, var(Y1))
with(Anscombe, var(Y2))
with(Anscombe, var(Y3))
with(Anscombe, var(Y4))

# Problem 7: Use R command to compute the variance of X1,X2,X3,X4
with(Anscombe, var(X1))
with(Anscombe, var(X2))
with(Anscombe, var(X3))
with(Anscombe, var(X4))

# Problem 8: Use R command to compute the sd of Y1,Y2,Y3,Y4
with(Anscombe, sd(Y1))
with(Anscombe, sd(Y2))
with(Anscombe, sd(Y3))
with(Anscombe, sd(Y4))

# Problem 9: Use R command to compute the sd X1,X2,X3,X4
with(Anscombe, sd(X1))
with(Anscombe, sd(X2))
with(Anscombe, sd(X3))
with(Anscombe, sd(X4))

# Problem 10: Use R command to compute the correlation of each (X, Y) pair
with(Anscombe, cor(X1, Y1))
with(Anscombe, cor(X2, Y2))
with(Anscombe, cor(X3, Y3))
with(Anscombe, cor(X4, Y4))
# Problem 11: Build the Simple Linear Regression Model for (X1, Y1).
# NOTE(review): this section reached review with statements missing (the
# model fit, the histograms, the ggplot() call). They are restored below
# following the fragments still visible in Problems 12-14 -- confirm against
# the original worksheet.

# Create the linear regression model. Fitting it inside this section makes
# sure every diagnostic below refers to (X1, Y1) and never to a `model`
# object left over from elsewhere in the session.
model <- lm(Y1 ~ X1, data = Anscombe)

# Residuals against fitted values: once by hand, once with the built-in
# diagnostic plot (which = 1, "Residuals vs Fitted").
plot(model$fitted.values, model$residuals)
plot(model, 1)
# Finding (note truncated in the source): ... approximately horizontal at 0.

# Step4: Verify Assumption3: Normality of Error terms.
# Check whether Y1 follows a normal distribution. Use also Test for
# normality and the QQ plot and the Normality Rule.
# lines() only adds to an existing plot, so the histogram is drawn first,
# on the density scale so that the density curve is comparable.
hist(Anscombe$Y1, freq = FALSE)
lines(density(Anscombe$Y1), col = 2, lwd = 3)
# Finding (note truncated in the source): ... is approximately normal

library(datawizard)
skewness(Anscombe$Y1)
kurtosis(Anscombe$Y1)
# Kurtosis = -0.535 < 0 indicating that the curve is flatter than normal

# Test for normality (the same test Problems 12-14 apply to their Y column).
shapiro.test(Anscombe$Y1)

# Perform the Empirical Normality rule:
# (This requires that the curve is symmetric)
# (note truncated in the source) ... because it has already been established
# that the curve is nearly symmetric.

# QQ plot of the residuals; qqnorm() creates the plot, qqline() adds the
# reference line.
qqnorm(model$residuals)
qqline(model$residuals)
# Assumption is satisfied.

# Scale-Location plot (which = 3), as drawn in Problems 12-14.
plot(model, 3)

# Plot the data, the fitted line and residuals
library(broom)
library(ggplot2)
# augment() returns the model data plus per-observation columns such as
# .fitted, so the plot can use bare column names inside aes().
augment_model <- augment(model)
ggplot(augment_model, aes(X1, Y1)) +
  geom_point() +
  stat_smooth(method = lm, se = TRUE) +
  geom_segment(aes(xend = X1, yend = .fitted), color = "red")
# As you can see the shaded band which represents the standard [...]
# [...] of x.   (note truncated in the source)

# ---------------------------------------------------------
# Final Conclusion:
# ---------------------------------------------------------
# Distribution of the residuals: histogram first, then the density overlay.
hist(model$residuals, freq = FALSE)
lines(density(model$residuals), col = 2, lwd = 3)
# ---------------------------------------------------------
# Problem 12: Build the Simple Linear Regression Model for (X2, Y2).
# NOTE(review): this section reached review with statements missing (the
# model fit, the histograms, the skewness/kurtosis calls, the ggplot()
# call). They are restored below following the fragments still visible in
# the sibling problem sections -- confirm against the original worksheet.

# Create the linear regression model. Without refitting here, every
# diagnostic below would silently reuse whatever `model` was fitted last.
model <- lm(Y2 ~ X2, data = Anscombe)
# correlation = 0.8162365 -

# Residuals against fitted values: once by hand, once with the built-in
# diagnostic plot (which = 1, "Residuals vs Fitted").
plot(model$fitted.values, model$residuals)
plot(model, 1)
# Conclusion:

# Check whether Y2 follows a normal distribution. Use also Test for
# normality and the QQ plot and the Normality Rule.
# lines() only adds to an existing plot, so the histogram is drawn first,
# on the density scale so that the density curve is comparable.
hist(Anscombe$Y2, freq = FALSE)
lines(density(Anscombe$Y2), col = 2, lwd = 3)
# Findings:

library(datawizard)
skewness(Anscombe$Y2)
kurtosis(Anscombe$Y2)
# Findings:
# Conclusion:

shapiro.test(Anscombe$Y2)

# Perform the Empirical Normality rule:
# (This requires that the curve is symmetric)
# Findings:
# Decision:

# QQ plot of the residuals; qqnorm() creates the plot, qqline() adds the
# reference line.
qqnorm(model$residuals)
qqline(model$residuals)
# Findings:
# Decision:

# Scale-Location plot (which = 3).
plot(model, 3)
# (note truncated in the source) ... x value.

# Plot the data, the fitted line and residuals
library(broom)
library(ggplot2)
# augment() returns the model data plus per-observation columns such as
# .fitted, so the plot can use bare column names inside aes().
augment_model <- augment(model)
ggplot(augment_model, aes(X2, Y2)) +
  geom_point() +
  stat_smooth(method = lm, se = TRUE) +
  geom_segment(aes(xend = X2, yend = .fitted), color = "red")

# ---------------------------------------------------------
# Final Conclusion:
# ---------------------------------------------------------
# Distribution of the residuals: histogram first, then the density overlay.
hist(model$residuals, freq = FALSE)
lines(density(model$residuals), col = 2, lwd = 3)
# ---------------------------------------------------------
# Problem 13: Build the Simple Linear Regression Model for (X3, Y3).
# NOTE(review): this section reached review with statements missing (the
# model fit, the histograms, the skewness/kurtosis calls, qqnorm(), the
# ggplot() call). They are restored below following the fragments still
# visible in the sibling problem sections -- confirm against the original
# worksheet.

# create the linear regression model
# Without refitting here, every diagnostic below would silently reuse
# whatever `model` was fitted last.
model <- lm(Y3 ~ X3, data = Anscombe)
# correlation = 0.8162867 -

# Residuals against fitted values: once by hand, once with the built-in
# diagnostic plot (which = 1, "Residuals vs Fitted").
plot(model$fitted.values, model$residuals)
plot(model, 1)
# Conclusion:

# Check whether Y3 follows a normal distribution. Use also Test for
# normality and the QQ plot and the Normality Rule.
# lines() only adds to an existing plot, so the histogram is drawn first,
# on the density scale so that the density curve is comparable.
hist(Anscombe$Y3, freq = FALSE)
lines(density(Anscombe$Y3), col = 2, lwd = 3)
# Findings:

library(datawizard)
skewness(Anscombe$Y3)
kurtosis(Anscombe$Y3)
# Findings:
# Conclusion:

shapiro.test(Anscombe$Y3)
#

# Perform the Empirical Normality rule:
# (This requires that the curve is symmetric)
# Findings:
# Decision:

# QQ plot of the residuals. qqline() only adds a reference line, so
# qqnorm() must create the plot first.
qqnorm(model$residuals)
qqline(model$residuals)
# Findings:
# Decision:

# Scale-Location plot (which = 3).
plot(model, 3)
# (note truncated in the source) ... x value.

# Plot the data, the fitted line and residuals
library(broom)
library(ggplot2)
# augment() returns the model data plus per-observation columns such as
# .fitted, so the plot can use bare column names inside aes().
augment_model <- augment(model)
ggplot(augment_model, aes(X3, Y3)) +
  geom_point() +
  stat_smooth(method = lm, se = TRUE) +
  geom_segment(aes(xend = X3, yend = .fitted), color = "red")
# Conclusion:

# ---------------------------------------------------------
# Final Conclusion:
# ---------------------------------------------------------
# Distribution of the residuals: histogram first, then the density overlay.
hist(model$residuals, freq = FALSE)
lines(density(model$residuals), col = 2, lwd = 3)
# ---------------------------------------------------------
# Problem 14: Build the Simple Linear Regression Model for (X4, Y4).
# NOTE(review): this section reached review with statements missing (the
# model fit, the histograms, the skewness/kurtosis calls, the creation of
# `augment_model`, the tail of the ggplot chain). They are restored below
# following the fragments still visible in the sibling problem sections --
# confirm against the original worksheet.

# Create the linear regression model. Without refitting here, every
# diagnostic below would silently reuse whatever `model` was fitted last.
model <- lm(Y4 ~ X4, data = Anscombe)
# correlation = 0.8165214 -

# Residuals against fitted values: once by hand, once with the built-in
# diagnostic plot (which = 1, "Residuals vs Fitted").
plot(model$fitted.values, model$residuals)
plot(model, 1)
# Conclusion:

# Check whether Y4 follows a normal distribution. Use also Test for
# normality and the QQ plot and the Normality Rule.
# lines() only adds to an existing plot, so the histogram is drawn first,
# on the density scale so that the density curve is comparable.
hist(Anscombe$Y4, freq = FALSE)
lines(density(Anscombe$Y4), col = 2, lwd = 3)
# Findings:

library(datawizard)
skewness(Anscombe$Y4)
kurtosis(Anscombe$Y4)
# Findings:
# Conclusion:

shapiro.test(Anscombe$Y4)
# W = 0.87536, p-value = 0.09081

# Perform the Empirical Normality rule:
# (This requires that the curve is symmetric)
# Findings:
# Decision:

# QQ plot of the residuals; qqnorm() creates the plot, qqline() adds the
# reference line.
qqnorm(model$residuals)
qqline(model$residuals)
# Findings:
# Decision:

# Scale-Location plot (which = 3).
plot(model, 3)
# Findings:

# Plot the data, the fitted line and residuals
library(broom)
library(ggplot2)
# augment() returns the model data plus per-observation columns such as
# .fitted. Bare column names are used inside aes() instead of
# `Anscombe$X4`, so the aesthetics are looked up in the plotted data frame.
augment_model <- augment(model)
ggplot(augment_model, aes(X4, Y4)) +
  geom_point() +
  stat_smooth(method = lm, se = TRUE) +
  geom_segment(aes(xend = X4, yend = .fitted), color = "red")

# ---------------------------------------------------------
# Final Conclusion:
# ---------------------------------------------------------
# Distribution of the residuals: histogram first, then the density overlay.
hist(model$residuals, freq = FALSE)
lines(density(model$residuals), col = 2, lwd = 3)
# ---------------------------------------------------------
# Short Answer:
# ---------------------------------------------------------