# Stat 240 Exams.r
library(datasets)
### a ###
# Compare the mean weight of chicks on diet 2 with that of chicks on diet 1.
# Preview the ChickWeight data.
head(ChickWeight)
# NOTE: the numeric suffix is the sample order, not the diet number:
# ChickDiet1 holds the diet 2 weights and ChickDiet2 holds the diet 1 weights.
ChickDiet1 <- subset(ChickWeight, Diet == 2, select = "weight")
ChickDiet2 <- subset(ChickWeight, Diet == 1, select = "weight")
## Performing t test ##
# Welch two-sample t test; H1: mean(diet 2) - mean(diet 1) < 0.
# The weight columns are passed as numeric vectors, which is what t.test()
# documents for x and y (the one-column data frames only worked incidentally).
t.test(ChickDiet1$weight, ChickDiet2$weight, alternative = "less")
## We fail to reject the null hypothesis since zero is in the confidence
## interval (-Inf, 32.48206).
## There is no evidence that the average weight of chicks on diet 2 is less
## than that of chicks on diet 1.
#### b ####
## Extract the diet 1 weights recorded on day 14 and on day 20.
# attach(ChickWeight) is not needed: subset() evaluates its condition inside
# the data frame, so Diet and Time resolve to ChickWeight's columns.
# The filter is Diet == 1 so that it agrees with the object names and with the
# conclusion below (the earlier version filtered on Diet == 2).
Diet1D14 <- subset(ChickWeight, Diet == 1 & Time == 14, select = "weight")
Diet1D20 <- subset(ChickWeight, Diet == 1 & Time == 20, select = "weight")
## Performing test ##
# H1: mean(day 20) - mean(day 14) > 30, i.e. the average gain from day 14 to
# day 20 exceeds 30 g. Day 20 goes first; with day 14 first the difference is
# negative and the test could never reject.
# NOTE(review): confirm this direction against the wording of the question.
t.test(Diet1D20$weight, Diet1D14$weight, alternative = "greater", mu = 30)
## We fail to reject the null hypothesis ##
## For diet 1, the gain in weight of chicks between day 14 and day 20 does not
## significantly exceed 30 g.
### c ###
# List the levels of the Diet factor.
levels(ChickWeight$Diet)
## ANOVA test ##
# One-way ANOVA of weight on diet, using every row of ChickWeight
# (observations from all time points are pooled).
Output <- aov(weight ~ factor(Diet), data = ChickWeight)
summary(Output)
## We reject the null hypothesis. There is a significant difference in weight
## among the various diets (1, 2, 3, 4).
### d ###
## Since there is a significant difference in weight among the diets
## (1, 2, 3, 4), we perform a post hoc test to find which pairs of diets differ
## significantly in mean weight.
# Tukey's honest significant differences on the fitted ANOVA object Output.
TukeyHSD(Output)
## e ##
# From the output, diets 4 and 3 are the best for maximizing the weight of chicks. Although
# the average weight of chicks on diet 3 appears to be greater than that on diet 4, the
# difference is not significant
# (there is not much difference in the weight of chicks on diets 4 and 3).
## b ##
# Histogram of the claim amounts held in A; the bars are filled with
# palette colours 2 to 8.
hist(
  x = A,
  col = 2:8,
  main = "Histogram for claim amounts (in GHC'000)",
  xlab = "Claim Amounts"
)
## Median, since it is not greatly affected by outliers.
## c ##
# Detecting outliers #
## From the histogram, there are outliers in the vector A
# Finding the exact values that are outliers #
# Quantiles of A (0%, 25%, 50%, 75%, 100%) stored as a one-row data frame.
Q <- as.data.frame(t(quantile(A)))
colnames(Q) <- c("q0", "q1", "q2", "q3", "q4")
# with(Q, ...) reads q1 and q3 from Q without attach(Q), which would leave
# q0..q4 on the search path for the rest of the script.
# 1.5 * IQR rule applied on both sides: values above q3 + 1.5 * IQR or below
# q1 - 1.5 * IQR are flagged (the earlier version checked the upper fence only).
Outliers <- with(
  Q,
  subset(A, A > q3 + 1.5 * (q3 - q1) | A < q1 - 1.5 * (q3 - q1))
)
# Show the flagged values; the assignment on its own prints nothing.
print(Outliers)
### d ###
## Normality check ##
# Shapiro-Wilk test; the null hypothesis is that A is normally distributed.
shapiro.test(A)
# We fail to reject the null hypothesis that the claim amounts are normally
# distributed. Hence the proposal to fit a normal distribution to the claim
# amounts is reasonable.
##### Question 3 ######
## a ##
# Build the data frame first, then expose its two columns as the stand-alone
# vectors Days and Weight that the later parts use.
DaWeight <- data.frame(
  Days = c(
    0, 4, 7, 7, 11, 18, 24, 30, 32, 43,
    43, 46, 60, 64, 70, 71, 73, 74, 84, 88
  ),
  Weight = c(
    184, 182, 180, 179, 177, 175, 173, 170, 169, 165,
    165, 163, 158, 155, 154, 152, 150, 147, 144, 140
  )
)
Days <- DaWeight$Days
Weight <- DaWeight$Weight
# Scatter plot of Weight against Days, with red points.
plot(x = Days, y = Weight, col = "red")
## b ##
## There is a negative correlation between days and weight, meaning that as one spends
## more days in the rehabilitation centre, his or her weight decreases.
## c ##
# Correlation coefficient between Days and Weight, followed by the test of
# the null hypothesis that the correlation is zero.
cor(x = Days, y = Weight)
cor.test(x = Days, y = Weight)
## Based on the confidence interval, we reject the null hypothesis
## d ##
# Simple linear regression of Weight on Days, fitted from the DaWeight data
# frame so the model carries its own data instead of relying on globals.
Model1 <- lm(Weight ~ Days, data = DaWeight)
summary(Model1)
## The intercept and the slope are both significant. The model as a whole is
## also significant (p-value < 2.2e-16), and the R-squared indicates that
## 98.85% of the variation in weight is explained by the days (time) spent in
## rehabilitation.
## Hence our model: Weight = 183.69150 - 0.46152*Days
## e ##
# Weights predicted by Model1 for stays of 72, 86 and 180 days.
predict(
  object = Model1,
  newdata = data.frame(Days = c(72, 86, 180))
)
## f ##
### c ###
# Chi-square test #
# TAB1 is defined outside this excerpt; presumably the cross-tabulation of
# car type against air bags referred to below.
chisq.test(x = TAB1)
## There is a highly significant relationship between type of cars and air bags.