# Download as pdf or txt
# Download as pdf or txt
# You are on page 1 of 4

# SOLUTIONS

##### QUESTION 1 #####

library(datasets)
### a ###
# Compare the mean weight of chicks on diet 2 with that of chicks on diet 1.
# Preview the ChickWeight data.
head(ChickWeight)

# Extract the weights as plain numeric vectors, named after the diet they
# actually contain (first sample = diet 2, second sample = diet 1).
weight_diet2 <- ChickWeight$weight[ChickWeight$Diet == 2]
weight_diet1 <- ChickWeight$weight[ChickWeight$Diet == 1]

## Performing t test ##
# One-sided two-sample test (t.test defaults to the Welch, unequal-variance form).
# H0: mean(diet 2) - mean(diet 1) >= 0   vs   H1: mean(diet 2) - mean(diet 1) < 0
t.test(weight_diet2, weight_diet1, alternative = "less")

## We fail to reject the null hypothesis since zero is in the confidence
## interval (-Inf, 32.48206).
## There is no evidence that the average weight of chicks on diet 2 is less
## than the average weight of chicks on diet 1.

#### b ####
## Extract the day-14 and day-20 weights for the diet under test.
# NOTE(review): the comments and conclusion of this part talk about diet 1, but
# the filter has always selected Diet == 2. The selection is kept unchanged
# here; confirm which diet the question intends and adjust `diet_under_test`.
diet_under_test <- 2

# Columns are referenced through the data frame rather than via
# attach(ChickWeight), which would leave the data on the search path for the
# rest of the session.
is_diet <- ChickWeight$Diet == diet_under_test
weight_day14 <- ChickWeight$weight[is_diet & ChickWeight$Time == 14]
weight_day20 <- ChickWeight$weight[is_diet & ChickWeight$Time == 20]

## Performing test ##
# One-sided two-sample test of H1: mean(day 14) - mean(day 20) > 30.
# NOTE(review): if the question is about weight GAINED between day 14 and
# day 20, the samples should be passed as (day 20, day 14); and because the
# same chicks are weighed on both days, a paired test may be more appropriate.
# Confirm against the question before changing the call.
t.test(weight_day14, weight_day20, alternative = "greater", mu = 30)
## We fail to reject the null hypothesis ##
## For the diet tested (see the note above), the difference in weight of chicks
## between day 14 and day 20 does not exceed 30gm

### c ###
# Show the diet categories being compared.
levels(ChickWeight$Diet)
## ANOVA test ##
# One-way ANOVA of weight on diet; H0: all diets have the same mean weight.
Output <- aov(weight ~ factor(Diet), data = ChickWeight)
summary(Output)

## We reject the null hypothesis. There is a significant difference in weight
## among the various diets (1,2,3,4)
### d ###
## Since there is a significant difference in weight among the various diets
## (1,2,3,4), we perform a post hoc test to find out which pairs of diets
## differ significantly in mean weight.
# Tukey's Honest Significant Difference: all pairwise comparisons of diet means.
TukeyHSD(Output)
## e ##
# From the output, diets 4 and 3 will be the best to maximize the weight of
# chicks. Though the average weight of chicks on diet 3 seems to be greater
# than that of diet 4, the difference is not significant.
# (There is not much difference in the weight of chicks on diets 4 and 3.)

###### Question 2 #####


# Claim amounts: 30 observations (the histogram title below gives the unit as
# GHC'000).
A <- c(
  3792, 2521, 2126, 3347, 9200, 2468,
  2830, 30189, 1007, 3339, 1665, 1434,
  2333, 3419, 1870, 3748, 3225, 2650,
  3338, 4463, 11991, 3296, 3296, 3433,
  2417, 4015, 3442, 3254, 2287, 2920
)
### a ###
# Descriptive statistics for the claim amounts.
# The script no longer calls install.packages() on every run: installing is a
# one-off, interactive step. If 'psych' is missing, fall back to base R
# summaries so the rest of the script still runs.
if (requireNamespace("psych", quietly = TRUE)) {
  library(psych)
  print(describe(A))
} else {
  message(
    "Package 'psych' is not installed; showing base summaries instead. ",
    "Run install.packages(\"psych\") once to get psych::describe()."
  )
  print(summary(A))
  print(c(sd = sd(A), n = length(A)))
}

## b ##
# Histogram of the claim amounts, one palette colour (2 to 8) per bar.
hist(
  x = A,
  main = "Histogram for claim amounts (in GHC'000)",
  xlab = "Claim Amounts",
  col = 2:8
)
## Preferred measure of centre: the median, because outliers have little
## effect on it.
## c ##
# Detecting outliers #
## From the histogram, there are outliers in the vector A
# Find the exact outlying values with the 1.5 * IQR rule: anything below
# Q1 - 1.5 * IQR or above Q3 + 1.5 * IQR is flagged.
# The quartiles are kept in ordinary variables instead of attach()-ing a
# temporary data frame, so nothing is left on the search path.
quartiles <- quantile(A, probs = c(0.25, 0.75), names = FALSE)
q1 <- quartiles[1]
q3 <- quartiles[2]
iqr <- q3 - q1

lower_fence <- q1 - 1.5 * iqr
upper_fence <- q3 + 1.5 * iqr

# Both tails are checked; the earlier version only looked above the upper fence.
Outliers <- A[A < lower_fence | A > upper_fence]
Outliers

### d ###
## Normality check ##
# Shapiro-Wilk test; H0: the claim amounts come from a normal distribution.
# Reject H0 when the printed p-value is below the chosen significance level.
shapiro.test(A)

# We fail to reject the null hypothesis that claims amount is normally
# distributed. Hence the proposal is right to fit a normal distribution to
# claims amount
# NOTE(review): this conclusion looks inconsistent with parts (b) and (c),
# which report outliers in A (e.g. the largest claim is far above the rest).
# Check the p-value printed by shapiro.test(A) and revise the conclusion if
# the test in fact rejects normality.
##### Question 3 ######
## a ##
# Build the data frame first, then expose its two columns as the vectors
# `Days` and `Weight` that the rest of the question works with.
DaWeight <- data.frame(
  Days = c(
    0, 4, 7, 7, 11, 18, 24, 30, 32, 43,
    43, 46, 60, 64, 70, 71, 73, 74, 84, 88
  ),
  Weight = c(
    184, 182, 180, 179, 177, 175, 173, 170, 169, 165,
    165, 163, 158, 155, 154, 152, 150, 147, 144, 140
  )
)
Days <- DaWeight$Days
Weight <- DaWeight$Weight

# Scatter plot of weight against days, drawn in red.
plot(x = Days, y = Weight, col = "red")

## b ##
## There exists a negative correlation between days and weight, meaning that
## as one spends more days in the rehabilitation centre, his or her weight
## reduces.

## c ##
# Sample (Pearson) correlation coefficient between days and weight.
cor(Days, Weight)

# Test of H0: the true correlation is zero; also reports a confidence interval.
cor.test(Days, Weight)
## From the confidence interval, we reject the null hypothesis

## d ##
# Simple linear regression of weight on days.
Model1 <- lm(Weight ~ Days)
summary(Model1)
## The intercept and the slope are both significant. We also have our model to
## be significant from the pvalue (2.2e-16). And also, the R-squared indicates
## that 98.85% of the variation in weight is explained by Days (time) spent in
## rehabilitation
## Hence our model: Weight = 183.69150 - 0.46152*Days

## e ##
# Predicted weight after 72, 86 and 180 days.
# Note: 180 days lies well beyond the observed range of Days (0 to 88), so
# that prediction is an extrapolation and should be treated with caution.
predict(Model1, newdata = data.frame(Days = c(72, 86, 180)))

## f ##

##### Question 4 #####


## a ##
library(MASS)
# Contingency table: car type (rows) by air-bag provision (columns).
TAB1 <- table(Cars93$Type, Cars93$AirBags)
TAB1
### b ###
# One colour per car type (table row). The previous fixed `1:4` would be
# recycled whenever the table has more than four rows, giving different car
# types the same colour; a legend is added so the colours can be read.
type_colours <- seq_len(nrow(TAB1))
## Stacked barplot ##
barplot(TAB1, col = type_colours, ylab = "frequency", legend.text = TRUE)
## Grouped barplot ##
barplot(
  TAB1,
  col = type_colours,
  ylab = "frequency",
  beside = TRUE,
  legend.text = TRUE
)

### c ###
# Chi Square test #
# H0: car type and air-bag provision are independent.
# NOTE(review): if R warns that the chi-squared approximation may be incorrect
# (small expected counts), consider chisq.test(TAB1, simulate.p.value = TRUE).
chisq.test(TAB1)

## There is a highly significant relationship between type of cars and air bags.

# You might also like