# Professional Documents
# Culture Documents
# (1)
# (1)
# Chapter 2, computer exercise C2 (p. 64) ----
# read.csv() reads a CSV data file. `file` is the path plus file name (spell it
# exactly as on disk); `header` says whether the first row holds column names.
# header = FALSE here, so the columns get the default names V1, V2, ...
dCEO <- read.csv(file = "E:/THI/ceosal2.csv", header = FALSE)

# (i) Find the average salary and the average tenure in the sample.
# Columns are addressed as data$column; the data-frame name must match the
# object created above.
mean(dCEO$V1) # average salary
# ceoten is V6 in parts (ii) and (iii) of this exercise, so the average tenure
# is taken from V6 as well (the notes originally averaged V5 here).
mean(dCEO$V6)

# (ii) How many CEOs are in their first year as CEO (that is, ceoten = 0)?
# What is the longest tenure as a CEO?
# subset(data, condition) keeps the rows that satisfy the condition; nrow()
# turns the filtered rows into the requested count.
nrow(subset(dCEO, V6 == 0))
# Recorded result: 5
max(dCEO$V6)
# Recorded result: 37

# (iii) Estimate the simple regression model
#   log(salary) = b0 + b1 * ceoten + u,  i.e.  V10 = b0 + b1 * V6 + u
# and find b0-hat and b1-hat.
# lm(formula, data) fits the model; the fit is stored once and reused below.
regC2 <- lm(V10 ~ V6, data = dCEO)
regC2
# Recorded estimates: b0-hat = 6.50, b1-hat = 0.009724
# What is the (approximate) predicted percentage increase in salary given one
# more year as a CEO?
# Two CEOs whose tenure differs by one year have predicted salaries that
# differ by about 0.9724% (100 * 0.009724).
summary(regC2)

# Exercise C3 ----
dSLEEP <- read.csv(file = "E:/THI/sleep75.csv", header = FALSE)
# (i) Report your results in equation form along with the number of
# observations and ... (the rest of the question is missing from these notes).
# Session of 22/9/2022
# Exercise C4 ----
# header = FALSE: the columns get the default names V1, V2, ...
dWAGE <- read.csv(file = "E:/THI/wage2.csv", header = FALSE)

# (i) Find the average salary and average IQ in the sample.
mean(dWAGE$V1)
# Recorded result: mean salary is 957.9455
mean(dWAGE$V3)
# Recorded result: mean IQ is 101.2824
# What is the sample standard deviation of IQ?
# (IQ scores are standardized so that the average in the population
# is 100 with a standard deviation equal to 15.)
sd(dWAGE$V1)
sd(dWAGE$V3)

# Notes on a 15-point increase in IQ. The regressions that produced the slope
# values quoted below are not part of this script.
#   delta(IQ) = +15
#   delta(wage) = delta(IQ) * b1 = 8.303 * 15 = 124.545
#   R2 (no value was recorded)
#   delta(wage) = delta(IQ) * b1 = 0.0088 * 15 = 0.132
#   (presumably from a log(wage) model, given the percentage reading below)
# When IQ rises by 15 points, the predicted wage rises by about 13.2%.
# (iii) (presumably Chapter 3, exercise C1, judging by the object names below)
# NOTE(review): dBWGHT is not created anywhere in the visible script; the data
# set has to be read before this section can run.
if (!exists("dBWGHT")) {
  stop("dBWGHT is not loaded; read the data set before running this section",
       call. = FALSE)
}

# Without faminc: bwght = b0 + b1 * V10 + u (bwght is V4).
# Each model is fitted once; summary() reports the coefficients, the number
# of observations used and R-squared.
regc3c1.iii <- lm(V4 ~ V10, data = dBWGHT)
summary(regc3c1.iii)
# Recorded result: sample size = 1388 obs, R-squared = 0.02273

# With faminc (V1) added as a second regressor.
regc3c1.iiib <- lm(V4 ~ V10 + V1, data = dBWGHT)
summary(regc3c1.iiib)
# Author's note (left unfinished): if one of two families has one more unit of
# family income and one more unit of smoking, then the child's birth weight ...
# Recorded result: multiple R-squared is 0.0298
# Session of 29/9/2022
# Exercise C2, p. 110 ----
# header = FALSE: the columns get the default names V1, V2, ...
dHPRiCE <- read.csv(file = "E:/THI/hprice1.csv", header = FALSE)

# Model: price = b0 + b1 * sqrft + b2 * bdrms + u
#        V1    = b0 + b1 * V5    + b2 * V3    + u
# (i) Write out the results in equation form.
reg7 <- lm(V1 ~ V5 + V3, data = dHPRiCE)
summary(reg7)
# Recorded estimates: b0 = -19.32, b1 = 0.13, b2 = 15.2
#   price-hat = -19.32 + 0.13 * sqrft + 15.2 * bdrms

# (ii) What is the estimated increase in price for a house with one more
# bedroom, holding square footage constant?
#   delta(price-hat) = 15.2 * delta(bdrms) = 15.2 * 1 = 15.2
# One more bedroom, with square footage unchanged, raises the predicted price
# by 15.2 (price is measured in thousands of dollars, see part vi).

# (iii) What is the estimated increase in price for a house with an additional
# bedroom that is 140 square feet in size? Compare this to your answer in
# part (ii).
# Both regressors change here: one more bedroom and 140 more square feet.
15.2 + 0.13 * 140
# = 33.4, larger than the 15.2 of part (ii) because the extra area also counts.

# (iv) What percentage of the variation in price is explained by square footage
# and number of bedrooms?
# Recorded result: R-squared = 0.6233 = 62.33%

# (v) The first house in the sample has sqrft = 2,438 and bdrms = 4. Find the
# predicted selling price for this house from the OLS regression line.
price_hat_first <- -19.32 + 0.13 * 2438 + 15.2 * 4
price_hat_first
# = 358.42 with the rounded coefficients above

# (vi) The actual selling price of the first house in the sample was $300,000
# (so price = 300). Find the residual for this house. Does it suggest that the
# buyer underpaid or overpaid for the house?
# residual = actual - predicted, both in thousands of dollars. The original
# notes subtracted an unexplained 7.051 and mixed 300000 with thousands.
300 - price_hat_first
# = -58.42: the predicted price is above the actual selling price, so the
# buyer underpaid.
# Exercise C3, p. 111 ----
# header = FALSE: the columns get the default names V1, V2, ...
dCEO <- read.csv(file = "E:/THI/ceosal2.csv", header = FALSE)

# (i) Estimate a model relating annual salary to firm sales and market value.
# Make the model of the constant elasticity variety for both independent
# variables. Write the results out in equation form.
#   log(salary) = b0 + b1 * log(sales) + b2 * log(mktval) + u
#   V10         = b0 + b1 * V11        + b2 * V12         + u
regc3.i <- lm(V10 ~ V11 + V12, data = dCEO)
summary(regc3.i)
# Recorded: log(salary)-hat = 4.62 + 0.16 * log(sales) + 0.11 * log(mktval)

# (ii) Add profits to the model from part (i). Why can this variable not be
# included in logarithmic form? Would you say that these firm performance
# variables explain most of the variation in CEO salaries?
#   log(salary) = b0 + b1 * log(sales) + b2 * log(mktval) + b3 * profits + u
regc3.iiii <- lm(V10 ~ V11 + V12 + V8, data = dCEO)
summary(regc3.iiii)
# Some observations have negative profits, so log(profits) cannot be taken.
# Recorded: log(salary)-hat = 4.69 + 0.16 * log(sales) + 0.098 * log(mktval)
#                             + 0.000036 * profits
# Author's note: sales is the variable that explains salary best because it
# has the largest effect.
# NOTE(review): in this constant-elasticity form the 0.16 is an elasticity
# (1% more sales goes with about 0.16% more salary), not "0.16 million per
# unit of sales" as the notes originally said.

# (iii) Add the variable ceoten to the model in part (ii). What is the
# estimated percentage return for another year of CEO tenure, holding other
# factors fixed?
#   log(salary) = b0 + b1 * log(sales) + b2 * log(mktval) + b3 * profits
#                 + b4 * ceoten + u
regc3.iiiii <- lm(V10 ~ V11 + V12 + V8 + V6, data = dCEO)
summary(regc3.iiiii)
# Recorded: log(salary)-hat = 4.56 + 0.16 * log(sales) + 0.01 * log(mktval)
#                             + 0.000029 * profits + 0.1012 * ceoten
# NOTE(review): the recorded 0.1012 on ceoten does not agree with the 1.2%
# conclusion below (that needs a coefficient near 0.012); re-read the value
# from the summary() output.
# One more year of CEO tenure raises predicted salary by about 1.2%.

# (iv) Find the sample correlation coefficient between the variables
# log(mktval) and profits. Are these variables highly correlated? What does
# this say about the OLS estimators?
cor(dCEO$V12, dCEO$V8)
# Recorded: correlation = 0.7768976, which is high. Author's conclusion: this
# does not bias the OLS estimators.
# Exercise C4, p. 111 ----
# header = FALSE: the columns get the default names V1, V2, ...
dATTEND <- read.csv(file = "E:/THI/attend.csv", header = FALSE)

# (i) Summary statistics for atndrte (V6), priGPA (V3) and ACT (V4).
summary(dATTEND[, c(6, 3, 4)])
# Recorded results:
#             Min     Max      Mean
#   atndrte   6.25    100.00   81.71
#   priGPA    0.857   3.93     2.587
#   ACT       13      32       22.51
# So the attendance rate ranges from 6.25% to 100% with a mean of 81.71%.
# Prior GPA ranges from 0.857 to 3.93 with a mean of 2.587.
# The ACT score ranges from 13 to 32 with a mean of 22.51.

# (ii)
summary(lm(V6 ~ V3 + V4, data = dATTEND))
# Recorded: atndrte-hat = 75.700 + 17.261 * priGPA - 1.717 * ACT
# When priGPA = 0 and ACT = 0 the predicted atndrte is 75.7%.
# That case is not very sensible: prior GPA and ACT are both zero, yet the
# predicted attendance rate is still 75.7%.

# (iii)
# delta(priGPA) = +1  =>  delta(atndrte) = +17.261
# If students A and B are alike in everything else but A has one more GPA point
# than B, A's predicted attendance rate is 17.261 percentage points higher.
# delta(ACT) = +10  =>  delta(atndrte) = -17.17
# If students A and B are alike in everything else but A has 10 more ACT points
# than B, A's predicted attendance rate is 17.17 percentage points lower.
# Author's remark: this is surprising, because the student with the higher ACT
# score attends class less.

# (iv)
# priGPA = 3.65 and ACT = 20
#   => atndrte-hat = 75.700 + 17.261 * 3.65 - 1.717 * 20 = 104.3627%
75.700 + 17.261 * 3.65 - 1.717 * 20
# A student with prior GPA 3.65 and ACT 20 gets a predicted attendance rate of
# 104.3627%. That is not a possible value: no attendance rate exceeds 100%.

# (v)
# Student A: priGPA = 3.1, ACT = 21
# Student B: priGPA = 2.1, ACT = 26
# delta(priGPA)  = 3.1 - 2.1 = +1
# delta(ACT)     = 21 - 26   = -5
# delta(atndrte) = 17.261 * 1 - 1.717 * (-5) = 25.846
17.261 * 1 - 1.717 * (-5)
# Student A's predicted attendance rate is 25.846 percentage points higher
# than student B's.
# Session of 6/10/2022
# Exercise C6, p. 111 ----
# header = FALSE: the columns get the default names V1, V2, ...
dWAGE <- read.csv(file = "E:/THI/wage2.csv", header = FALSE)

# (i) Run a simple regression of IQ on educ to obtain the slope coefficient,
# say, delta1-tilde.
regC6 <- lm(V3 ~ V5, data = dWAGE)
summary(regC6)
# Recorded: the slope from the regression of IQ on educ is 3.53
# (3.5338 is the value used in part iv).

# (ii) Simple regression of log(wage) on educ; the slope is beta1-tilde.
regC6.1 <- lm(V17 ~ V5, data = dWAGE)
summary(regC6.1)
# Recorded: beta1-tilde = 0.059839

# (iii) Run the multiple regression of log(wage) on educ and IQ, and obtain
# the slope coefficients, beta1-hat and beta2-hat, respectively.
regC6.iii <- lm(V17 ~ V5 + V3, data = dWAGE)
summary(regC6.iii)
# Recorded: beta1-hat = 0.0391199, beta2-hat = 0.00586

# (iv) Verify that beta1-tilde = beta1-hat + beta2-hat * delta1-tilde.
# The estimates are taken from the fitted models instead of being retyped,
# which avoids the transcription slips in the hand-copied numbers.
delta1_tilde <- coef(regC6)[["V5"]]
beta1_tilde <- coef(regC6.1)[["V5"]]
beta1_hat <- coef(regC6.iii)[["V5"]]
beta2_hat <- coef(regC6.iii)[["V3"]]
c(simple = beta1_tilde, implied = beta1_hat + beta2_hat * delta1_tilde)
# Hand calculation with the recorded values:
#   0.0391199 + 0.00586 * 3.5338 = 0.05982797
# Chapter 4, problem 1, p. 164 ----
# header = FALSE: the columns get the default names V1, V2, ...
dVOTE1 <- read.csv(file = "E:/THI/vote1.csv", header = FALSE)

# Model:
#   voteA = b0 + b1 * log(expendA) + b2 * log(expendB) + b3 * prtystrA + u
# (i) What is the interpretation of b1?
# Holding the other factors fixed, a 1% increase in candidate A's expenditure
# changes voteA by about b1 / 100 (in units of voteA).
# (The notes originally said "1/b1 %", which is not the level-log reading.)

# (ii) In terms of the parameters, state the null hypothesis that a 1% increase
# in A's expenditures is offset by a 1% increase in B's expenditures.
#   H0: b1 + b2 = 0

# (iii) Estimate the given model using the data in VOTE1.RAW and report the
# results in usual form. Do A's expenditures affect the outcome? What about
# B's expenditures? Can you use these results to test the hypothesis in
# part (ii)?
# The note was cut off after "2*"; the variance needed for the test is
#   Var(b1-hat + b2-hat) = Var(b1-hat) + Var(b2-hat) + 2 * Cov(b1-hat, b2-hat)
# Session of 20/10/2022
# Chapter 4, p. 165, exercise C5 ----
# header = FALSE: the columns get the default names V1, V2, ...
dMLB1 <- read.csv(file = "E:/THI/mlb1.csv", header = FALSE)

# Chapter 4, p. 165, exercise C6 ----
# Hypotheses: H0: b2 = b3  versus  H1: b2 != b3
# (ii) Test the null hypothesis in part (i) against a two-sided alternative, at
# the 5% significance level, by constructing a 95% confidence interval. What do
# you conclude?
# Session of 27/10/2022
# Full paths are built with file.path() instead of calling setwd(), so running
# this section no longer changes the session's working directory.
data_dir <- "E:/THI"

# skip = 3 drops the three lines above the header row; nrows = 1159 - 4 limits
# the read to 1155 data rows. See ?read.csv for the argument list.
dFF3 <- read.csv(file.path(data_dir, "F-F_Research_Data_Factors.CSV"),
                 skip = 3, header = TRUE, nrows = 1159 - 4)
str(dFF3)

# Same layout handling for the second file: 15 lines skipped, then a header
# row and 1171 - 16 = 1155 data rows.
d25 <- read.csv(file.path(data_dir, "25.csv"),
                skip = 15, header = TRUE, nrows = 1171 - 16)
# Session of 17/11/2022
# Install the two packages only when they are missing, then attach them.
# library() stops with an error if a package cannot be loaded, whereas
# require() would only return FALSE and let the script carry on.
for (pkg in c("dplyr", "openxlsx")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library(dplyr)
library(openxlsx)

# Read sheets 1 to 8 of the workbook (data start at row 3, no header row, so
# the columns are named X1, X2, ...), tag each sheet with
# YEAR = 2008 + sheet index (2009 to 2016) and stack the sheets once at the
# end instead of growing the data frame inside the loop.
yearly_sheets <- lapply(1:8, function(i) {
  sheet_data <- read.xlsx(xlsxFile = "E:/THI/Data.xlsx",
                          sheet = i, startRow = 3, colNames = FALSE)
  sheet_data$YEAR <- 2008 + i
  sheet_data
})
dBCTC <- do.call(rbind, yearly_sheets)

# Keep column 1, column 359 and columns 2 to 8, sorted by X1 and then YEAR.
# NOTE(review): arrange() needs YEAR among the selected columns, so column 359
# is expected to be YEAR (358 data columns per sheet) - confirm against the
# workbook.
dTHI <- dBCTC[, c(1, 359, 2:8)] %>% arrange(X1, YEAR)