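# NOTE: the document-viewer banner text that preceded the script ("Download as
# txt, pdf, or txt", "You are on page 1 of 8") is page-export residue, not R
# code; it is kept here only as a comment so the file can be parsed.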

# Date: 15/09/2022

# Chapter 2, exercise C2 (p. 64)
# read.csv() reads a CSV data file. `file` is the path plus the file name
# (copy it exactly, including upper/lower case); `header` says whether the
# first row holds the column names. With header = FALSE the columns are
# auto-named V1, V2, ...
dCEO <- read.csv(file = 'E:/THI/ceosal2.csv', header = FALSE)

# (i) Find the average salary and the average tenure in the sample.
# A column is addressed as data_name$column_name; check the exact data name
# in the environment pane.
mean(dCEO$V1)
# Tenure as CEO (ceoten) is V6, the same column used in parts (ii) and (iii).
mean(dCEO$V6)

# (ii) How many CEOs are in their first year as CEO (that is, ceoten = 0)?
# What is the longest tenure as a CEO?
# subset(data, condition) keeps the rows that satisfy the condition; nrow()
# then counts them instead of printing every matching row.
nrow(subset(dCEO, V6 == 0))
# Recorded result: 5
max(dCEO$V6)
# Recorded result: 37

# (iii) Estimate the simple regression model
#   log(salary) = b0 + b1*ceoten + u
#   V10         = b0 + b1*V6     + u
# and find b0^ and b1^. Usage: lm(formula, data).
lm(V10 ~ V6, data = dCEO)
# Recorded result: b0^ = 6.50, b1^ = 0.009724
# What is the (approximate) predicted percentage increase in salary given one
# more year as a CEO?
# Two CEOs whose tenure differs by one year are predicted to differ in salary
# by about 0.9724% (100 * 0.009724).

# C3
# Carried over from C2: store the fitted model so that summary() can report
# standard errors and R-squared for log(salary) on ceoten.
regC2 <- lm(V10 ~ V6, data = dCEO)
summary(regC2)

# The data file is read once (header = FALSE, so columns are V1, V2, ...).
dSLEEP <- read.csv(file = 'E:/THI/sleep75.csv', header = FALSE)

# (i) Report your results in equation form along with the number of
# observations and R2. What does the intercept in this equation mean?
#   sleep = b0 + b1*totwrk + u
#   V21   = b0 + b1*V26    + u
regC3 <- lm(V21 ~ V26, data = dSLEEP)
regC3
summary(regC3)
nobs(regC3)
# Recorded results: b0^ = 3586.3770, b1^ = -0.1507, R-squared = 0.1033
# Slope: one more minute of work is associated with about 0.15 minutes less
# sleep. This is a change in levels (minutes), not a 15.07% change.
# Intercept: when totwrk = 0, predicted weekly sleep is 3586.37695 minutes.

# (ii) If totwrk increases by 2 hours, by how much is sleep estimated to fall?
# Delta totwrk = 2 hours = 120 minutes, so
# Delta sleep^ = b1^ * 120 = -0.15075 * 120 = -18.09 minutes.
coef(regC3)[["V26"]] * 120

# Date: 22/9/2022
# C4
dWAGE <- read.csv(file = 'E:/THI/wage2.csv', header = FALSE)

# (i) Find the average salary and average IQ in the sample.
mean(dWAGE$V1)
# Recorded result: mean of salary is 957.9455
mean(dWAGE$V3)
# Recorded result: mean of IQ is 101.2824
# What is the sample standard deviation of IQ?
# (IQ scores are standardized so that the average in the population
# is 100 with a standard deviation equal to 15.)
sd(dWAGE$V1)
sd(dWAGE$V3)

# (ii) Estimate a simple regression model where a one-point increase in IQ
# changes wage by a constant dollar amount. Use this model to find the
# predicted increase in wage for an increase in IQ of 15 points. Does IQ
# explain most of the variation in wage?
#   wage = b0 + b1*IQ + u
#   V1   = b0 + b1*V3 + u
# The model is fitted once and reused for printing, summary and prediction.
regc4.ii <- lm(V1 ~ V3, data = dWAGE)
regc4.ii
summary(regc4.ii)

# Delta IQ = +15  =>  Delta wage^ = b1^ * 15 (recorded: 8.303 * 15 = 124.545)
coef(regc4.ii)[["V3"]] * 15
# R-squared: the share of the variation in wage explained by IQ.
summary(regc4.ii)$r.squared

# Notes for the log(wage) specification, which is not estimated in this
# section: Delta log(wage)^ = b1^ * Delta IQ = 0.0088 * 15 = 0.132, i.e. a
# 15-point increase in IQ raises the predicted wage by about 13.2%.

# Exercise C1, page 110
dBWGHT <- read.csv(file = 'E:/THI/bwght.csv', header = FALSE)
# Columns used below: V4 = birth weight (bwght), V10 = smoking variable,
# V1 = family income (faminc).

# (i) What is the most likely sign for b2?
#   bwght = b0 + b1*V10 + b2*faminc + u
# Birth weight is the dependent variable, so it goes on the left of `~`.
lm(V4 ~ V10 + V1, data = dBWGHT)
# b2: if two families differ by one unit of income (smoking held fixed), the
# predicted birth weight of the newborn differs by b2.

# (ii) Covariance and correlation between smoking and family income.
cov(dBWGHT$V10, dBWGHT$V1)
# Recorded result: -19.3678
cor(dBWGHT$V10, dBWGHT$V1)
# Smoking and income are negatively related. A possible reason: higher income
# tends to come with more education and more awareness that smoking harms
# health.

# (iii) Without faminc:
#   bwght = b0 + b1*V10 + u
regc3c1.iii <- lm(V4 ~ V10, data = dBWGHT)
regc3c1.iii
summary(regc3c1.iii)
# Recorded results: sample size = 1388 observations, R-squared = 0.02273

# With faminc:
#   bwght = b0 + b1*V10 + b2*V1 + u
# Each coefficient is the predicted change in birth weight for a one-unit
# change in that variable, holding the other variable fixed.
regc3c1.iiib <- lm(V4 ~ V10 + V1, data = dBWGHT)
regc3c1.iiib
summary(regc3c1.iiib)
# Recorded result: multiple R-squared is 0.0298

# Date: 29/9/2022
# Exercise C2, page 110
dHPRiCE <- read.csv(file = 'E:/THI/hprice1.csv', header = FALSE)
#   price = b0 + b1*sqrft + b2*bdrms + u
#   V1    = b0 + b1*V5    + b2*V3    + u

# (i) Write out the results in equation form.
reg7 <- lm(V1 ~ V5 + V3, data = dHPRiCE)
reg7
summary(reg7)
# Recorded (rounded) results: b0^ = -19.32, b1^ = 0.13, b2^ = 15.2
#   price^ = -19.32 + 0.13*sqrft + 15.2*bdrms

# (ii) What is the estimated increase in price for a house with one more
# bedroom, holding square footage constant?
#   Delta price^ = b2^ * Delta bdrms = 15.2 * 1 = 15.2
# One more bedroom with square footage unchanged raises the predicted price
# by about 15.2 (price is measured in thousands of dollars; see part (vi)).
coef(reg7)[["V3"]]

# (iii) What is the estimated increase in price for a house with an
# additional bedroom that is 140 square feet in size? Compare this to your
# answer in part (ii).
# Both regressors change: one more bedroom and 140 more square feet.
# With the rounded coefficients: 15.2 + 0.13*140 = 33.4.
coef(reg7)[["V3"]] * 1 + coef(reg7)[["V5"]] * 140

# (iv) What percentage of the variation in price is explained by square
# footage and number of bedrooms?
# Recorded result: R-square = 0.6233 = 62.33%
# NOTE(review): confirm whether the recorded 0.6233 is the multiple or the
# adjusted R-squared in the summary() output.
summary(reg7)$r.squared

# (v) The first house in the sample has sqrft = 2,438 and bdrms = 4. Find the
# predicted selling price for this house from the OLS regression line.
# With the rounded coefficients: -19.32 + 0.13*2438 + 15.2*4 = 358.42.
price.hat.first <- predict(reg7, newdata = data.frame(V5 = 2438, V3 = 4))
price.hat.first

# (vi) The actual selling price of the first house in the sample was $300,000
# (so price = 300). Find the residual for this house. Does it suggest that
# the buyer underpaid or overpaid for the house?
# residual = actual - predicted, both in thousands of dollars.
300 - price.hat.first
# The predicted price is higher than the actual selling price of 300, so the
# residual is negative => the buyer underpaid relative to the prediction.

# Exercise C3, page 111
dCEO <- read.csv(file = 'E:/THI/ceosal2.csv', header = FALSE)

# (i) Estimate a model relating annual salary to firm sales and market value.
# Make the model of the constant elasticity variety for both independent
# variables. Write the results out in equation form.
#   log(salary) = b0 + b1*log(sales) + b2*log(mktval) + u
#   V10         = b0 + b1*V11        + b2*V12         + u
regc3.i <- lm(V10 ~ V11 + V12, data = dCEO)
regc3.i
summary(regc3.i)
# Recorded: log(salary)^ = 4.62 + 0.16*log(sales) + 0.11*log(mktval)

# (ii) Add profits to the model from part (i). Why can this variable not be
# included in logarithmic form? Would you say that these firm performance
# variables explain most of the variation in CEO salaries?
#   log(salary) = b0 + b1*log(sales) + b2*log(mktval) + b3*profits + u
regc3.iiii <- lm(V10 ~ V11 + V12 + V8, data = dCEO)
regc3.iiii
summary(regc3.iiii)
# Some observations have negative profits, so log(profits) cannot be taken.
# Recorded: log(salary)^ = 4.69 + 0.16*log(sales) + 0.098*log(mktval)
#                          + 0.000036*profits
# The sales coefficient is an elasticity: a 1% increase in sales is
# associated with about a 0.16% increase in predicted salary.
# Whether these variables explain "most" of the variation is judged from the
# R-squared in the summary() output above.

# (iii) Add the variable ceoten to the model in part (ii). What is the
# estimated percentage return for another year of CEO tenure, holding other
# factors fixed?
#   log(salary) = b0 + b1*log(sales) + b2*log(mktval) + b3*profits
#                 + b4*ceoten + u
regc3.iiiii <- lm(V10 ~ V11 + V12 + V8 + V6, data = dCEO)
regc3.iiiii
summary(regc3.iiiii)
# Approximate equation (re-check the digits against the summary() output):
#   log(salary)^ = 4.56 + 0.16*log(sales) + 0.10*log(mktval)
#                  + 0.000029*profits + 0.012*ceoten
# One more year of CEO tenure increases predicted salary by about 1.2%.
100 * coef(regc3.iiiii)[["V6"]]

# (iv) Find the sample correlation coefficient between the variables
# log(mktval) and profits. Are these variables highly correlated? What does
# this say about the OLS estimators?
cor(dCEO$V12, dCEO$V8)
# Recorded result: correlation = 0.7768976, which is fairly high. This does
# not bias the OLS estimators, but it can inflate their variances.

# Exercise C4, page 111
dATTEND <- read.csv(file = "E:/THI/attend.csv", header = FALSE)

# (i) Minimum, maximum and mean of atndrte (V6), priGPA (V3) and ACT (V4).
summary(dATTEND[, c(6, 3, 4)])
#            Min     Max     Mean
# atndrte    6.25    100.00  81.71
# priGPA     0.857   3.93    2.587
# ACT        13      32      22.51
# Attendance rate: lowest 6.25%, highest 100%, average 81.71%.
# Prior GPA: lowest 0.857, highest 3.93, average 2.587.
# ACT score: lowest 13, highest 32, average 22.51.

# (ii) Regress atndrte on priGPA and ACT.
summary(lm(V6 ~ V3 + V4, data = dATTEND))
# atndrte.hat = 75.700 + 17.261*priGPA - 1.717*ACT
# Intercept: when priGPA = 0 and ACT = 0 the predicted attendance rate is
# 75.7%. This is not a meaningful case, because no student in the sample is
# close to priGPA = 0 and ACT = 0 (the minima are 0.857 and 13).

# (iii)
# delta(priGPA) = +1 => delta(atndrte) = +17.261
# If students A and B are otherwise identical but A has one more point of
# prior GPA than B, A's predicted attendance rate is 17.261 percentage
# points higher.
# delta(ACT) = +10 => delta(atndrte) = -17.17
# If students A and B are otherwise identical but A has 10 more ACT points
# than B, A's predicted attendance rate is 17.17 percentage points lower.
# This is surprising: the student with the higher ACT score attends less.

# (iv)
# priGPA = 3.65 and ACT = 20
# => atndrte.hat = 75.700 + 17.261*3.65 - 1.717*20 = 104.3627% (impossible)
75.700 + 17.261 * 3.65 - 1.717 * 20
# A student with prior GPA 3.65 and ACT 20 has a predicted attendance rate of
# 104.3627%. The prediction does not make sense, because an attendance rate
# cannot exceed 100%.

# (v)
# Student A: priGPA = 3.1 and ACT = 21
# Student B: priGPA = 2.1 and ACT = 26
# delta(priGPA) = 3.1 - 2.1 = +1
# delta(ACT)    = 21 - 26   = -5
# delta(atndrte) = 17.261*1 - 1.717*(-5) = 25.846
17.261 * 1 - 1.717 * (-5)
# Student A's predicted attendance rate is 25.846 percentage points higher
# than student B's.
# Date: 6/10/2022
# Exercise C6, page 111
dWAGE <- read.csv(file = 'E:/THI/wage2.csv', header = FALSE)

# (i) Run a simple regression of IQ on educ to obtain the slope coefficient,
# say, d~1.
#   V3 = IQ, V5 = educ
regC6 <- lm(V3 ~ V5, data = dWAGE)
regC6
summary(regC6)
# Recorded result: the slope from the regression of IQ on educ is
# d~1 = 3.53 (3.5338).

# (ii) Run the simple regression of log(wage) on educ to obtain the slope
# coefficient b~1.
#   V17 = log(wage)
regC6.1 <- lm(V17 ~ V5, data = dWAGE)
regC6.1
summary(regC6.1)
# Recorded result: the slope from log(wage) on educ is b~1 = 0.059839

# (iii) Run the multiple regression of log(wage) on educ and IQ, and obtain
# the slope coefficients, b^1 and b^2, respectively.
regC6.iii <- lm(V17 ~ V5 + V3, data = dWAGE)
regC6.iii
summary(regC6.iii)
# Recorded results: b^1 = 0.0391199, b^2 = 0.00586

# (iv) Verify that b~1 = b^1 + b^2 * d~1.
# With the recorded values: 0.0391199 + 0.00586*3.5338 = 0.05982797
0.0391199 + 0.00586 * 3.5338
# The same check from the fitted models; the two numbers printed below
# should agree (left side b~1, right side b^1 + b^2*d~1).
coef(regC6.1)[["V5"]]
coef(regC6.iii)[["V5"]] + coef(regC6.iii)[["V3"]] * coef(regC6)[["V5"]]
# Chapter 4, exercise 1, page 164
dVOTE1 <- read.csv(file = 'E:/THI/vote1.csv', header = FALSE)
#   voteA = b0 + b1*log(expendA) + b2*log(expendB) + b3*prtystrA + u
#   V4    = b0 + b1*V8           + b2*V9           + b3*V7       + u

# (i) What is the interpretation of b1?
# Holding the other factors fixed, a 1% increase in candidate A's expenditure
# changes voteA by about b1/100 percentage points.

# (ii) In terms of the parameters, state the null hypothesis that a 1%
# increase in A's expenditures is offset by a 1% increase in B's
# expenditures.
#   H0: b1 + b2 = 0

# (iii) Estimate the given model using the data in VOTE1.RAW and report the
# results in usual form. Do A's expenditures affect the outcome? What about
# B's expenditures? Can you use these results to test the hypothesis in
# part (ii)?
#   H0: b1 = 0   H1: b1 != 0
regC1C4 <- lm(V4 ~ V9 + V7 + V8, data = dVOTE1)
regC1C4
summary(regC1C4)
# Recorded results: b1^ = 6.08332, se(b1^) = 0.38215, t = 15.91867
# t statistic = coefficient / standard error:
6.08332 / 0.38215
# p-value < 2e-16, which is below the 1% significance level, so H0: b1 = 0
# is rejected.
# The separate t statistics do not test H0: b1 + b2 = 0, because that test
# needs the standard error of b1^ + b2^.

# (iv) Estimate a model that directly gives the t statistic for testing the
# hypothesis in part (ii). What do you conclude? (Use a two-sided
# alternative.)
#   H0: b1 + b2 = 0   H1: b1 + b2 != 0

# Descriptive statistics of the two regressors. These are sample moments of
# log(expendA) and log(expendB), not variances of the estimators.
cov(dVOTE1$V8, dVOTE1$V9)
# Recorded result: -0.06234893
var(dVOTE1$V8)
# Recorded result: 2.565128
var(dVOTE1$V9)
# Recorded result: 2.46849

# Var(b1^ + b2^) = Var(b1^) + Var(b2^) + 2*Cov(b1^, b2^); the estimator
# variances and covariance come from vcov() of the fitted model.
vc.C1C4 <- vcov(regC1C4)
se.b1b2 <- sqrt(vc.C1C4["V8", "V8"] + vc.C1C4["V9", "V9"] +
                  2 * vc.C1C4["V8", "V9"])
(coef(regC1C4)[["V8"]] + coef(regC1C4)[["V9"]]) / se.b1b2

# Equivalent reparametrisation: define theta1 = b1 + b2, so b1 = theta1 - b2
#   voteA = b0 + theta1*log(expendA)
#           + b2*(log(expendB) - log(expendA)) + b3*prtystrA + u
# The t statistic on V8 in this regression tests H0: theta1 = 0 directly.
dVOTE1$ldiffBA <- dVOTE1$V9 - dVOTE1$V8
regC1C4.iv <- lm(V4 ~ V8 + ldiffBA + V7, data = dVOTE1)
summary(regC1C4.iv)
# Date: 20/10/2022

# Chapter 4, page 165, exercise C5
dMLB1 <- read.csv(file = "E:/THI/mlb1.csv", header = FALSE)

# (i) If we drop rbisyr, the estimated equation becomes
summary(lm(V47 ~ V4 + V31 + V13 + V32, data = dMLB1))
# log(salary)^ = 11.02 + 0.0677*years + 0.0158*gamesyr + 0.0014*bavg
#                + 0.0359*hrunsyr
# t statistic of hrunsyr = coefficient / standard error:
0.0359 / 0.0072
# t value is about 4.99
# hrunsyr is now very statistically significant (t value ~ 4.99) and its
# coefficient has increased by about two and one-half times.

# (ii) The equation with runsyr, fldperc, and sbasesyr added is
summary(lm(V47 ~ V4 + V31 + V13 + V32 + V38 + V17 + V37, data = dMLB1))
# log(salary)^ = 10.41 + 0.07*years + 0.0079*gamesyr + 0.00053*bavg
#                + 0.0232*hrunsyr + 0.0174*runsyr + 0.001*fldperc
#                - 0.0064*sbasesyr
# Of the three added variables only runsyr is statistically significant
# (t statistic = 3.43).
# sbasesyr has the "wrong" (negative) sign.
# The fldperc effect is very small.
# t statistic of runsyr = coefficient / standard error:
0.0173921 / 0.0050641

# Chapter 4, page 165, exercise C6

#   log(wage) = b0 + b1*educ + b2*exper + b3*tenure + u

# (i) State the null hypothesis that another year of general workforce
# experience (b2) has the same effect on log(wage) as another year of tenure
# (b3) with the current employer.
#   H0: b2 = b3   and   H1: b2 != b3

# (ii) Test the null hypothesis in part (i) against a two-sided alternative,
# at the 5% significance level, by constructing a 95% confidence interval.
# What do you conclude?
#   H0: b2 = b3 and H1: b2 != b3  <==>  H0: b2 - b3 = 0 and H1: b2 - b3 != 0

# Define theta1 = b2 - b3, so b2 = theta1 + b3, and substitute into [2]:
#   log(wage) = b0 + b1*educ + b2*exper + b3*tenure + u                  [2]
#             = b0 + b1*educ + (theta1 + b3)*exper + b3*tenure + u
#             = b0 + b1*educ + theta1*exper + b3*(exper + tenure) + u
# So: regress log(wage) on educ, exper, and (exper + tenure).
dWAGE2 <- read.csv(file = "E:/THI/wage2.csv", header = FALSE)
# Column  5: educ   = years of education
# Column  6: exper  = years of work experience
# Column  7: tenure = years with current employer
# Column 17: lwage  = natural log of wage

# Create the variable "exte" = exper + tenure in dWAGE2.
dWAGE2$exte <- dWAGE2$V6 + dWAGE2$V7
regC4.6.1 <- lm(V17 ~ V5 + V6 + exte, data = dWAGE2)
summary(regC4.6.1)
# The coefficient on V6 is theta1^; test H0: theta1 = 0.
# Recorded: t(theta1^) = 0.412 < c(0.05) = 1.96 ==> fail to reject H0 at the
# 5% level.

# 95% confidence interval for theta1: estimate +/- 1.96 * standard error
# (recorded values: 0.001954 +/- 1.96 * 0.004743).
theta1.row <- coef(summary(regC4.6.1))["V6", ]
inter.UP <- theta1.row[["Estimate"]] + 1.96 * theta1.row[["Std. Error"]]
inter.DOW <- theta1.row[["Estimate"]] - 1.96 * theta1.row[["Std. Error"]]
c(inter.DOW, inter.UP)
# Same interval using the exact t critical value instead of 1.96.
confint(regC4.6.1, parm = "V6", level = 0.95)
# If the interval contains 0, H0: b2 = b3 is not rejected at the 5% level.

# Date: 27/10/2022

# NOTE(review): setwd() changes the working directory for everything that
# runs after this line; it is kept because the two files below are read with
# relative names.
setwd('E:/THI')

# skip / nrows cut away the text above and below the data table in each file
# (see ?read.csv for both arguments).
dFF3 <- read.csv('F-F_Research_Data_Factors.CSV', skip = 3, header = TRUE,
                 nrows = 1159 - 4)
str(dFF3)
d25 <- read.csv('25.csv', skip = 15, header = TRUE, nrows = 1171 - 16)

# Join the factor table and the table from 25.csv on their shared key
# column X.
dCAPM2710 <- merge(x = dFF3, y = d25, by = c('X'))

# Three-factor regression of each column 6..30 of the merged data on
# Mkt.RF, SMB and HML.
# NOTE(review): assumes columns 6:30 of the merged data are the series that
# came from 25.csv -- confirm with names(dCAPM2710).
# NOTE(review): the dependent variable is used as-is; confirm whether it
# should first be turned into an excess return.
for (i in 6:30) {
  reg2710 <- lm(dCAPM2710[, i] ~ Mkt.RF + SMB + HML, data = dCAPM2710)
  cat("\nDependent variable:", names(dCAPM2710)[i], "\n")
  # print() is required: results are not auto-printed inside a loop.
  print(summary(reg2710))
}

# Date: 17/11/2022

# Install the two packages only when they are missing, instead of
# re-installing them on every run.
for (pkg in c('dplyr', 'openxlsx')) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

# library() stops with an error if a package cannot be loaded, whereas
# require() only returns FALSE and lets the script fail later.
library(dplyr)
library(openxlsx)

# Sheets 1..8 hold the financial statements (BCTC) for 2009 --> 2016, one
# sheet per year. With colNames = FALSE the columns are named X1, X2, ...
# Each sheet is read into its own data frame, tagged with its year, and all
# of them are stacked once at the end rather than growing dBCTC in a loop.
sheets.BCTC <- lapply(1:8, function(i) {
  dtemp <- read.xlsx(xlsxFile = 'E:/THI/Data.xlsx',
                     sheet = i, startRow = 3, colNames = FALSE)
  dtemp$YEAR <- 2008 + i
  dtemp
})
dBCTC <- do.call(rbind, sheets.BCTC)
rm(sheets.BCTC)

# Keep the identifier column, the year and columns 2..8, sorted by X1 and
# year. YEAR is selected by name rather than by a fixed position (359).
dTHI <- dBCTC[, c("X1", "YEAR", paste0("X", 2:8))] %>% arrange(X1, YEAR)

# Model:
#   DR(i,t) = b0 + b1*AS(i,t) + b2*SIZE(i,t) + b3*GROWTH(i,t)
#             + b4*PROFIT(i,t) + b5*RISK(i,t) + e(i,t)
# DR(i,t)     = Total Debt(i,t) / Total Assets(i,t)
# SIZE(i,t)   = log(Net Sales)
# GROWTH(i,t) = [EBT(i,t) - EBT(i,t-1)] / EBT(i,t-1)
# PROFIT(i,t) = EBIT(i,t) / Total Assets(i,t)
# RISK(i,t)   = sd(EBT) over t, t-1, t-2   (not constructed below)
#
# Source columns: Total Debt (col 70), Total Assets (col 69),
# Tangible assets (col 40 + col 43), Net sales (col 131), EBT (col 145),
# EBIT (built from col 145 and col 135).
# NOTE(review): AS is computed as (X40 + X43) / X69; the position of the
# closing parenthesis is an assumption -- confirm.
# NOTE(review): EBIT is computed as X145 - X135 -- confirm the sign of
# col 135.

dTHI <- dBCTC %>%
  select(X1, YEAR, X70, X69, X40, X43, X131, X135, X145) %>%
  arrange(X1, YEAR) %>%
  group_by(X1) %>%
  mutate(
    DR = X70 / X69,
    AS = (X40 + X43) / X69,
    PROFIT = (X145 - X135) / X69,
    # dplyr::lag() is named explicitly so that stats::lag() is never used;
    # inside group_by(X1) it takes the previous row of the same X1.
    GROWTH = (X145 - dplyr::lag(X145)) / dplyr::lag(X145),
    SIZE = log(X131 / 10^6)
  ) %>%
  ungroup() %>%
  as.data.frame() %>%
  select(X1, YEAR, DR, AS, PROFIT, GROWTH, SIZE) %>%
  # Drop rows where GROWTH or SIZE is NA, NaN, Inf or -Inf (for example the
  # first year of each X1, a zero lagged EBT, or zero net sales).
  filter(is.finite(GROWTH), is.finite(SIZE))
summary(dTHI)

# (page-export residue: "You might also like" -- not part of the script)