Chap 3.5 Estimation
Aaron Smith
2023-01-14
This code is modified from Time Series Analysis and Its Applications, by Robert H. Shumway,
David S. Stoffer https://github.com/nickpoison/tsa4
The webpages for the texts and some help on using R for time series analysis can be found at
https://nickpoison.github.io/.
Our goal is to estimate the coefficients and the variance of our ARMA(p,q) process.
Our game plan is to identify statistics whose expected value equals the desired parameter, then
use the statistic in place of the parameter.
For an AR(p) process, multiply both sides by \(x_{t-h}\) and take expectations:
\[ x_t = \phi_1 x_{t-1} + \phi_2 x_{t-2} + \cdots + \phi_p x_{t-p} + w_t \\ x_t x_{t-h} = \phi_1 x_{t-1} x_{t-h} + \phi_2 x_{t-2} x_{t-h} + \cdots + \phi_p x_{t-p} x_{t-h} + w_t x_{t-h} \\ E(x_t x_{t-h}) = \phi_1 E(x_{t-1} x_{t-h}) + \phi_2 E(x_{t-2} x_{t-h}) + \cdots + \phi_p E(x_{t-p} x_{t-h}) + E(w_t x_{t-h}) \\ \gamma(h) = \phi_1 \gamma(h-1) + \phi_2 \gamma(h-2) + \cdots + \phi_p \gamma(h-p) + E(w_t x_{t-h}) \]
Now insert the sample autocovariance function for the population autocovariance.
Yule–Walker estimators
Switching from autocovariance to autocorrelation we get the Yule–Walker estimators.
When the sample size is large, the Yule–Walker estimators are approximately normally
distributed and the variance estimate is close to the population variance.
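To make the substitution concrete, here is a minimal sketch (not from the text; the simulated AR(2) coefficients 1.5 and -0.75 are assumptions) that solves the sample Yule–Walker equations directly:
set.seed(1)
x <- arima.sim(
  n = 1000,
  model = list(ar = c(1.5, -0.75))
)
gamma_hat <- acf(
  x = x,
  lag.max = 2,
  type = "covariance",
  plot = FALSE
)$acf[, 1, 1] # sample gamma(0), gamma(1), gamma(2)
Gamma_p <- toeplitz(gamma_hat[1:2]) # sample Gamma_p matrix
phi_hat <- solve(Gamma_p, gamma_hat[2:3]) # Yule-Walker coefficient estimates
sigma2_hat <- gamma_hat[1] - sum(phi_hat*gamma_hat[2:3]) # noise variance estimate
phi_hat
sigma2_hat
This is essentially what ar.yw() computes, up to its handling of the sample mean.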
Load data
rm(
list = ls()
)
options(
digits = 3,
scipen = 999
)
data(
list = "rec",
package = "astsa"
)
Example 3.28
ar.yw_rec = ar.yw(
x = rec,
order = 2
)
ar.yw_rec$x.mean # = 62.26278 (mean estimate)
## [1] 62.3
ar.yw_rec$ar # = 1.3315874, -.4445447 (coefficient estimates)
sqrt(
  x = diag(
    x = ar.yw_rec$asy.var.coef
  )
) # = .04222637, .04222637 (standard errors)
ar.yw_rec$var.pred # = 94.79912 (noise variance estimate)
## [1] 94.8
predict_rec = predict(
object = ar.yw_rec,
n.ahead = 24
)
U = predict_rec$pred + predict_rec$se
L = predict_rec$pred - predict_rec$se
astsa::tsplot(
x = cbind(
rec, predict_rec$pred
),
spag = TRUE,
xlim = c(1980,1990),
ylab = "Recruitment"
)
lines(
x = predict_rec$pred,
col = 2,
type = "o"
)
lines(
x = U,
col = 4,
lty = 2
)
lines(
x = L,
col = 4,
lty = 2
)
Example 3.29
set.seed(
seed = 20230114
)
sarima.sim_ma1 = astsa::sarima.sim(
ma = 0.9,
n = 50
)
astsa::acf1(
series = sarima.sim_ma1,
max.lag = 1,
plot = FALSE
) # lag-1 sample ACF (the text's run gives .536; this seed gives .458)
## [1] 0.458
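As an aside (a sketch using the lag-1 value printed above), the method of moments estimate of \(\theta\) takes the minus root of the quadratic derived in the method-of-moments discussion:
r1 <- 0.458 # lag-1 sample ACF from above
(1 - sqrt(1 - 4*r1^2))/(2*r1) # method of moments estimate of theta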
astsa::acf1(
series = sarima.sim_ma1,
max.lag = length(
x = sarima.sim_ma1
) - 1,
plot = TRUE
)
## [1] 0.46 -0.08 -0.25 -0.23 0.04 0.05 -0.07 -0.11 -0.04 0.08 0.09 0.08
## [13] 0.00 0.10 0.10 -0.19 -0.29 -0.18 -0.01 0.08 -0.01 -0.09 -0.04 0.07
## [25] 0.14 0.07 -0.09 -0.20 -0.06 0.04 0.04 0.03 -0.05 -0.01 0.00 -0.03
## [37] 0.02 -0.03 -0.06 -0.04 -0.02 -0.01 0.01 0.06 0.04 0.03 0.04 0.01
## [49] 0.01
Example 3.31
ar.mle_rec = ar.mle(
x = rec,
order = 2
)
ar.mle_rec$x.mean
## [1] 62.3
ar.mle_rec$ar
sqrt(
x = diag(
x = ar.mle_rec$asy.var.coef
)
) # standard errors
ar.mle_rec$var.pred
## [1] 89.3
Example 3.33
data(
list = "varve",
package = "astsa"
)
diff_log_varve = diff(
x = log(
x = varve
)
) # data
r <- astsa::acf1(
series = diff_log_varve,
max.lag = 1,
plot = FALSE
) # acf(1)
astsa::acf1(
series = diff_log_varve,
max.lag = length(
x = diff_log_varve
) - 1,
plot = TRUE
)
## [1] -0.40 -0.04 -0.06 0.01 0.00 0.04 -0.04 0.04 0.01 -0.05 0.06 -0.06
## [13] -0.04 0.08 -0.02 0.01 0.00 0.03 -0.05 -0.06 0.07 0.04 -0.06 0.05
## ... (lags 25 through 632 omitted; all remaining values are small, within about +/-0.10)
w <- 0
z <- 0
Sc <- 0
Sz <- 0
Szw <- 0
para <- 0
length_varve = length(
x = diff_log_varve
) # 633
Gauss-Newton Estimation
niter <- 20
for (j in 1:niter){
  for(t in 2:length_varve){
    w[t] <- diff_log_varve[t] - para[j]*w[t-1] # innovations at the current theta
    z[t] <- w[t-1] - para[j]*z[t-1] # derivative process, z_t = -dw_t/dtheta
  }
  Sc[j] <- sum(
    x = w^2
  ) # conditional sum of squares at theta_j
  Sz[j] <- sum(z^2)
  Szw[j] <- sum(z*w)
  para[j+1] <- para[j] + Szw[j]/Sz[j] # Gauss-Newton update
}
Results
cbind(
iteration = 0:(niter - 1),
thetahat = para[1:niter],
Sc,
Sz
)
## iteration thetahat Sc Sz
w <- 0
cSS <- 0
th = seq(
from = -0.3,
to = -0.94,
by = -0.01
)
for(p in 1:length(th)){
for(t in 2:length_varve){
w[t] <- diff_log_varve[t] - th[p]*w[t-1]
}
cSS[p] <- sum(
x = w^2
)
}
astsa::tsplot(
x = th,
y = cSS,
ylab = expression(S[c](theta)),
xlab = expression(theta)
)
abline(
v = para[1:length(Sc)],
lty = 2,
col = 4
) # add previous results to plot
points(
x = para[1:length(Sc)],
y = Sc,
pch = 16,
col = 4
)
Example 3.36
generate data
set.seed(
seed = 20230115
)
# VGAM::rlaplace would have been better
rexp_0.5 = rexp(
n = 150,
rate = 0.5
)
runif_sign = runif( # sample with -1,1 would have been better
n = 150,
min = -1,
max = 1
)
rlaplace_0.5 = rexp_0.5*sign(
x = runif_sign
)
sarima.sim_laplace = 50 + astsa::sarima.sim(
n = 100,
ar = 0.95,
innov = rlaplace_0.5,
burnin = 50
)
astsa::tsplot(
x = sarima.sim_laplace,
ylab = expression(X[~t])
)
Bootstrap
set.seed(
seed = 20230115
) # not that 666
ar.yw_laplace = ar.yw(
x = sarima.sim_laplace,
order = 1
) # assumes the data were retained
mean_laplace = ar.yw_laplace$x.mean # estimate of mean
phi = ar.yw_laplace$ar # estimate of phi
nboot = 250 # number of bootstrap replicates
resid_laplace = ar.yw_laplace$resid[-1] # the 99 innovations
x.star = sarima.sim_laplace # initialize x*
phi.star.yw = c() # initialize phi*
for (i in 1:nboot) {
  resid.star = sample(
    x = resid_laplace,
    replace = TRUE
  ) # resample the fitted innovations with replacement
  x.star = astsa::sarima.sim(
    n = 99,
    ar = phi,
    innov = resid.star,
    burnin = 0
  ) + mean_laplace # simulate a series from the estimated model
  phi.star.yw[i] <- ar.yw(
    x = x.star,
    order = 1
  )$ar # re-estimate phi from each bootstrap series
}
set.seed(
  seed = 20230115
)
phi.yw = rep(
x = NA,
times = 1000
)
for (i in 1:1000){
rexp_0.5 <- rexp(
n = 150,
rate = 0.5
);
runif_sign <- runif(
n = 150,
min = -1,
max = 1
);
rlaplace_0.5 <- rexp_0.5*sign(
x = runif_sign
)
arima.sim_laplace <- 50 + arima.sim(
n = 100,
list(
ar = 0.95
),
innov = rlaplace_0.5,
n.start = 50
)
phi.yw[i] <- ar.yw(
x = arima.sim_laplace,
order = 1
)$ar
}
Picture
hist(
x = phi.star.yw,
breaks = 15,
main = "",
prob = TRUE,
xlim = c(
0.65,1.05
),
ylim = c(
0,14
),
col = astsa::astsa.col(
col = 4,
alpha = 0.3
),
xlab = expression(hat(phi))
)
lines(
x = density(
x = phi.yw,
bw = 0.02
),
lwd = 2
)
curve(
expr = dnorm(
x = x,
mean = 0.96,
sd = 0.03
),
from = 0.75,
to = 1.1,
lty = 2,
lwd = 2,
add = TRUE
)
legend(
x = 0.65,
y = 14,
bty = 'n',
lty = c(
1,0,2
),
lwd = c(
2,0,2
),
col = 1,
pch = c(
NA,22,NA
),
pt.bg = c(
NA,astsa::astsa.col(
col = 4,
alpha = 0.3
),NA
),
pt.cex = 2.5,
legend = c(
'true distribution', 'bootstrap distribution', 'normal approximation'
)
)
3 ARIMA Models - 3.5.1 Estimation - Method of Moments
Aaron Smith
2023-01-14
This code is modified from Time Series Analysis and Its Applications, by Robert H. Shumway,
David S. Stoffer https://github.com/nickpoison/tsa4
The webpages for the texts and some help on using R for time series analysis can be found at
https://nickpoison.github.io/.
Our goal is to estimate the coefficients and the variance of our ARMA(p,q) process.
Our game plan is to find theoretical equations that relate the parameters to population
moments, then substitute sample statistics for those moments and solve for the parameters.
Now insert the sample autocovariance function for the population autocovariance.
Yule–Walker estimators
Switching from autocovariance to autocorrelation we get the Yule–Walker estimators.
When the sample size is large, the Yule–Walker estimators are approximately normally
distributed and the variance estimate is close to the population variance.
Proof:
The proof will be presented separately.
The Yule-Walker estimators are presented using an inverse of the covariance matrix. We can
use the Durbin-Levinson algorithm to calculate \(\widehat{\phi}\) without inverting
\(\widehat{\Gamma}_p\) or \(\widehat{R}_p\), by replacing \(\gamma(h)\) with
\(\widehat{\gamma}(h)\).
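As a sketch of that idea (the function name is illustrative, not from the text), the recursion needs only the autocorrelations and a double loop:
durbin_levinson <- function(rho, p){ # rho = c(rho(1), ..., rho(p))
  phi <- matrix(
    data = 0,
    nrow = p,
    ncol = p
  )
  phi[1,1] <- rho[1]
  if(p >= 2) for(h in 2:p){
    phi[h,h] <- (rho[h] - sum(phi[h-1,1:(h-1)]*rho[(h-1):1]))/
      (1 - sum(phi[h-1,1:(h-1)]*rho[1:(h-1)]))
    phi[h,1:(h-1)] <- phi[h-1,1:(h-1)] - phi[h,h]*phi[h-1,(h-1):1]
  }
  phi # row h holds the AR(h) coefficients; the diagonal is the PACF
}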
Applying the large sample results for Yule–Walker estimators to the Durbin-Levinson algorithm,
we get the large sample distribution of the PACF \(\widehat{\phi}_{hh}\).
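A quick simulation sketch of this result (the AR(1) coefficient 0.8 and n = 500 are assumptions): for lags beyond p, \(\widehat{\phi}_{hh}\) is approximately \(N(0, 1/n)\), so values within \(\pm 2/\sqrt{n}\) are unremarkable.
set.seed(2)
pacf_x <- pacf(
  x = arima.sim(
    n = 500,
    model = list(ar = 0.8)
  ),
  lag.max = 10,
  plot = FALSE
)$acf[, 1, 1]
cbind(
  lag = 1:10,
  pacf = round(pacf_x, 2),
  two_se = round(2/sqrt(500), 2) # approximate bound for lags > p
)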
n <- 1e6
# The code that generated v_x was lost in extraction; a plausible reconstruction,
# given the estimates below, is a long simulated AR(2) with phi = (1.5, -0.75):
v_x <- arima.sim(
  n = n,
  model = list(ar = c(1.5, -0.75))
)
var(v_x) # sample variance, close to gamma(0)
## [1] 8.638448
astsa::acf1(
series = v_x,
max.lag = 2
)
## [1] 0.86 0.54
astsa::acf2(
series = v_x,
max.lag = 3
)
## [,1] [,2] [,3]
Manually estimate the model parameters. Note: the time series was generated with a random
number generator, so the estimated values will change across runs; rounding also changes
the output.
solve(matrix(
data = c(1,0.86,0.86,1),
nrow = 2
)) %*% matrix(
data = c(0.86,0.54),
ncol = 1
)
## [,1]
## [1,] 1.5192012
## [2,] -0.7665131
8.64 * (1 - sum(c(0.86,0.54)*c(1.52,-0.77)))
## [1] 0.938304
(1/n)*(0.94/8.64) * solve(matrix( # asymptotic covariance of the coefficient estimates
  data = c(1,0.86,0.86,1),
  nrow = 2
))
## [,1] [,2]
ar.ols(
x = v_x,
intercept = FALSE,
order.max = 2,
demean = FALSE
)
##
## Call:
## ar.ols(x = v_x, order.max = 2, demean = FALSE, intercept = FALSE)
##
## Coefficients:
## 1 2
## 1.5003 -0.7504
##
## Order selected 2 sigma^2 estimated as 1.001
Load data
rm(
list = ls()
)
options(
digits = 2,
scipen = 999
)
data(
list = "rec",
package = "astsa"
)
ar.ols(
x = rec,
order = 2
)
##
## Call:
## ar.ols(x = rec, order.max = 2)
##
## Coefficients:
## 1 2
## 1.35 -0.46
##
## Intercept: -0.0564 (0.446)
##
## Order selected 2 sigma^2 estimated as 89.7
ar.yw_rec = ar.yw(
x = rec,
order = 2
)
ar.yw_rec$x.mean # = 62.26278 (mean estimate)
## [1] 62
ar.yw_rec$ar # = 1.3315874, -.4445447 (coefficient estimates)
sqrt(
  x = diag(
    x = ar.yw_rec$asy.var.coef
  )
) # = .04222637, .04222637 (standard errors)
ar.yw_rec$var.pred # = 94.79912 (noise variance estimate)
## [1] 95
Obtain the 24-month-ahead predictions and their standard errors, and then plot the results.
predict_rec = predict(
object = ar.yw_rec,
n.ahead = 24
)
U = predict_rec$pred + predict_rec$se
L = predict_rec$pred - predict_rec$se
astsa::tsplot(
x = cbind(
rec, predict_rec$pred
),
spag = TRUE,
xlim = c(1980,1990),
ylab = "Recruitment"
)
lines(
x = predict_rec$pred,
col = 2,
type = "o"
)
lines(
x = U,
col = 4,
lty = 2
)
lines(
x = L,
col = 4,
lty = 2
)
For AR(p) models, the Yule–Walker estimators are optimal in the sense that the asymptotic
distribution is the best asymptotic normal distribution.
This is because, given initial conditions, AR(p) models are linear models, and the Yule–Walker
estimators are essentially least squares estimators.
If we use method of moments for MA or ARMA models, we will not get optimal estimators
because such processes are nonlinear in the parameters.
Let’s use the autocovariance and autocorrelation to estimate the MA(1) parameter by the
method of moments; solving the resulting quadratic equation gives the estimate.
\[ \widehat{\rho}(1) = \dfrac{\widehat{\theta}}{1+\widehat{\theta}^2} \\ \widehat{\theta} = \dfrac{1 \pm \sqrt{1 - 4\widehat{\rho}(1)^2}}{2\widehat{\rho}(1)} \]
To eliminate complex numbers, we restrict \(|\widehat{\rho}(1)| \leq 1/2\). There are two
estimates from the quadratic equation. The one using addition has a vertical asymptote at
\(\widehat{\rho}(1) = 0\), while the minus solution has only a removable (hole) discontinuity at
\(\widehat{\rho}(1) = 0\). We go with the minus solution.
rho_1 = seq(
  from = -0.5,
  to = 0.5,
  by = 0.0001
)
theta_p <- (1 + sqrt(1 - 4*(rho_1^2)))/(2*rho_1)
theta_m <- (1 - sqrt(1 - 4*(rho_1^2)))/(2*rho_1)
plot(
x = rho_1,
y = theta_p,
pch = ".",
ylim = c(-100,100)
)
plot(
x = rho_1,
y = theta_m,
pch = "."
)
Drawbacks of this method of moments estimator:
• For \(|\widehat{\rho}(1)| > 1/2\) we get complex estimates, which do not make sense.
• There are two solutions, requiring the statistician to figure out which one to use.
• One of the solutions has a vertical asymptote with respect to \(\widehat{\rho}(1)\), which
is unreasonable.
• For large samples, the method of moments estimate has greater variance than the
maximum likelihood estimate (this can be seen with the delta method).
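A rough Monte Carlo sketch of the last point (theta = 0.5, n = 200, and 200 replicates are assumptions):
set.seed(3)
est <- replicate(200, {
  x <- arima.sim(
    n = 200,
    model = list(ma = 0.5)
  )
  r1 <- acf(
    x = x,
    lag.max = 1,
    plot = FALSE
  )$acf[2] # lag-1 sample ACF
  mom <- if(abs(r1) < 0.5) (1 - sqrt(1 - 4*r1^2))/(2*r1) else NA # minus root
  mle <- arima(
    x = x,
    order = c(0,0,1)
  )$coef["ma1"]
  c(mom, mle)
})
apply(est, 1, sd, na.rm = TRUE) # row 1 is method of moments, row 2 is MLE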
set.seed(
seed = 20230114
)
sarima.sim_ma1 = astsa::sarima.sim(
ma = 0.9,
n = 50
)
astsa::acf1(
series = sarima.sim_ma1,
max.lag = 1,
plot = FALSE
) # lag-1 sample ACF (the text's run gives .536; this seed gives .46)
## [1] 0.46
astsa::acf1(
series = sarima.sim_ma1,
max.lag = length(
x = sarima.sim_ma1
) - 1,
plot = TRUE
)
## [1] 0.46 -0.08 -0.25 -0.23 0.04 0.05 -0.07 -0.11 -0.04 0.08 0.09 0.08
## [13] 0.00 0.10 0.10 -0.19 -0.29 -0.18 -0.01 0.08 -0.01 -0.09 -0.04 0.07
## [25] 0.14 0.07 -0.09 -0.20 -0.06 0.04 0.04 0.03 -0.05 -0.01 0.00 -0.03
## [37] 0.02 -0.03 -0.06 -0.04 -0.02 -0.01 0.01 0.06 0.04 0.03 0.04 0.01
## [49] 0.01
3 ARIMA Models - 3.5.2 Estimation - Maximum Likelihood Estimation
Aaron Smith
2023-01-27
This code is modified from Time Series Analysis and Its Applications, by Robert H. Shumway,
David S. Stoffer https://github.com/nickpoison/tsa4
The webpages for the texts and some help on using R for time series analysis can be found at
https://nickpoison.github.io/.
\[ x_t = \mu + \phi(x_{t-1}-\mu) + w_t \\ |\phi| < 1 \\ w_t \sim iid N(0,\sigma_w^2) \]
• density functions: the observations are variables, the parameter values are constant
(data has not been collected yet)
• likelihood functions: the parameters are the variables, the observations are constant
(after data collection)
When we do not plug in values for the data or the parameters, the likelihood function and the
density are equal to each other.
\[ L(\mu,\phi,\sigma_w^2|x_1,x_2,...,x_n) = f(x_1,x_2,...,x_n|\mu,\phi,\sigma_w^2) \]
Since our process is AR(1), each observation depends on the prior data point.
\[ L(\mu,\phi,\sigma_w^2|x_1,x_2,...,x_n) = f(x_1|\mu,\phi,\sigma_w^2)f(x_2|x_1,\mu,\phi,\sigma_w^2)f(x_3|x_2,\mu,\phi,\sigma_w^2) \cdots f(x_n|x_{n-1},\mu,\phi,\sigma_w^2) \]
If we isolate \(w_t\) in our process equation and use the fact that \(w_t \sim N(0,\sigma_w^2)\),
we see that, conditional on \(x_{t-1}\), each \(x_t\) is normal with mean \(\mu + \phi(x_{t-1} - \mu)\)
and variance \(\sigma_w^2\).
To get a better grasp of \(x_1\), we take its causal representation. We see that \(x_1\) is normal
with expected value \(\mu\) and variance \(\dfrac{\sigma_w^2}{1 - \phi^2}\).
\[\begin{align} x_1 &= \mu + \sum_{j = 0}^{\infty}\phi^j w_{1-j} \\ E(x_1) &= E\left(\mu + \sum_{j = 0}^{\infty}\phi^j w_{1-j}\right) = \mu + \sum_{j = 0}^{\infty}\phi^j E(w_{1-j}) = \mu \\ V(x_1) &= V\left(\mu + \sum_{j = 0}^{\infty}\phi^j w_{1-j}\right) \\ &= E\left(\sum_{j = 0}^{\infty}\phi^j w_{1-j}\sum_{k = 0}^{\infty}\phi^k w_{1-k}\right) \\ &= E\left(\sum_{j = 0}^{\infty}\sum_{k = 0}^{\infty}\phi^{j+k} w_{1-j} w_{1-k}\right) \\ &= E\left(\sum_{j = 0}^{\infty}\phi^{2j} w_{1-j}^2\right) + E\left(\sum_{j\neq k}\phi^{j+k} w_{1-j} w_{1-k}\right) \\ &= \sum_{j = 0}^{\infty}\phi^{2j} E\left(w_{1-j}^2\right) + \sum_{j\neq k}\phi^{j+k} E\left(w_{1-j} w_{1-k}\right) \\ &= \sigma_w^2\sum_{j = 0}^{\infty}\phi^{2j} \\ &= \dfrac{\sigma_w^2}{1 - \phi^2} \end{align}\]
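A quick numeric check of this variance formula (phi = 0.8 and sigma_w = 1 are assumed values):
set.seed(4)
var(
  x = arima.sim(
    n = 1e5,
    model = list(ar = 0.8)
  )
) # should be near 1/(1 - 0.8^2), about 2.78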
Setting the partial derivative to zero and solving, we get maximum likelihood estimator of our
white noise variance.
\[ \widehat{\sigma}_w^2 = \dfrac{S(\widehat{\mu},\widehat{\phi})}{n} \]
This is a biased estimator. If we replace \(n\) in the denominator with \(n-2\), we get the
unbiased unconditional least squares estimate.
We do not need to run a second derivative test: as a function of \(\sigma_w^2\), the likelihood is
smooth and positive on the positive real line and goes to zero as \(\sigma_w^2 \to \infty\), so
the single point where the derivative is zero has to be a maximum.
If we take the logarithm of the likelihood function and drop constants, we get a criterion
function for minimization.
This criterion function is rather complicated. We could reasonably simplify the situation by
ignoring the first observation.
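A minimal sketch of that simplification for AR(1) (an illustrative function, not from the text): condition on \(x_1\), estimate \(\mu\) with \(\bar{x}\), and regress \(x_t - \bar{x}\) on \(x_{t-1} - \bar{x}\).
cls_ar1 <- function(x){
  n <- length(x)
  d <- x - mean(x) # center at the sample mean
  phi_hat <- sum(d[-1]*d[-n])/sum(d[-n]^2) # conditional least squares slope
  Sc <- sum((d[-1] - phi_hat*d[-n])^2) # conditional sum of squares
  c(phi = phi_hat, sigma2 = Sc/(n - 1)) # one convention for the variance estimate
}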
Notice that we can adjust the simpler white noise variance estimator to be unbiased.
\[ \widehat{\sigma}_w^2 = \dfrac{S_c(\widehat{\mu},\widehat{\phi})}{n-3} \]
For general AR(p) models, maximum likelihood estimation, unconditional least squares, and
conditional least squares are analogous to the AR(1) example.
For general ARMA models, it is difficult to explicitly write the likelihood function. Instead we
write the likelihood in terms of the innovations, or one-step-ahead prediction errors.
\[ x_t - x_{t}^{t-1} \]
Dr. Smith skipped material on estimating ARMA(p,q) models with maximum likelihood. The
material in the textbook is very general and not specific to our usage.
Example 3.31
Let’s fit an AR(2) model to the Recruitment time series using maximum likelihood.
Load data
rm(
list = ls()
)
options(
digits = 2,
scipen = 999
)
data(
list = "rec",
package = "astsa"
)
ar.mle_rec = ar.mle(
x = rec,
order = 2
)
ar.mle_rec$x.mean
## [1] 62
ar.mle_rec$ar
sqrt(
x = diag(
x = ar.mle_rec$asy.var.coef
)
) # standard errors
ar.mle_rec$var.pred
## [1] 89
Let \(w_t(\beta)\) be the error between the observed value and modeled value immediately
before \(x_t\) is observed.
Set \( z_t(\beta) = -\dfrac{\partial w_t(\beta)}{\partial \beta} \).
Let \(\theta_{(0)}\) be an initial guess of \(\theta\). The Gauss–Newton procedure for conditional
least squares is given by
\[ \theta_{(j + 1)} = \theta_{(j)} + \dfrac{\sum_{t = 1}^{n}z_t(\theta_{(j)})w_t(\theta_{(j)})}{\sum_{t =
1}^{n}z^2_t(\theta_{(j)})} \]
The ACF and PACF plots show that an MA(1) model is appropriate.
data(
list = "varve",
package = "astsa"
)
diff_log_varve = diff(
x = log(
x = varve
)
) # data
r <- astsa::acf1(
series = diff_log_varve,
max.lag = 1,
plot = FALSE
) # acf(1)
astsa::acf1(
series = diff_log_varve,
max.lag = length(
x = diff_log_varve
) - 1,
plot = TRUE
)
## [1] -0.40 -0.04 -0.06 0.01 0.00 0.04 -0.04 0.04 0.01 -0.05 0.06 -0.06
## [13] -0.04 0.08 -0.02 0.01 0.00 0.03 -0.05 -0.06 0.07 0.04 -0.06 0.05
## ... (lags 25 through 632 omitted; all remaining values are small, within about +/-0.10)
w <- 0
z <- 0
Sc <- 0
Sz <- 0
Szw <- 0
para <- 0
length_varve = length(
x = diff_log_varve
) # 633
Gauss-Newton Estimation
niter <- 20
for (j in 1:niter){
  for(t in 2:length_varve){
    w[t] <- diff_log_varve[t] - para[j]*w[t-1] # innovations at the current theta
    z[t] <- w[t-1] - para[j]*z[t-1] # derivative process, z_t = -dw_t/dtheta
  }
  Sc[j] <- sum(
    x = w^2
  ) # conditional sum of squares at theta_j
  Sz[j] <- sum(z^2)
  Szw[j] <- sum(z*w)
  para[j+1] <- para[j] + Szw[j]/Sz[j] # Gauss-Newton update
}
Results
cbind(
iteration = 0:(niter - 1),
thetahat = para[1:niter],
Sc,
Sz
)
## iteration thetahat Sc Sz
w <- 0
cSS <- 0
th = seq(
from = -0.3,
to = -0.94,
by = -0.01
)
for(p in 1:length(th)){
for(t in 2:length_varve){
w[t] <- diff_log_varve[t] - th[p]*w[t-1]
}
cSS[p] <- sum(
x = w^2
)
}
astsa::tsplot(
x = th,
y = cSS,
ylab = expression(S[c](theta)),
xlab = expression(theta)
)
abline(
v = para[1:length(Sc)],
lty = 2,
col = 4
) # add previous results to plot
points(
x = para[1:length(Sc)],
y = Sc,
pch = 16,
col = 4
)
In the general case of causal and invertible ARMA(p, q) models, maximum likelihood estimation
and conditional and unconditional least squares estimation (and Yule–Walker estimation in the
case of AR models) all lead to optimal estimators.
AR(2):
MA(1):
We handle this the same way as the AR(1) case: \(\theta(B)y_t = w_t\), or \(y_t + \theta y_{t-1} = w_t\).
\[ \gamma_y(0) = \dfrac{\sigma_w^2}{1-\theta^2} \quad\Rightarrow\quad \sigma_w^2\,\gamma_y(0)^{-1} = 1-\theta^2 \]
MA(2):
Again we treat the moving average coefficients like autoregressive coefficients. Notice the sign
changes compared to the AR(2) case.
ARMA(1,1):
This code is modified from Time Series Analysis and Its Applications, by Robert H. Shumway,
David S. Stoffer https://github.com/nickpoison/tsa4
The webpages for the texts and some help on using R for time series analysis can be found at
https://nickpoison.github.io/.
Suppose a time series follows an AR(1) process, but we decide to fit an AR(2) model.
The correct model would have this asymptotic distribution for the parameter.
But because of our model selection error, the parameters have this asymptotic distribution.
(Take the AR(2) distribution from the previous section and plug in zero for \(\phi_2\).)
\[ x_t = \beta z_t + w_t \\ \sqrt{n}(\widehat{\beta} - \beta) \text{ is asymptotically normal with mean zero} \\ V\left(\sqrt{n}(\widehat{\beta} - \beta)\right) = n\sigma_w^2\left(\sum_{t = 1}^{n}z_t^2\right)^{-1} = \sigma_w^2\left(\dfrac{1}{n}\sum_{t = 1}^{n}z_t^2\right)^{-1} \]
AR(1):
MA(1):
Let’s repeat this approach for an MA(1) process. Take the Gauss-Newton setup from the
previous section.
Once again, when the expected value of our time series is zero, \(\dfrac{1}{n}\sum_{t=2}^{n}z_{t-1}^2(\widehat{\theta})\) is the sample variance using the population mean instead of the sample mean. Thus it will converge to the population variance.
\[ \sigma_w^2\left(\dfrac{1}{n}\sum_{t = 2}^{n}z_{t-1}^2(\widehat{\theta})\right)^{-1} \xrightarrow{p} \sigma_w^2\left(\dfrac{\sigma_w^2}{1 - (-\theta)^2}\right)^{-1} = 1 - \theta^2 \]
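A small Monte Carlo check of this approximation (theta = 0.6, n = 300, and 200 replicates are assumptions):
set.seed(6)
theta_hat <- replicate(200, arima(
  x = arima.sim(
    n = 300,
    model = list(ma = 0.6)
  ),
  order = c(0,0,1)
)$coef["ma1"])
c(observed = var(theta_hat), asymptotic = (1 - 0.6^2)/300) # should be close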
If \(n\) is small, or if the parameters are close to the causal boundaries, the asymptotic
approximations can be quite poor. The bootstrap can be helpful in this case.
Once again, we will use the AR(1) case to provide insight into other time series models.
We consider an AR(1) model with a regression coefficient near the boundary of causality and
an error process that is symmetric but not normal.
\[ x_t - \mu = \phi (x_{t-1} - \mu) + w_t \\ \mu = 50 \\ \phi = 0.95 \\ f(w) = \dfrac{1}{2\beta} e^{-|w|/\beta}, \ w \in \mathbb{R} \\ \beta = 2 \\ E(w_t) = 0 \\ V(w_t) = 2\beta^2 = 8 \]
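Before simulating, a quick sanity check (a sketch, not from the text) that the exponential-times-random-sign construction used below matches this Laplace specification with \(\beta = 2\):
set.seed(5)
w_check <- rexp(
  n = 1e5,
  rate = 0.5
)*sign(
  x = runif(
    n = 1e5,
    min = -1,
    max = 1
  )
)
c(mean(w_check), var(w_check)) # should be near 0 and 2*2^2 = 8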
generate data
The time series plot looks non-stationary in the mean, but we know that it is stationary.
set.seed(
seed = 20230115
)
# VGAM::rlaplace would have been better
rexp_0.5 = rexp(
n = 150,
rate = 0.5
)
runif_sign = runif( # sample with -1,1 would have been better
n = 150,
min = -1,
max = 1
)
rlaplace_0.5 = rexp_0.5*sign(
x = runif_sign
)
sarima.sim_laplace = 50 + astsa::sarima.sim(
n = 100,
ar = 0.95,
innov = rlaplace_0.5,
burnin = 50
)
astsa::tsplot(
x = sarima.sim_laplace,
ylab = expression(X[~t])
)
use Yule-Walker estimate on the data to estimate values
set.seed(
seed = 20230115
) # not that 666
ar.yw_laplace <- ar.yw(
x = sarima.sim_laplace,
order = 1
) # assumes the data were retained
mean_laplace <- ar.yw_laplace$x.mean # estimate of mean
phi <- ar.yw_laplace$ar # estimate of phi
ar.yw_laplace[c("x.mean","ar","var.pred")]
## $x.mean
## [1] 41.7783
##
## $ar
## [1] 0.8555428
##
## $var.pred
## [1] 9.996455
Let’s run simulations with the known parameters to assess the sampling distribution of
\(\widehat{\phi}\). This gives an optimistic benchmark compared to the bootstrap, which must
work from estimated parameter values.
set.seed(
seed = 20230209
)
phi.yw <- rep(NA,1000)
for(i in 1:1000){
e <- rexp(
n = 150,
rate = 0.5
)
u <- runif(
n = 150,
min = -1,
max = 1
)
de <- e*sign(u)
x <- 50 + arima.sim(
n = 100,
model = list(
ar = 0.95
),
innov = de,
n.start = 50
)
phi.yw[i] <- ar.yw(
x = x,
order = 1
)$ar
}
mean(
x = phi.yw
)
## [1] 0.8903678
hist(
x = phi.yw
)
phi.yw_0 <- phi.yw
Bootstrap
Notice that the previous calculations and equations used the true values of the parameters, not
the estimated values. Now let’s proceed as if we did not know the parameters of the model;
this mimics applying the bootstrap to real data.
nboot = 250 # number of bootstrap replicates
set.seed(
  seed = 20230115
)
resid_laplace = ar.yw_laplace$resid[-1] # the 99 innovations from the fit
phi.star.yw = c() # initialize phi*
for (i in 1:nboot) {
  resid.star = sample(
    x = resid_laplace,
    replace = TRUE
  ) # resample the fitted innovations with replacement
  x.star = astsa::sarima.sim(
    n = 99,
    ar = phi,
    innov = resid.star,
    burnin = 0
  ) + mean_laplace # simulate a series from the estimated model
  phi.star.yw[i] <- ar.yw(
    x = x.star,
    order = 1
  )$ar # re-estimate phi from each bootstrap series
}
Picture
hist(
x = phi.star.yw,
breaks = 15,
main = "",
prob = TRUE,
xlim = c(
0.65,1.05
),
ylim = c(
0,14
),
col = astsa::astsa.col(
col = 4,
alpha = 0.3
),
xlab = expression(hat(phi))
)
lines(
x = density(
x = phi.yw,
bw = 0.02
),
lwd = 2
)
curve(
expr = dnorm(
x = x,
mean = 0.95,
sd = 0.03
),
from = 0.75,
to = 1.1,
lty = 2,
lwd = 2,
add = TRUE
)
legend(
x = 0.65,
y = 14,
bty = 'n',
lty = c(
1,0,2
),
lwd = c(
2,0,2
),
col = 1,
pch = c(
NA,22,NA
),
pt.bg = c(
NA,astsa::astsa.col(
col = 4,
alpha = 0.3
),NA
),
pt.cex = 2.5,
legend = c(
'true distribution', 'bootstrap distribution', 'normal approximation'
)
)
3 ARIMA Models - 3.5.3 Estimation - Bootstrapping Autoregressive Coefficients
Aaron Smith
2023-02-10
rm(
  list = ls()
)
options(
  digits = 2,
  scipen = 999
)
data(
list = "rec",
package = "astsa"
)
ar.yw_rec = ar.yw( # fit an AR model to the Recruitment series
  x = rec,
  aic = TRUE,
  order.max = 2
)
Then use the residuals to generate simulated AR(2) processes, with the fitted coefficients as
the coefficients.
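The code that built list_ar.yw did not survive extraction. The following is a plausible minimal sketch, mirroring the residual-resampling loop of Section 3.5.2; the replicate count of 500 is an assumption:
resid_rec <- ar.yw_rec$resid[!is.na(ar.yw_rec$resid)] # innovations from the fit
list_ar.yw <- lapply(
  X = 1:500, # number of bootstrap replicates (an assumption)
  FUN = function(i){
    x.star <- astsa::sarima.sim(
      n = length(resid_rec),
      ar = ar.yw_rec$ar,
      innov = sample(
        x = resid_rec,
        replace = TRUE
      ),
      burnin = 0
    ) + ar.yw_rec$x.mean # simulate from the fitted model
    ar.yw(
      x = x.star,
      order = 2
    ) # refit to each bootstrap series
  }
)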
M_bootstrap <- t(sapply( # one row of (phi_1, phi_2) per bootstrap fit
  X = list_ar.yw,
  FUN = function(x) x$ar
))
M_bootstrap <- as.data.frame(
x = M_bootstrap
)
colnames(M_bootstrap) <- c("phi_1","phi_2")
summary(
object = M_bootstrap
)
## phi_1 phi_2
library(ggplot2) # ggplot2 is used for the remaining plots
ggplot(M_bootstrap) +
  aes(x = phi_1,y = phi_2) +
  geom_point(alpha = 0.1) +
  geom_density_2d(linewidth = 1) +
  geom_point(data = data.frame(phi_1 = mean(M_bootstrap$phi_1),phi_2 = mean(M_bootstrap$phi_2)),size = 3,color = "red") +
theme_bw() +
labs(
title = "Bootstrap coefficients of the AR(2) model\nwith bootstrap estimated density"
)
Use the bootstrap observations to fit a bivariate distribution
M_grid <- expand.grid(
phi_1 = seq(
from = min(M_bootstrap$phi_1),
to = max(M_bootstrap$phi_1),
length = 200
),
phi_2 = seq(
from = min(M_bootstrap$phi_2),
to = max(M_bootstrap$phi_2),
length = 200
)
)
M_bootstrapnormal <- cbind(
M_grid,
density = mvtnorm::dmvnorm(
x = M_grid,
mean = colMeans(M_bootstrap),
sigma = cov(M_bootstrap)
)
)
ggplot(M_bootstrapnormal) +
aes(x = phi_1,y = phi_2) +
geom_point(data = M_bootstrap,alpha = 0.1) +
geom_contour(mapping = aes(z = density),lwd = 1) +
  geom_point(data = data.frame(phi_1 = mean(M_bootstrap$phi_1),phi_2 = mean(M_bootstrap$phi_2)),size = 3,color = "red") +
theme_bw() +
labs(
title = "Bootstrap coefficients of the AR(2) model\nwith bootstrap estimated parameters of
bivariate normal"
)
M_bivariatenormal <- cbind( # density under the asymptotic normal approximation
  M_grid,
  density = mvtnorm::dmvnorm(
    x = M_grid,
    mean = ar.yw_rec$ar,
    sigma = ar.yw_rec$asy.var.coef
  )
)
ggplot(M_bivariatenormal) +
aes(x = phi_1,y = phi_2) +
geom_point(data = M_bootstrap,alpha = 0.1) +
geom_contour(mapping = aes(z = density),lwd = 1) +
  geom_point(data = data.frame(phi_1 = ar.yw_rec$ar[1],phi_2 = ar.yw_rec$ar[2]),size = 3,color = "red") +
theme_bw() +
labs(
title = "Bootstrap coefficients of the AR(2) model\nwith asymptotic normal distribution"
)