% Professional Documents
% Culture Documents
% Assignment
% Assignment
% Lachaise Assignment
% LaTeX Template
% Version 1.0 (26/6/2018)
%
% This template originates from:
% http://www.LaTeXTemplates.com
%
% Authors:
% Marion Lachaise & François Févotte
% Vel (vel@LaTeXTemplates.com)
%
% License:
% CC BY-NC-SA 3.0 (http://creativecommons.org/licenses/by-nc-sa/3.0/)
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%----------------------------------------------------------------------------------------
% PACKAGES AND OTHER DOCUMENT CONFIGURATIONS
%----------------------------------------------------------------------------------------
\documentclass{article}
%----------------------------------------------------------------------------------------
% ASSIGNMENT INFORMATION
%----------------------------------------------------------------------------------------
%----------------------------------------------------------------------------------------
\begin{document}
%----------------------------------------------------------------------------------------
% INTRODUCTION
%----------------------------------------------------------------------------------------
%----------------------------------------------------------------------------------------
% PROBLEM 1
%----------------------------------------------------------------------------------------
\begin{question}
For this question, we will switch gears and use R to impute the missing
values. Before answering the questions, first install the R package ``mice''. Use the
R code ``mice.R'' provided under the assignment folder (make changes as you see fit)
and answer the following questions:
% Subquestions numbered with letters
\begin{enumerate}[(a)]
\item Looking at the correlation among predictors, do you think it is a
good idea to use a regression model of all predictors for missing value imputation?
Why or why not?
\item Using regression, Bayesian regression, and predictive mean matching
as the imputation models, how does the correlation among predictors change after
imputation? What about the regression results: are there any noticeable differences?
\end{enumerate}
\end{question}
\begin{question}
Let $y_1,y_2,\ldots,y_n$ be i.i.d.\ samples from the following distributions.
For each of the following distributions, do the following: 1) check the wiki page
(your best friend) for the distribution if you are not familiar with it, 2) derive
the MLE of ALL unknown parameters, and 3) check the second derivative to make sure
it is indeed the maximum.
% Subquestions numbered with letters
\begin{enumerate}[(a)]
\item Normal distribution with unknown mean $\mu$ and variance
$\sigma^2$, $N(\mu,\sigma^2)$:
\begin{equation}
f(y)=\frac{1}{\sqrt{2\pi\sigma^2}}\exp(-\frac{(y-\mu)^2}{2\sigma^2})
\end{equation}
\item Exponential distribution with unknown rate $\lambda$,
$\exp(\lambda)$:
\begin{equation}
f(y)=\lambda \exp(-\lambda y)
\end{equation}
\item Poisson distribution (discrete) with unknown mean $\lambda$,
$\mathrm{Pois}(\lambda)$:
\begin{equation}
P(Y_i=y_i)=\frac{\lambda^{y_i}\exp(-\lambda)}{y_i!}
\end{equation}
\item Binomial distribution with \textbf{known} number of trials $N$
and \textbf{unknown} success probability $p$, $\mathrm{Binom}(N,p)$:
\begin{equation}
P(Y_i=y_i)=\frac{N!}{y_i!(N-y_i)!}p^{y_i}(1-p)^{N-y_i}
\end{equation}
\item (Optional) Let $y_1=y$ be the only sample from a binomial
distribution with \textbf{unknown} number of trials $N$ and \textbf{known} success
probability $p$, $\mathrm{Binom}(N,p)$.
\end{enumerate}
\end{question}
\begin{question}
In the file ``tdist.csv'', you will find 200 samples from a t-distribution with
one degree of freedom and unknown location parameter $\mu$, $t(\mu,1)$:
\begin{enumerate}[(a)]
\item Given the probability density function of a t-distribution
$t(\mu,1)$:
\begin{equation}
f(y)=\frac{1}{\Gamma(\frac{1}{2})\sqrt{\pi}}(1+(y-\mu)^2)^{-1}.
\end{equation}
Write down the log-likelihood function of the 200 samples and derive the first
and second derivatives of the log-likelihood function with respect to $\mu$. Can you
find $\hat{\mu}_{\mathrm{MLE}}$ analytically?
\item Write a program using the Newton--Raphson method to find $\hat{\mu}_{\mathrm{MLE}}$
numerically.
\end{enumerate}
\end{question}
\begin{question}
In the file ``probit.csv'', you will find 400 samples generated from a probit
regression model:
\begin{equation}
y_i = \begin{cases} 0 & \text{if } \beta_0+x_i\beta_1+\epsilon_i<0\\
1 & \text{if } \beta_0+x_i\beta_1+\epsilon_i>0 \end{cases}, \quad
\epsilon_i\sim N(0,1).
\end{equation}
Write a program using the Newton--Raphson method to find $\hat{\beta_0}$ and
$\hat{\beta_1}$ numerically.
\end{question}
\begin{question}
\begin{enumerate}[(a)]
\item The Python code ``sim\_regression.py'' includes a function that
generates $n$ samples from the following regression model:
\begin{equation}
y_i=\beta_0+\beta_1x_i+\epsilon_i, \quad \epsilon_i\sim N(0,\sigma^2),
\quad \beta_0=3,\beta_1=2,\sigma^2=1.
\end{equation}
\item Generate 1,000 different datasets with $n=20$. For each generated
dataset, find $\hat{\beta_1}$, calculate the standard error $SE(\hat{\beta_1})$ based
on the asymptotic distribution, and construct the 95\% confidence interval:
\begin{equation}
(\hat{\beta_1}-2SE(\hat{\beta_1}),\hat{\beta_1}+2SE(\hat{\beta_1}))
\end{equation}
Count the number of times the true parameter value $\beta_1$ is contained in
the 95\% CI.
\item Repeat the same exercise as in (b), but this time with
$n=50$.
\item Repeat the same exercise as in (b), but this time with
$n=300$.
\end{enumerate}
\end{question}
\begin{question}
Generate one dataset with $n=40$ using ``sim\_regression.py''.
\begin{enumerate}[(a)]
\item Find $\hat{\beta_1}$, calculate the standard error
$SE(\hat{\beta_1})$ based on the asymptotic distribution, and construct the 95\% confidence
interval.
\item Calculate the standard error $SE(\hat{\beta_1})$ using the bootstrap
and construct the 95\% confidence interval. For the bootstrap, generate 100 bootstrap
samples by drawing $n=40$ samples from the generated dataset (with
replacement). Compare the two 95\% confidence intervals. Which one do you prefer in this
case, and why?
\end{enumerate}
\end{question}
%----------------------------------------------------------------------------------------
\end{document}