\documentclass[nojss]{jss}
\usepackage{amsmath,amssymb,array}
\usepackage{booktabs}
\usepackage{bm}
\usepackage{Sweave}
%\usepackage{thumbpdf}
%% new commands
\newcommand{\class}[1]{``\code{#1}''}
\newcommand{\fct}[1]{\code{#1()}}
\SweaveOpts{engine=R, eps=FALSE, keep.source = TRUE}
<<echo=FALSE>>=
options(prompt = "R> ", digits = 4, show.signif.stars = FALSE)
@
%%\VignetteIndexEntry{Bayesian Robust Generalized Mixed Models for Longitudinal Data}
%%\VignettePackage{BayesRGMM}
%%\VignetteDepends{BayesRGMM}
%%\VignetteKeywords{Bayesian, longitudinal, mixed-effect, R, robust}
\author{Kuo-Jung Lee \& Ray-Bing Chen \& Hsing-Ming Chang\\National Cheng Kung University at Taiwan
\AND Keunbaik Lee \& Chanmin Kim \\Sungkyunkwan University at Korea}
\Plainauthor{Kuo-Jung Lee, Ray-Bing Chen, Keunbaik Lee, Chanmin Kim}
\title{Bayesian Robust Generalized Mixed Models for Longitudinal Data}
\Plaintitle{Bayesian Robust Generalized Mixed Models for Longitudinal Data}
\Shorttitle{\pkg{BayesRGMM}: Package Vignette}
\Keywords{Bayesian, Longitudinal, Mixed-Effect, \proglang{R}, Robust}
\Plainkeywords{Bayesian, longitudinal, mixed-effect, R, robust}
\Address{
  Kuo-Jung Lee \\
  Department of Statistics and Institute of Data Science\\
  National Cheng Kung University\\
  E-mail: \email{kuojunglee@ncku.edu.tw}\\
  URL: \url{https://sites.google.com/view/kuojunglee}\\
  Ray-Bing Chen \\
  Department of Statistics and Institute of Data Science\\
  National Cheng Kung University, Taiwan\\
  E-mail: \email{rbchen@ncku.edu.tw}\\
  URL: \url{https://sites.google.com/view/ray-bingchenswebsite/home}\\
  Keunbaik Lee \\
  Department of Statistics\\
  Sungkyunkwan University, Korea\\
  E-mail: \email{keunbaik@skku.edu}\\
  URL: \url{https://ecostat.skku.edu/eng_ecostat/intro/faculty_stat.do}\\
  Chanmin Kim \\
  Department of Statistics\\
  Sungkyunkwan University, Korea\\
  E-mail: \email{ckim.bayes@gmail.com}\\
  URL: \url{https://ecostat.skku.edu/eng_ecostat/intro/faculty_stat.do}\\
}
\Abstract{
\pkg{BayesRGMM} has the functionality to deal with the
incomplete longitudinal studies on binary and ordinal outcomes that are measured repeatedly on subjects over time with drop-outs. Here, we briefly describe the background of methodology and provide an overview of the contents in \pkg{BayesRGMM}.
}

\begin{document}
\SweaveOpts{concordance=TRUE}

\section{Main Methodology}
Denote the response vector for the $i$th subject by $\bm{y}_i=(y_{i1},\cdots,y_{it},\cdots,y_{in_i})'$ where $y_{it}$ is a response at time period $t$ ($i=1,\cdots, N$; $t=1,\cdots,n_i$). Note that the model and associated methodology can be applicable to the unequally spaced times and the distinct number of observations from subject to subject. We assume that the responses on different subjects are independent. Also, we assume that the $y_{it}$'s are conditionally independent given a random vector $b_{i}$. For categorical responses, we assume that $y_{it}$ has an exponential family distribution so that generalized linear models (GLMs) can be specified by
\begin{eqnarray}
&&g\left\{E(y_{it})\right\}=x_{it}^T\beta+z_{it}^Tb_{i}, \label{Eq:Probit-1}\\
&&b_i=(b_{i1},\ldots,b_{iq})^T\stackrel{\mbox{indep.}}{\sim} N(0,v_i^{-1}\Sigma), \notag\\
&&v_i \stackrel{\mbox{indep.}}{\sim} \Gamma\left(\nu/2,\nu/2\right), \notag
\end{eqnarray}
where $\beta$ is a $p\times 1$ unknown mean parameter vector, $x_{it}$ is a $p\times 1$ corresponding vector of covariates, $z_{it}$ is a $q\times 1$ vector, $0$ is a $q\times 1$ zero vector, $\Sigma$ is a $q\times q$ covariance matrix reflecting the subject variations, and $\Gamma(a,b)$ denotes the gamma distribution with shape parameter $a$ and scale parameter $b$. In this paper, we consider the normal and binary responses and the corresponding links are identity and probit, respectively.
To employ Markov chain Monte Carlo (MCMC) techniques for Bayesian estimates and reduce the computational complexity, we introduce a latent variable $y_{it}^*$ to associate with the binary or ordinal outcome $y_{it}$ as follows, respectively.
\begin{description}
\item [Binary outcome:] The unobservable latent variable $y_{it}^*$ and the observed binary outcome $y_{it}$ are connected by:
\[
y_{it}=\mathbf{I}_{(y_{it}^*>0)}, \quad t = 1, \ldots, n_i,
\]
where $\mathbf{I}_{A}$ is the indicator of event $A$. Note that $y_{it}$ is 1 or 0 according to the sign of $y_{it}^*$.
\item [Ordinal outcome:] The latent variable $y_{it}^*$ is associated with each ordinal response $y_{it}$. Hence, the probability of $y_{it}=k$ is modeled through the probability of $y_{it}^*$ falling into the interval of $(\alpha_{k-1},\alpha_k]$, that is, given the random effect $b_i$,
\begin{eqnarray}\label{model-3}
y_{it}=k \mbox{ if } \alpha_{k-1} < y_{it}^* \leq \alpha_k \mbox{ for }k=1,\ldots, K,
\end{eqnarray}
where $-\infty=\alpha_0<\alpha_1<\cdots<\alpha_K=\infty$. As a consequence, we have the following result:
\begin{eqnarray*}
p(y_{it}=k | b_i)=p(\alpha_{k-1}< y_{it}^* \leq \alpha_{k} | b_i),
\end{eqnarray*}
for $k=1,\ldots,K$.
\end{description}
We assume that the latent variable is associated with explanatory variable $x_{it}$ and random effect $z_{it}$ with two different approaches to explaining the correlation of the repeated measures within a subject in the next two sections.
\subsection{Modified Cholesky Decomposition with Hypersphere Decomposition}
We assume
\[
y_{it}^*=x_{it}^T\beta+z_{it}^Tb_i+\epsilon_{it},
\]
where $\epsilon_{it}$'s are prediction errors and are assumed as
\[
\bm{\epsilon}_i=(\epsilon_{i1},\ldots,\epsilon_{in_i})^T \stackrel{indep.}{\sim} N(0,R_i)
\]
with a correlation matrix $R_i$. Then the model~\eqref{Eq:Probit-1} is equivalent to, for $i=1, \ldots, N$ and $t=1, \ldots, n_i$,
\begin{equation}\label{Eq:ProbitLatentVariable}
\begin{aligned}
y_{it} &=
\begin{cases}
1, & y_{it}^*>0; \\
0, & \mbox{otherwise}.
\end{cases}
\end{aligned}
\end{equation}
Let $\bm{y}_i^* = (y_{i1}^*, \ldots, y_{in_i}^*)'$ and rewrite~\eqref{Eq:ProbitLatentVariable} in matrix form as
\begin{eqnarray*}
\bm{y}_i^*=X_i\beta+Z_i b_i +\bm{\epsilon}_i,
\end{eqnarray*}
where $X_i$ and $Z_i$ are $n_i\times p$ and $n_i\times q$ matrices and defined as follows, respectively,
\begin{eqnarray*}
X_i=\left(
  \begin{array}{c}
    x_{i1}^T \\
    \vdots \\
    x_{in_i}^T \\
  \end{array}
\right),
Z_i=\left(
  \begin{array}{c}
    z_{i1}^T \\
    \vdots \\
    z_{in_i}^T \\
  \end{array}
\right).
\end{eqnarray*}
On account of identifiability, $R_i$ is restricted as a correlation matrix. In addition to the diagonal elements equal to 1 and off-diagonal elements between $-1$ and 1, $R_i$ is required to be a positive definite matrix. Moreover, the number of parameters to be estimated increases quadratically with the dimension of the matrix. In order to model $R_{i}$ being positive definite, while alleviating the computational expense, we propose a modeling of the correlation matrix using the hypersphere decomposition (HD) approach \citep{Zhang:etal:2015}.
The correlation matrix $R_i$ is reparameterized via hyperspherical coordinates \citep{Zhang:etal:2015} by the following decomposition:
\begin{eqnarray*}
R_i=F_iF_i^T,
\end{eqnarray*}
where $F_i$ is a lower triangular matrix with the $(t, j)$th element $f_{itj}$ given by
{\small
\begin{eqnarray*}
f_{itj}=\left\{
\begin{array}{ll}
1, & \hbox{for $t=j=1$;}\\
\cos(\omega_{itj}), & \hbox{for $j=1$, $t=2,\cdots,n_i$;} \\
\cos(\omega_{itj})\prod_{r=1}^{j-1}\sin(\omega_{itr}), & \hbox{for $2\leq j<t\leq n_i$;} \\
\prod_{r=1}^{t-1}\sin(\omega_{itr}), & \hbox{for $t=j$; $j=2,\cdots,n_i$,}
\end{array}
\right.
\end{eqnarray*}}
where $\omega_{itj}\in[0,\pi)$ are the angle parameters. The angles are then modeled in terms of covariates through
\begin{eqnarray*}
\log\left(\frac{\omega_{itj}}{\pi-\omega_{itj}}\right)=u_{itj}^T\delta,
\end{eqnarray*}
where $u_{itj}$ is a design vector and $\delta$ is the corresponding vector of unknown parameters in the HSD model.
% NOTE(review): the source text between the case ``$2\leq j$'' and the first
% simulation code chunk was lost during extraction (tag-stripping of <<...>>=);
% the formula tail, the angle model, and the section headers above/below are a
% minimal reconstruction -- please verify against the published vignette.

\section{Simulation Studies}
\subsection{Simulation 1: HSD Correlation Structure}
<<>>=
# Simulation study for MCD correlation structure
library(BayesRGMM)
rm(list=ls(all=TRUE))
Fixed.Effs = c(-0.2, -0.3, 0.8, -0.4)
P = length(Fixed.Effs)
q = 1
T = 5
N = 100
num.of.iter = 100
HSD.para = c(-0.5, -0.3)
a = length(HSD.para)
w = array(runif(T*T*a), c(T, T, a))
for(time.diff in 1:a)
    w[, , time.diff]=1*(as.matrix(dist(1:T, 1:T, method="manhattan")) ==time.diff)
HSD.sim.data = SimulatedDataGenerator(Num.of.Obs = N, Num.of.TimePoints = T,
    Fixed.Effs = Fixed.Effs, Random.Effs = list(Sigma = 0.5*diag(1), df=3),
    Cor.in.DesignMat = 0., Missing = list(Missing.Mechanism = 2,
    RegCoefs = c(-1.5, 1.2)), Cor.Str = "HSD",
    HSD.DesignMat.para = list(HSD.para = HSD.para, DesignMat = w))
hyper.params = list(
    sigma2.beta = 1,
    sigma2.delta = 1,
    v.gamma = 5,
    InvWishart.df = 5,
    InvWishart.Lambda = diag(q) )
HSD.output = BayesRobustProbit(fixed = as.formula(paste("y~-1+",
    paste0("x", 1:P, collapse="+"))), data=HSD.sim.data$sim.data,
    random = ~ 1, HS.model = ~IndTime1+IndTime2, Robustness=TRUE,
    subset = NULL, na.action='na.exclude', hyper.params = hyper.params,
    num.of.iter = num.of.iter, Interactive = FALSE)
original = options(digits = 4)
Model.Estimation = BayesRobustProbitSummary(HSD.output)
cat("\nCoefficients:\n")
print(Model.Estimation$beta.est.CI)
cat("\nParameters in HSD model:\n")
print(Model.Estimation$delta.est.CI)
cat("\nRandom effect: \n")
print(Model.Estimation$random.cov)
cat("\nModel Information:\n")
print(Model.Estimation$model.info)
cat("\nEstimate of Ri: \n")
print(Model.Estimation$Ri, quote = FALSE)
options(original)
@

\subsection{Simulation 2: ARMA Correlation Structure}
To model the serial dependence for the repeated measurement, we consider an ARMA(1, 1) correlation structure with
\[
\phi = 0.4, \qquad \mbox{and}\qquad \psi = 0.2.
\]
<<>>=
library(BayesRGMM)
rm(list=ls(all=TRUE))
Fixed.Effs = c(-0.2,-0.8, 1.0, -1.2)
P = length(Fixed.Effs)
q = 1
T = 10
N = 100
num.of.iter = 100
ARMA.sim.data = SimulatedDataGenerator(Num.of.Obs = N, Num.of.TimePoints = T,
    Fixed.Effs = Fixed.Effs, Random.Effs = list(Sigma = 0.5*diag(1), df=3),
    Cor.in.DesignMat = 0., Missing = list(Missing.Mechanism = 2,
    RegCoefs = c(-1.5, 1.2)), Cor.Str = "ARMA",
    ARMA.para=list(AR.para = 0.4, MA.para=0.2))
ARMA.output = BayesRobustProbit(fixed = as.formula(paste("y~-1+",
    paste0("x", 1:P, collapse="+"))), data=ARMA.sim.data$sim.data,
    random = ~ 1, Robustness=TRUE, subset = NULL, na.action='na.exclude',
    arma.order = c(1, 1), num.of.iter = num.of.iter, Interactive = FALSE)
original = options(digits = 4)
Model.Estimation = BayesRobustProbitSummary(ARMA.output)
cat("\nCoefficients:\n")
print(Model.Estimation$beta.est.CI)
cat("\nARMA parameters:\n\n")
print(Model.Estimation$arma.est)
cat("\nRandom effect: \n")
print(Model.Estimation$random.cov)
cat("\nModel Information:\n")
print(Model.Estimation$model.info)
options(original)
@

\subsection{Ordinal Outcome}
We consider a simple random intercept model ($q=1$). For $k = 1, 2, 3$ and $t=1,\ldots,n_i$, the model is given by:
\begin{align}
&y_{it}^*=\beta_{1}Time_{it}+\beta_{2}Group_{i}+\beta_{3}Time_{it}\times Group_{i}+b_{i0}+\epsilon_{it},\label{sim-1}\\
&b_{i0}\sim N(0,\sigma_b^2),\label{sim-2}\\
&\epsilon_{i}=(\epsilon_{i1},\ldots,\epsilon_{in_i})^T\sim N(0,R_i),\label{sim-3}
\end{align}
where $Time_{it}\sim N(0,1)$ and $Group_{i}$ equals 0 or 1 with approximately the same sample size for each group.
The true parameters in the simulations are as below:
\begin{eqnarray*}
&&(\beta_{01},\beta_{02})=(-0.5,0.5);~~(\beta_1,\beta_2,\beta_3)=(-0.1,0.1,-0.1);~~ \sigma_b^2=0.2.
\end{eqnarray*}
The model for correlation matrix $R_i$ is given by
\begin{eqnarray}
\log\left(\frac{\omega_{itj}}{\pi-\omega_{itj}}\right)=\delta_1 1_{(|t-j|=1)}+\delta_2 1_{(|t-j|=2)},
\end{eqnarray}
where $(\delta_1,\delta_2)=(-0.9,-0.6)$. We consider data that are missing completely at random (MCAR) with a missingness mechanism defined by
\begin{align*}
\eta_{it} &= -0.7\times y_{t-1, i} -0.2\times y_{t-2, i}-0.1\times y_{t-3, i}.
\end{align*}
Then the missing probability depends on $\eta_{it}$'s defined as
\[
p^{\mbox{miss}}_{it} = \frac{e^{\eta_{it}}}{1+e^{\eta_{it}}}.
\]
The data for subject $i$ at time point $t$ is missing according to three observed responses for the subject.
<<>>=
library(BayesRGMM)
rm(list=ls(all=TRUE))
Fixed.Effs = c(-0.1, 0.1, -0.1) #c(-0.8, -0.3, 1.8, -0.4) #c(-0.2,-0.8, 1.0, -1.2)
P = length(Fixed.Effs)
q = 1 #number of random effects
T = 7 #time points
N = 100 #number of subjects
Num.of.Cats = 3 #in KBLEE simulation studies, please fix it.
num.of.iter = 1000 #number of iterations
HSD.para = c(-0.9, -0.6) #the parameters in HSD model
a = length(HSD.para)
w = array(runif(T*T*a), c(T, T, a)) #design matrix in HSD model
for(time.diff in 1:a)
    w[, , time.diff] = 1*(as.matrix(dist(1:T, 1:T, method="manhattan")) ==time.diff)
x = array(0, c(T, P, N))
for(i in 1:N){
    #x[,, i] = t(rmvnorm(P, rep(0, T), AR1.cor(T, Cor.in.DesignMat)))
    x[, 1, i] = 1:T
    x[, 2, i] = rbinom(1, 1, 0.5)
    x[, 3, i] = x[, 1, i]*x[, 2, i]
}
DesignMat = x
#Generate a data with HSD model
#MAR
CPREM.sim.data = SimulatedDataGenerator.CumulativeProbit(
    Num.of.Obs = N, Num.of.TimePoints = T, Num.of.Cats = Num.of.Cats,
    Fixed.Effs = Fixed.Effs, Random.Effs = list(Sigma = 0.5*diag(1), df=3),
    DesignMat = DesignMat, Missing = list(Missing.Mechanism = 2,
    MissingRegCoefs=c(-0.7, -0.2, -0.1)),
    HSD.DesignMat.para = list(HSD.para = HSD.para, DesignMat = w))
print(table(CPREM.sim.data$sim.data$y))
print(CPREM.sim.data$classes)
BCP.output = BayesRobustProbit(fixed = as.formula(paste("y~",
    paste0("x", 1:P, collapse="+"))), data=CPREM.sim.data$sim.data,
    random = ~ 1, Robustness = TRUE, subset = NULL, na.action='na.exclude',
    HS.model = ~IndTime1+IndTime2, hyper.params=NULL,
    num.of.iter=num.of.iter, Interactive = FALSE)
BCP.Est.output = BayesRobustProbitSummary(BCP.output)
BCP.Est.output
@

\bibliography{BayesRGMM}
\end{document}