--- title: "Posterior Predictive Inference" output: rmarkdown::html_vignette: mathjax: "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js" vignette: > %\VignetteIndexEntry{Posterior Predictive Inference} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} header-includes: - \def\T{{ \scriptstyle \top }} - \newcommand{\thetasp}{{\theta_{\text{sp}}}} - \newcommand{\GP}{\mathrm{GP}} - \newcommand{\N}{\mathcal{N}} - \newcommand{\EF}{\mathrm{EF}} - \newcommand{\Norm}{\mathrm{N}} - \newcommand{\GCMc}{\mathrm{GCM}_c} - \newcommand{\calL}{\mathcal{L}} - \newcommand{\IG}{\mathrm{IG}} - \newcommand{\IW}{\mathrm{IW}} - \newcommand{\given}{\mid} --- In this article, we discuss the function - - `posteriorPredict()` This function can be used to obtain posterior predictive inference at unobserved locations in space or time. It is applied on the output of functions `spLMexact()`, `spLMstack()`, `spGLMexact()`, `spGLMstack()`, `stvcGLMexact()`, `stvcGLMstack()` etc. ```{r} library(spStack) library(ggplot2) set.seed(1729) ``` The `joint` argument in `posteriorPredict()` indicates if the predictions at the new locations or times are to be made based on the joint posterior predictive distribution or not. If `joint=FALSE`, then the individual predictions are made from their corresponding posterior predictive distributions. ## Prediction in spatial linear model Define the collection of candidate parameters and fit the model using `spLMstack()`. ```{r} # training and test data sizes n_train <- 150 n_pred <- 50 data("simGaussian") dat_train <- simGaussian[1:n_train, ] dat_pred <- simGaussian[n_train + 1:n_pred, ] mod1 <- spLMstack(y ~ x1, data = dat_train, coords = as.matrix(dat_train[, c("s1", "s2")]), cor.fn = "matern", params.list = list(phi = c(1.5, 3, 5), nu = c(0.75, 1.25), noise_sp_ratio = c(0.5, 1, 2)), n.samples = 1000, loopd.method = "psis", parallel = FALSE, solver = "ECOS", verbose = TRUE) ``` Define the new coordinates, run `posteriorPredict()`, and finally sample from the *stacked posterior*. ```{r} sp_pred <- as.matrix(dat_pred[, c("s1", "s2")]) X_new <- as.matrix(cbind(rep(1, n_pred), dat_pred$x1)) mod.pred <- posteriorPredict(mod1, coords_new = sp_pred, covars_new = X_new, joint = TRUE) post_samps <- stackedSampler(mod.pred) ``` Finally, we analyze the posterior predictive distributions of the spatial process as well as the responses against their corresponding true values in order to assess how well the predictions are made. 
```{r fig.align='center', fig.height=3.5, fig.width=7}
postpred_z <- post_samps$z.pred
post_z_summ <- t(apply(postpred_z, 1,
                       function(x) quantile(x, c(0.025, 0.5, 0.975))))
z_combn <- data.frame(z = dat_pred$z_true, zL = post_z_summ[, 1],
                      zM = post_z_summ[, 2], zU = post_z_summ[, 3])

plot_z_summ <- ggplot(data = z_combn, aes(x = z)) +
  geom_errorbar(aes(ymin = zL, ymax = zU), alpha = 0.5, color = "skyblue") +
  geom_point(aes(y = zM), size = 0.5, color = "darkblue", alpha = 0.5) +
  geom_abline(slope = 1, intercept = 0, color = "red", linetype = "solid") +
  xlab("True z") + ylab("Posterior predictive of z") + theme_bw() +
  theme(panel.grid = element_blank(), aspect.ratio = 1)

postpred_y <- post_samps$y.pred
post_y_summ <- t(apply(postpred_y, 1,
                       function(x) quantile(x, c(0.025, 0.5, 0.975))))
y_combn <- data.frame(y = dat_pred$y, yL = post_y_summ[, 1],
                      yM = post_y_summ[, 2], yU = post_y_summ[, 3])

plot_y_summ <- ggplot(data = y_combn, aes(x = y)) +
  geom_errorbar(aes(ymin = yL, ymax = yU), alpha = 0.5, color = "skyblue") +
  geom_point(aes(y = yM), size = 0.5, color = "darkblue", alpha = 0.5) +
  geom_abline(slope = 1, intercept = 0, color = "red", linetype = "solid") +
  xlab("True y") + ylab("Posterior predictive of y") + theme_bw() +
  theme(panel.grid = element_blank(), aspect.ratio = 1)

ggpubr::ggarrange(plot_z_summ, plot_y_summ)
```

## Prediction in spatial generalized linear model

Define the collection of candidate parameters and fit the model using `spGLMstack()`. We use the spatial Poisson count data `simPoisson` for this example.

```{r}
# training and test data sizes
n_train <- 150
n_pred <- 50

# load spatial Poisson data
data("simPoisson")
dat_train <- simPoisson[1:n_train, ]
dat_pred <- simPoisson[n_train + 1:n_pred, ]

mod1 <- spGLMstack(y ~ x1, data = dat_train, family = "poisson",
                   coords = as.matrix(dat_train[, c("s1", "s2")]),
                   cor.fn = "matern",
                   params.list = list(phi = c(3, 4, 5), nu = c(0.5, 1.0),
                                      boundary = c(0.5)),
                   priors = list(nu.beta = 5, nu.z = 5),
                   n.samples = 1000,
                   loopd.controls = list(method = "CV", CV.K = 10, nMC = 500),
                   verbose = TRUE)
```

Define the new coordinates, run `posteriorPredict()`, and finally sample from the *stacked posterior*. To demonstrate this option, we specify `joint = FALSE` for the prediction task.

```{r}
sp_pred <- as.matrix(dat_pred[, c("s1", "s2")])
X_new <- as.matrix(cbind(rep(1, n_pred), dat_pred$x1))

mod.pred <- posteriorPredict(mod1, coords_new = sp_pred, covars_new = X_new,
                             joint = FALSE)
post_samps <- stackedSampler(mod.pred)
```

Finally, we compare the posterior predictive distributions of the spatial process and the response with their corresponding true values to assess how well the predictions are made.
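Besides the graphical comparison in the next chunk, a sample-based scoring rule can summarize predictive quality for the held-out counts. The sketch below is a minimal Monte Carlo estimate of the continuous ranked probability score, using the identity $\mathrm{CRPS}(F, y) = \mathrm{E}|X - y| - \tfrac{1}{2}\mathrm{E}|X - X'|$ with $X, X'$ independent draws from the posterior predictive distribution; it again assumes the rows of `post_samps$y.pred` index the prediction locations.

```{r, eval = FALSE}
# sample-based CRPS for one location: 'draws' is a vector of posterior
# predictive draws and 'y_obs' is the held-out observation
crps_mc <- function(draws, y_obs) {
  mean(abs(draws - y_obs)) - 0.5 * mean(abs(outer(draws, draws, "-")))
}

# CRPS for each held-out count (lower is better)
crps_y <- vapply(seq_len(n_pred), function(i) {
  crps_mc(post_samps$y.pred[i, ], dat_pred$y[i])
}, numeric(1))

summary(crps_y)
```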
```{r fig.align='center', fig.height=3.5, fig.width=7}
postpred_z <- post_samps$z.pred
post_z_summ <- t(apply(postpred_z, 1,
                       function(x) quantile(x, c(0.025, 0.5, 0.975))))
z_combn <- data.frame(z = dat_pred$z_true, zL = post_z_summ[, 1],
                      zM = post_z_summ[, 2], zU = post_z_summ[, 3])

plot_z_summ <- ggplot(data = z_combn, aes(x = z)) +
  geom_errorbar(aes(ymin = zL, ymax = zU), alpha = 0.5, color = "skyblue") +
  geom_point(aes(y = zM), size = 0.5, color = "darkblue", alpha = 0.5) +
  geom_abline(slope = 1, intercept = 0, color = "red", linetype = "solid") +
  xlab("True z") + ylab("Posterior predictive of z") + theme_bw() +
  theme(panel.grid = element_blank(), aspect.ratio = 1)

postpred_y <- post_samps$y.pred
post_y_summ <- t(apply(postpred_y, 1,
                       function(x) quantile(x, c(0.025, 0.5, 0.975))))
y_combn <- data.frame(y = dat_pred$y, yL = post_y_summ[, 1],
                      yM = post_y_summ[, 2], yU = post_y_summ[, 3])

plot_y_summ <- ggplot(data = y_combn, aes(x = y)) +
  geom_errorbar(aes(ymin = yL, ymax = yU), alpha = 0.5, color = "skyblue") +
  geom_point(aes(y = yM), size = 0.5, color = "darkblue", alpha = 0.5) +
  geom_abline(slope = 1, intercept = 0, color = "red", linetype = "solid") +
  xlab("True y") + ylab("Posterior predictive of y") + theme_bw() +
  theme(panel.grid = element_blank(), aspect.ratio = 1)

ggpubr::ggarrange(plot_z_summ, plot_y_summ)
```

## Prediction in spatially-temporally varying coefficients model

Define the collection of candidate parameters and fit the model using `stvcGLMstack()`. We use the spatial-temporal Poisson count data `sim_stvcPoisson` for this example.

```{r}
# training and test data sizes
n_train <- 150
n_pred <- 50

data("sim_stvcPoisson")
dat <- sim_stvcPoisson[1:(n_train + n_pred), ]

# split the dataset into training and test sets
dat_train <- dat[1:n_train, ]
dat_pred <- dat[n_train + 1:n_pred, ]

# create the list of candidate models (multivariate process)
mod.list2 <- candidateModels(list(phi_s = list(1, 2, 3),
                                  phi_t = list(1, 2, 4),
                                  boundary = c(0.5, 0.75)), "cartesian")

# fit a spatial-temporal varying coefficients model using predictive stacking
mod1 <- stvcGLMstack(y ~ x1 + (x1), data = dat_train, family = "poisson",
                     sp_coords = as.matrix(dat_train[, c("s1", "s2")]),
                     time_coords = as.matrix(dat_train[, "t_coords"]),
                     cor.fn = "gneiting-decay",
                     process.type = "multivariate",
                     candidate.models = mod.list2,
                     loopd.controls = list(method = "CV", CV.K = 10, nMC = 500),
                     n.samples = 500)
```

Define the new spatial and temporal coordinates, run `posteriorPredict()`, and finally sample from the *stacked posterior*. We use `joint = FALSE` for this particular example.

```{r}
# prepare new coordinates and covariates for prediction
sp_pred <- as.matrix(dat_pred[, c("s1", "s2")])
tm_pred <- as.matrix(dat_pred[, "t_coords"])
X_new <- as.matrix(cbind(rep(1, n_pred), dat_pred$x1))

mod_pred <- posteriorPredict(mod1,
                             coords_new = list(sp = sp_pred, time = tm_pred),
                             covars_new = list(fixed = X_new, vc = X_new),
                             joint = FALSE)

# sample from the stacked posterior and posterior predictive distribution
post_samps <- stackedSampler(mod_pred)
```

Finally, we analyze the posterior predictive distributions of the spatial-temporal processes by plotting them against their corresponding true values.
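Before plotting, note how the draws are organized: in the plotting code below, rows `1:n_pred` of `post_samps$z.pred` hold the intercept process $z_1$ and rows `n_pred + 1:n_pred` hold the slope process $z_2$. Assuming that layout, the following sketch compares the posterior predictive medians of the two varying coefficients with the simulated truth.

```{r, eval = FALSE}
# posterior predictive medians of the two varying-coefficient processes
z1_med <- apply(post_samps$z.pred[1:n_pred, ], 1, median)
z2_med <- apply(post_samps$z.pred[n_pred + 1:n_pred, ], 1, median)

# correlation between the medians and the simulated true processes
c(cor_z1 = cor(z1_med, dat_pred$z1_true),
  cor_z2 = cor(z2_med, dat_pred$z2_true))
```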
```{r fig.align='center', fig.height=3.5, fig.width=7}
postpred_z <- post_samps$z.pred
post_z1_summ <- t(apply(postpred_z[1:n_pred, ], 1,
                        function(x) quantile(x, c(0.025, 0.5, 0.975))))
post_z2_summ <- t(apply(postpred_z[n_pred + 1:n_pred, ], 1,
                        function(x) quantile(x, c(0.025, 0.5, 0.975))))

z1_combn <- data.frame(z = dat_pred$z1_true, zL = post_z1_summ[, 1],
                       zM = post_z1_summ[, 2], zU = post_z1_summ[, 3])
z2_combn <- data.frame(z = dat_pred$z2_true, zL = post_z2_summ[, 1],
                       zM = post_z2_summ[, 2], zU = post_z2_summ[, 3])

plot_z1_summ <- ggplot(data = z1_combn, aes(x = z)) +
  geom_errorbar(aes(ymin = zL, ymax = zU), alpha = 0.5, color = "skyblue") +
  geom_point(aes(y = zM), size = 0.5, color = "darkblue", alpha = 0.5) +
  geom_abline(slope = 1, intercept = 0, color = "red", linetype = "solid") +
  xlab("True z1") + ylab("Posterior predictive of z1") + theme_bw() +
  theme(panel.grid = element_blank(), aspect.ratio = 1)

plot_z2_summ <- ggplot(data = z2_combn, aes(x = z)) +
  geom_errorbar(aes(ymin = zL, ymax = zU), alpha = 0.5, color = "skyblue") +
  geom_point(aes(y = zM), size = 0.5, color = "darkblue", alpha = 0.5) +
  geom_abline(slope = 1, intercept = 0, color = "red", linetype = "solid") +
  xlab("True z2") + ylab("Posterior predictive of z2") + theme_bw() +
  theme(panel.grid = element_blank(), aspect.ratio = 1)

ggpubr::ggarrange(plot_z1_summ, plot_z2_summ)
```
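As a final check, the interval summaries computed above can be turned into empirical coverage estimates for the two varying-coefficient processes; values close to 0.95 indicate well-calibrated posterior predictive intervals.

```{r, eval = FALSE}
# empirical coverage of the 95% posterior predictive intervals for z1 and z2
c(coverage_z1 = mean(z1_combn$z >= z1_combn$zL & z1_combn$z <= z1_combn$zU),
  coverage_z2 = mean(z2_combn$z >= z2_combn$zL & z2_combn$z <= z2_combn$zU))
```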