--- title: 'Low Dimensional Example of MultiRFM' author: "Wei Liu" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Low Dimensional Example of MultiRFM} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` This vignette introduces the usage of MultiRFM for the analysis of low-dimensional multi-study multivariate data with heavy tail, by comparison with other methods. The package can be loaded with the command, and define some metric functions: ```{r eval = FALSE} library(MultiRFM) trace_statistic_fun <- function(H, H0){ tr_fun <- function(x) sum(diag(x)) mat1 <- t(H0) %*% H %*% qr.solve(t(H) %*% H) %*% t(H) %*% H0 tr_fun(mat1) / tr_fun(t(H0) %*% H0) } trace_list_fun <- function(Hlist, H0list){ trvec <- rep(NA, length(Hlist)) for(i in seq_along(trvec)){ trvec[i] <- trace_statistic_fun(Hlist[[i]], H0list[[i]]) } return(mean(trvec)) } ``` ## Generate the simulated data First, we generate the simulated data with heavy tail, where the error term follows from a multivariate t-distribution with degree of freedom 3. ```{r eval = FALSE} nu <- 3 # nu is set to p <- 100 nvec <- c(150,200); q <- 3;qs <- c(2,2); S <- length(nvec) sigma2_eps <- 1 datList <- gendata_simu_multi(seed=1, nvec=nvec, p=p, q=q, qs=qs, rho=c(5,5), err.type='mvt', sigma2_eps = sigma2_eps, nu=nu) ``` Fit the MultiRFM model using the function `MultiRFM()` in the R package `MultiRFM`. Users can use `?MultiRFM` to see the details about this function. For two matrices $\widehat D$ and $D$, we use trace statistic to measure their similarity. The trace statistic ranges from 0 to 1, with higher values indicating better performance. ```{r eval = FALSE} methodNames <- c("MultiRFM", "MSFA-CAVI", "MSFA-SVI") metricMat <- matrix(NA, nrow=length(methodNames), ncol=5) colnames(metricMat) <- c('A_tr', 'B_tr', 'F_tr', 'H_tr', 'Time') row.names(metricMat) <- methodNames XList <- datList$Xlist; res <- MultiRFM(XList, q=q, qs=qs) #str(res) metricMat["MultiRFM",'Time'] <- res$time_use metricMat["MultiRFM",'A_tr'] <- trace_statistic_fun(res$A, datList$A0) metricMat["MultiRFM",'B_tr'] <- trace_list_fun(res$B, datList$Blist0) metricMat["MultiRFM",'F_tr'] <- trace_list_fun(res$F, datList$Flist) metricMat["MultiRFM",'H_tr'] <- trace_list_fun(res$H, datList$Hlist) ``` ## Compare with other methods We compare MultiRFM with two prominent methods: MSFA-CAVI and MSFA-SVI First, we implement MSFA-CAVI: ```{r eval = FALSE} X_s <- lapply(XList, scale, scale=FALSE) hmu <- sapply(XList, colMeans) library(VIMSFA) ### MSFA-CAVI print("MSFA-CAVI") tic <- proc.time() cavi_est <- cavi_msfa(X_s, K=q, J_s=qs) toc <- proc.time() time_cavi <- toc[3] - tic[3] hLam <- Reduce(cbind, cavi_est$mean_psi_s) hF_cavi <- hH_cavi <- list() for(s in 1:S){ # s <- 1 hF_cavi[[s]] <- t(Reduce(cbind, cavi_est$mean_f[[s]])) hH_cavi[[s]] <- t(Reduce(cbind, cavi_est$mean_l[[s]])) } metricMat["MSFA-CAVI",'Time'] <- time_cavi metricMat["MSFA-CAVI",'A_tr'] <- trace_statistic_fun(cavi_est$mean_phi, datList$A0) metricMat["MSFA-CAVI",'B_tr'] <- trace_list_fun(cavi_est$mean_lambda_s, datList$Blist0) metricMat["MSFA-CAVI",'F_tr'] <- trace_list_fun(hF_cavi, datList$Flist) metricMat["MSFA-CAVI",'H_tr'] <- trace_list_fun(hH_cavi, datList$Hlist) ``` Next, we implement MSFA-SVI: ```{r eval = FALSE} print("MSFA-SVI") tic <- proc.time() svi_est <- svi_msfa(X_s, K=q, J_s=qs, verbose = 0) toc <- proc.time() time_svi <- toc[3] - tic[3] hLam <- Reduce(cbind, svi_est$mean_psi_s) hF_cavi <- hH_cavi <- list() for(s in 1:S){ # s <- 1 hF_cavi[[s]] <- t(Reduce(cbind, svi_est$mean_f[[s]])) hH_cavi[[s]] <- t(Reduce(cbind, svi_est$mean_l[[s]])) } metricMat["MSFA-SVI",'Time'] <- time_cavi metricMat["MSFA-SVI",'A_tr'] <- trace_statistic_fun(svi_est$mean_phi, datList$A0) metricMat["MSFA-SVI",'B_tr'] <- trace_list_fun(svi_est$mean_lambda_s, datList$Blist0) metricMat["MSFA-SVI",'F_tr'] <- trace_list_fun(hF_cavi, datList$Flist) metricMat["MSFA-SVI",'H_tr'] <- trace_list_fun(hH_cavi, datList$Hlist) ``` ## Visualize the comparison of performance Next, we summarized the metrics for MultiRFM and other compared methods in a data.frame object. ```{r eval = FALSE} dat_metric <- data.frame(metricMat) dat_metric$Method <- factor(row.names(dat_metric), levels=row.names(dat_metric)) ``` Plot the results for MultiRFM and other methods, which suggests that MultiRFM achieves better estimation accuracy for the study-shared loading matrix A, study-specified loading matrix B and factor matrix H. MultiRFM significantly outperforms the compared methods in terms of estimation accuracy of B and H, as well as computational efficiency. ```{r eval = FALSE, fig.width=13, fig.height=5} library(cowplot) library(ggplot2) p1 <- ggplot(data=subset(dat_metric, !is.na(A_tr)), aes(x= Method, y=A_tr, fill=Method)) + geom_bar(stat="identity") + xlab(NULL) + scale_x_discrete(breaks=NULL) + theme_bw(base_size = 16) p2 <- ggplot(data=subset(dat_metric, !is.na(F_tr)), aes(x= Method, y=F_tr, fill=Method)) + geom_bar(stat="identity") + xlab(NULL) + scale_x_discrete(breaks=NULL)+ theme_bw(base_size = 16) p3 <- ggplot(data=subset(dat_metric, !is.na(B_tr)), aes(x= Method, y=B_tr, fill=Method)) + geom_bar(stat="identity") + xlab(NULL) + scale_x_discrete(breaks=NULL) + theme_bw(base_size = 16) p4 <- ggplot(data=subset(dat_metric, !is.na(H_tr)), aes(x= Method, y=H_tr, fill=Method)) + geom_bar(stat="identity") + xlab(NULL) + scale_x_discrete(breaks=NULL)+ theme_bw(base_size = 16) p5 <- ggplot(data=subset(dat_metric, !is.na(Time)), aes(x= Method, y=Time, fill=Method)) + geom_bar(stat="identity") + xlab(NULL) + scale_x_discrete(breaks=NULL)+ theme_bw(base_size = 16) plot_grid(p1,p2,p3, p4, p5, nrow=2, ncol=3) ``` ## Select the parameters We applied the proposed 'CUP' method to select the number of factors. The results showed that the CUP method has the potential to identify the true values. ```{r eval = FALSE} datList <- gendata_simu_multi(seed=1, nvec=nvec, p=p, q=q, qs=qs, rho=c(5,5), err.type='mvt', sigma2_eps = sigma2_eps, nu=3) XList <- datList$Xlist; q_max <- 6; qs_max <- 4 hq.list <- selectFac.MultiRFM(XList, q_max=q_max, qs_max=qs_max, verbose = FALSE) message("hq = ", hq.list$q, " VS true q = ", q) message("hqs.vec = ", paste(hq.list$qs, collapse =", "), " VS true qs.vec = ", paste(qs, collapse =", ")) ```
**Session Info** ```{r} sessionInfo() ```