DataFusionGDM provides tools to simulate genetic distance matrices (GDMs), compare and align distance spaces via multidimensional scaling (MDS) and Procrustes analysis, and evaluate imputation under structured missingness (BESMI).
# Install from GitHub
if (!requireNamespace("remotes", quietly = TRUE)) install.packages("remotes")
remotes::install_github("jiashuaiz/DataFusion-GDM")
library(DataFusionGDM)
res <- run_genetic_scenario("default", n_pops = 30, seed = 2025)
#>
#> Raw distances before transformation :
#> Dimensions: 30 30
#> Is symmetric: TRUE
#> Range of distances: 0 30.34014
#> Mean distance: 15.88792
#> Diagonal values - min: 0 max: 0
#> Quantiles: 0%: 0 25%: 7.938344 50%: 16.54102 75%: 23.1751 100%: 30.34014
#>
#> Final genetic distances after transformation :
#> Dimensions: 30 30
#> Is symmetric: TRUE
#> Range of distances: 0 1
#> Mean distance: 0.7895569
#> Diagonal values - min: 0 max: 0
#> Quantiles: 0%: 0 25%: 0.7268217 50%: 0.8404666 75%: 0.9433305 100%: 1
# Display MDS plot (heatmap requires ComplexHeatmap, not shown here)
res$plots$mds()
# create two related matrices from the simulated GDM
G <- res$results$distance_matrix
A <- G + matrix(rnorm(length(G), 0, 0.02), nrow = nrow(G)); diag(A) <- 0
B <- G + matrix(rnorm(length(G), 0.03, 0.02), nrow = nrow(G)); diag(B) <- 0
mds <- perform_mds(A, B)
Yt <- apply_procrustes(mds$X, mds$Y, mds$Y)
B_cal <- coords_to_distances(Yt)
mean((A - B)^2)
#> [1] 0.001681216
mean((A - B_cal)^2)
#> [1] 0.01596116
# Prepare a masked dataset in-memory
mask <- matrix(FALSE, nrow = nrow(G), ncol = ncol(G))
sel <- seq_len(min(5, nrow(G)))
mask[sel, sel] <- TRUE
M_input <- G; M_input[mask] <- NA
# Impute
impt <- besmi_iterative_imputation(M_input, M_mask = mask, M_real = G, max_iterations = 3)
#> Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per
#> fold
#> Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per
#> fold
#> Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per
#> fold
#> Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per
#> fold
#> Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per
#> fold
#> Warning: Number of logged events: 10
#> Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per
#> fold
#> Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per
#> fold
#> Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per
#> fold
#> Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per
#> fold
#> Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per
#> fold
#> Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per
#> fold
#> Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per
#> fold
#> Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per
#> fold
#> Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per
#> fold
#> Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per
#> fold
#> Warning: Number of logged events: 15
#> Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per
#> fold
#> Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per
#> fold
#> Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per
#> fold
#> Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per
#> fold
#> Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per
#> fold
#> Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per
#> fold
#> Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per
#> fold
#> Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per
#> fold
#> Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per
#> fold
#> Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per
#> fold
#> Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per
#> fold
#> Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per
#> fold
#> Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per
#> fold
#> Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per
#> fold
#> Warning: Option grouped=FALSE enforced in cv.glmnet, since < 3 observations per
#> fold
#> Warning: Number of logged events: 20
str(impt$metrics)
#> 'data.frame': 3 obs. of 9 variables:
#> $ k : logi NA NA NA
#> $ bs : logi NA NA NA
#> $ iteration : num 1 2 3
#> $ imputation_dis : num 0.316 0.333 0.317
#> $ propagation_dis: num 0.0668 0.0619 0.0598
#> $ runtime : num NA NA NA
#> $ improvement_pct: num NA NA NA
#> $ converged : logi FALSE FALSE FALSE
#> $ averaged : logi FALSE TRUE TRUE

See inst/examples for fuller pipelines that write results to disk in a project context.