## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE) ## ----install, eval=FALSE------------------------------------------------------ # if (!require("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # # BiocManager::install("MeLSI") ## ----basic_setup-------------------------------------------------------------- # Load required packages library(MeLSI) library(vegan) library(ggplot2) # Generate synthetic microbiome data for demonstration # Using smaller dataset for faster vignette execution set.seed(42) test_data <- generate_test_data(n_samples = 40, n_taxa = 50, n_signal_taxa = 5) X <- test_data$counts y <- test_data$metadata$Group # Display data dimensions cat("Data dimensions:", nrow(X), "samples x", ncol(X), "taxa\n") cat("Groups:", paste(unique(y), collapse = ", "), "\n") ## ----preprocessing------------------------------------------------------------ # CLR transformation (recommended for microbiome data) # Use the built-in helper function for easy CLR transformation X_clr <- clr_transform(X) # Display preprocessing results cat("CLR transformation completed\n") cat("Data range:", round(range(X_clr), 3), "\n") ## ----run_melsi---------------------------------------------------------------- # Run MeLSI with default parameters # Note: For real analyses, use n_perms = 200+ for reliable p-values # Using reduced parameters here for faster vignette execution (<15 min for Bioconductor) cat("Running MeLSI analysis...\n") melsi_results <- melsi( X_clr, y, n_perms = 50, # Number of permutations (use 200+ for real analysis) B = 20, # Number of weak learners (default 30, reduced for speed) m_frac = 0.8, # Fraction of features per learner show_progress = TRUE ) # Display results cat("\nMeLSI Results:\n") cat(sprintf("F-statistic value: %.4f\n", melsi_results$F_observed)) cat(sprintf("P-value: %.4f\n", melsi_results$p_value)) cat(sprintf("Significant: %s\n", ifelse(melsi_results$p_value < 0.05, "Yes", "No"))) # VIP plot is automatically generated, but you can also create it manually: # Using top_n = 10 for faster execution plot_vip(melsi_results, top_n = 10) # PCoA plot using the learned MeLSI distance plot_pcoa(melsi_results, X_clr, y) ## ----comparison--------------------------------------------------------------- # Compare with Euclidean distance dist_euclidean <- dist(X_clr) adonis_euclidean <- adonis2(dist_euclidean ~ y, permutations = 50) # Compare with Bray-Curtis distance dist_bray <- vegdist(X, method = "bray") adonis_bray <- adonis2(dist_bray ~ y, permutations = 50) # Display comparison results cat("\nMethod Comparison:\n") cat(sprintf("MeLSI: F-statistic = %.4f, p = %.4f\n", melsi_results$F_observed, melsi_results$p_value)) cat(sprintf("Euclidean: F-statistic = %.4f, p = %.4f\n", adonis_euclidean[["F"]][1], adonis_euclidean[["Pr(>F)"]][1])) cat(sprintf("Bray-Curtis: F-statistic = %.4f, p = %.4f\n", adonis_bray[["F"]][1], adonis_bray[["Pr(>F)"]][1])) # Calculate improvement improvement <- (melsi_results$F_observed - adonis_euclidean[["F"]][1]) / adonis_euclidean[["F"]][1] * 100 cat(sprintf("\nMeLSI improvement over Euclidean: %.1f%%\n", improvement)) ## ----vip_plot----------------------------------------------------------------- # Plot VIP with directionality (default) # Using top_n = 10 for faster execution plot_vip(melsi_results, top_n = 10, title = "Top 10 Features by Importance") # Plot VIP without directionality coloring plot_vip(melsi_results, top_n = 10, directionality = FALSE) # Extract and examine feature weights programmatically feature_weights <- melsi_results$feature_weights top_features <- head(sort(feature_weights, decreasing = TRUE), 10) cat("\nTop 10 Most Weighted Features:\n") for (i in 1:length(top_features)) { cat(sprintf("%2d. %s (weight: %.4f)\n", i, names(top_features)[i], top_features[i])) } ## ----pcoa_plot---------------------------------------------------------------- # Create PCoA plot using the learned MeLSI distance matrix plot_pcoa(melsi_results, X_clr, y, title = "PCoA: MeLSI Distance") ## ----advanced_usage, eval=FALSE----------------------------------------------- # # Run MeLSI with custom parameters # # Note: This example is set to eval=FALSE for faster vignette execution # # Uncomment and run locally for full demonstration # cat("Running MeLSI with custom parameters...\n") # custom_results <- melsi( # X_clr, y, # n_perms = 200, # More permutations for higher precision (recommended: 200+) # B = 50, # Larger ensemble for more stable results # m_frac = 0.8, # Fraction of features per learner # show_progress = TRUE # ) # # cat(sprintf("Custom MeLSI F-statistic value: %.4f (p = %.4f)\n", # custom_results$F_observed, custom_results$p_value)) ## ----phyloseq_integration, eval=FALSE----------------------------------------- # # Load phyloseq data # library(phyloseq) # data(GlobalPatterns) # Example phyloseq object # # # Extract OTU table and sample data # X <- as(otu_table(GlobalPatterns), "matrix") # y <- sample_data(GlobalPatterns)$SampleType # # # CLR transformation # X_clr <- clr_transform(X) # # # Run MeLSI analysis # results <- melsi(X_clr, y, n_perms = 200, B = 30, show_progress = FALSE) # # Note: For vignette execution, use reduced parameters (n_perms=50, B=20) to stay under 15 minutes # # # Results can be used with other phyloseq functions # # For example, you can add the learned distance matrix to a phyloseq object ## ----session_info------------------------------------------------------------- sessionInfo()