Ivan Tomic info@ivantomic.com
You can install the released version of immunaut from CRAN with:
install.packages("immunaut")
Or you can install immunaut directly from GitHub using the following commands:
# Install the development version from GitHub (needs devtools; uncomment the
# next line if devtools is not installed yet).
# install.packages("devtools")
devtools::install_github("atomiclaboratory/immunaut", subdir = "R-package")

# Attach the package so its functions can be called without a prefix
library("immunaut")
# Simulate a demo dataset: 1000 subjects, 200 features, 4 clusters, and a 10%
# probability of missing values.
dataset <- generate_demo_data(
  n_subjects = 1000,
  n_features = 200,
  desired_number_clusters = 4,  # approximate number of clusters
  cluster_overlap_sd = 35,      # standard deviation for cluster overlap
  missing_prob = 0.1            # probability of missing values
)

# Build the file header for the dataset; it is passed on to the downstream
# analysis through the settings list.
file_header <- generate_file_header(dataset)
# Assemble the analysis settings section by section. c() concatenates the named
# sub-lists into one flat list, so `settings` has the same elements, in the
# same order, as a single list(...) call would produce.
settings <- c(
  # --- Input description and column selection ---
  list(
    fileHeader = file_header,
    seed = 1337,
    # Columns selected for analysis
    selectedColumns = colnames(dataset),
    # Outcome, age, and gender columns are excluded from the analysis
    excludedColumns = c("outcome", "age", "gender"),
    removeNA = TRUE
  ),
  # --- Clustering ---
  list(
    clusterType = "Louvain",
    target_clusters_range = c(3, 4),
    resolution_increments = c(0.01, 0.1, 0.2, 0.3, 0.4),
    min_modularities = c(0.5, 0.6, 0.7, 0.8),
    pickBestClusterMethod = "Modularity",
    weights = list(AUROC = 0.9, modularity = 0.05, silhouette = 0.05)
  ),
  # --- Pre-processing and model training ---
  list(
    preProcessDataset = c("scale", "center", "medianImpute", "corr", "zv", "nzv"),
    # Partition split used for the run
    selectedPartitionSplit = 0.7,
    selectedPackages = c("rf", "gcvEarth"),
    trainingTimeout = 180
  )
)

# Perform t-SNE and Louvain clustering using the 'immunaut' function
result <- immunaut(dataset, settings)
# Build the clustered t-SNE plot from the pieces stored in the immunaut()
# result: the normalised t-SNE info, the cluster data, and the settings used.
p <- plot_clustered_tsne(
  result$tsne_clust$info.norm,
  result$tsne_clust$cluster_data,
  result$settings
)

# Render the plot
print(p)
# Extract the dataset with the applied clustering from the result
dataset_ml <- result$dataset$dataset_ml

# Run the auto_simon_ml function to train machine learning models on the dataset
model_results <- auto_simon_ml(dataset_ml, settings)

# Names of the returned models; as.character() turns a NULL names attribute
# (empty or unnamed model list) into character(0) so the code below still works.
model_names <- as.character(names(model_results$models))

# Build one small data frame per model holding its AUROC value
# (One-vs-Rest for multiclass models).
auroc_rows <- lapply(model_names, function(model_name) {
  auroc_value <- model_results$models[[model_name]][["predictions"]][["AUROC"]]
  # A model with no stored AUROC is reported as NA so that every row keeps
  # both columns and the rows can be bound together.
  if (is.null(auroc_value)) {
    auroc_value <- NA_real_
  }
  data.frame(Model = model_name, AUROC = auroc_value, stringsAsFactors = FALSE)
})

# Bind all rows in a single call instead of growing the table inside a loop.
# The leading zero-row template guarantees a well-formed (Model, AUROC) table
# even when no models were returned.
model_auroc_table <- do.call(
  rbind,
  c(
    list(data.frame(
      Model = character(),
      AUROC = numeric(),
      stringsAsFactors = FALSE
    )),
    auroc_rows
  )
)
library(ggplot2)

# Bar chart of AUROC per model, with the rounded value printed above each bar.
# geom_col() draws bars whose heights are the y values themselves, the same as
# geom_bar(stat = "identity").
ggplot(model_auroc_table, aes(x = Model, y = AUROC, fill = Model)) +
  geom_col() +
  geom_text(aes(label = round(AUROC, 3)), vjust = -0.5) +
  labs(title = "AUROC for Models", x = "Model", y = "AUROC") +
  theme_minimal() +
  scale_fill_brewer(palette = "Set3")
# Switch the shared settings over to DBSCAN (density-based) clustering.
# modifyList() overwrites or appends the named entries and leaves every other
# setting untouched.
settings <- modifyList(
  settings,
  list(
    clusterType = "Density",
    minPtsAdjustmentFactor = 1.5,
    epsQuantile = 0.9
  )
)

# Run t-SNE and DBSCAN clustering
dbscan_result <- immunaut(dataset, settings)
#> [1] "====> Density-based clustering"
# Update settings for Mclust clustering
settings <- modifyList(
  settings,
  list(
    clusterType = "Mclust",
    clustGroups = 3  # number of clusters for Mclust
  )
)

# Run t-SNE and Mclust clustering
mclust_result <- immunaut(dataset, settings)
#> [1] "==> cluster_tsne_mclust clustGroups:  3"
#> fitting ...
#>   |===================================================================================================================| 100%
# Update settings for Hierarchical clustering
settings <- modifyList(
  settings,
  list(
    clusterType = "Hierarchical",
    clustLinkage = "ward.D2",
    clustGroups = 3
  )
)

# Run t-SNE and hierarchical clustering
hierarchical_result <- immunaut(dataset, settings)
#> [1] "====> Noise indices:  31"
#> [1] "====> Noise indices done"
library(immunaut)
library(ggplot2)
# Load the LAIV dataset shipped with the package and build its file header
data("immunautLAIV")
file_header <- generate_file_header(immunautLAIV)

# Base settings (shared across all runs unless overridden):
settings <- list()
settings$fileHeader <- file_header

# Columns used for the analysis
settings$selectedColumns <- c(
  "H1_HAI_FC", "H3_HAI_FC", "B_HAI_FC", "H1N1_NA_FC", "H3_HA_IgA_FC",
  "B_HA_IgA_FC", "B_NA_IgA_FC", "H1_CD8_IFNg_FC", "H3_CD8_IFNg_FC",
  "H3_CD8_IL2_FC", "HAB_CD8_IFNg_FC", "HAB_CD8_IL2_FC", "NC99_H1_FC",
  "SWISS_H3_FC", "HK14_H3_FC", "KAN_H3_FC", "B_PHU_FC", "cH7_IgA_FC",
  "N2_FC", "cH6_FC"
)

# Grouping variables. Names are written without stray whitespace so they can
# match the dataset's column names exactly.
settings$groupingVariables <- c(
  "max_HAI_responder", "max_iga_responder", "max_mnp_cd4_responder",
  "max_mnp_cd8_responder", "year", "sex", "v0_resp_virus_positive",
  "h1_v2_shed", "h3_v2_shed", "b_v2_shed", "h1_v7_shed", "h3_v7_shed",
  "b_v7_shed", "h1_v0_seropositive", "h3_v0_seropositive", "b_v0_seropositive"
)

# Pre-processing steps applied to the dataset
settings$preProcessDataset <- c(
  "medianImpute", "center", "scale", "corr", "nzv", "zv"
)

# Run immunaut on the LAIV data with these settings
result <- immunaut(immunautLAIV, settings)
# Build the clustered t-SNE plot and restyle it in one chained expression:
# Set2 colour palette, minimal theme, and descriptive labels.
p <- plot_clustered_tsne(
  result$tsne_clust$info.norm,
  result$tsne_clust$cluster_data,
  result$settings
) +
  scale_color_brewer(palette = "Set2") +
  theme_minimal(base_size = 12) +
  labs(
    title    = "Clustered t-SNE of Pediatric LAIV Data",
    subtitle = "Comprehensive Immune Response Dataset (244 Children)",
    color    = "Cluster"
  )

# Render the plot
print(p)