## ----chunk_setup, include = FALSE--------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----vignette_setup, echo=FALSE, message=FALSE, warning = FALSE--------------- # Track time spent on making the vignette. start_time <- Sys.time() pkg <- function(x) { paste0("[*", x, "*](https://cran.r-project.org/package=", x, ")") } pypkg <- function(x) { paste0("[*", x, "*](https://pypi.org/project/", x, "/)") } ## ----bioconductor_install, eval=FALSE----------------------------------------- # install.packages("BiocManager") # WORK iN PROGRESS # BiocManager::install("DOtools") ## ----github_install, eval=FALSE----------------------------------------------- # BiocManager::install("MarianoRuzJurado/DOtools") ## ----load_library, message=FALSE---------------------------------------------- library(DOtools) # Additional packages library(SummarizedExperiment) library(scran) library(scater) library(plyr) library(dplyr) library(tibble) library(enrichR) library(kableExtra) library(Seurat) ## ----Cellbender, eval=FALSE, warning = FALSE---------------------------------- # base <- DOtools:::.example_10x() # dir.create(file.path(base, "/cellbender")) # # raw_files <- list.files(base, # pattern = "raw_feature_bc_matrix\\.h5$", # recursive = TRUE, # full.names = TRUE # ) # # DO.CellBender( # cellranger_path = base, # output_path = file.path(base, "/cellbender"), # samplenames = c("disease"), # cuda = TRUE, # BarcodeRanking = FALSE, # cpu_threads = 48, # epochs = 150 # ) ## ----read_example_data, warning = FALSE, eval=FALSE--------------------------- # base <- DOtools:::.example_10x() # # paths <- c( # file.path(base, "healthy/outs/filtered_feature_bc_matrix.h5"), # file.path(base, "disease/outs/filtered_feature_bc_matrix.h5") # ) # # SCE_obj <- DO.Import( # pathways = paths, # ids = c("healthy-1", "disease-1"), # DeleteDoublets = TRUE, # cut_mt = .05, # min_counts = 500, # min_genes = 300, # high_quantile = .95, # Seurat = FALSE # Set to TRUE for Seurat object # ) ## ----preflt, out.width="100%", fig.align="center", fig.width=8, fig.height=6---- prefilterplots <- system.file( "figures", "prefilterplots-1.png", package = "DOtools" ) pQC1 <- magick::image_read(prefilterplots) plot(pQC1) ## ----postflt, out.width="100%", fig.align="center", fig.width=8, fig.height=6---- postfilterplots <- system.file( "figures", "postfilterplots-1.png", package = "DOtools" ) pQC2 <- magick::image_read(postfilterplots) plot(pQC2) ## ----Correlation, fig.width=6, fig.height=6----------------------------------- # Making sure we have a save folder base <- tempfile("my_tempdir_") dir.create(base) SCE_obj <- readRDS( system.file("extdata", "sce_data.rds", package = "DOtools" ) ) DO.Correlation(SCE_obj) ## ----CCA Integration, warning = FALSE----------------------------------------- SCE_obj <- DO.Integration( sce_object = SCE_obj, split_key = "orig.ident", HVG = TRUE, scale = TRUE, pca = TRUE, integration_method = "CCAIntegration" ) ## ----scVI Integration, warning=FALSE, eval=FALSE------------------------------ # # (Optional) Integration with scVI-Model # SCE_obj <- DO.scVI( # sce_object = SCE_obj, # batch_key = "orig.ident", # layer_counts = "counts", # layer_logcounts = "logcounts" # ) # # # SNN_Graph <- scran::buildSNNGraph(SCE_obj, # use.dimred = "scVI" # ) # # clust_SCVI <- igraph::cluster_louvain(SNN_Graph, # resolution = 0.3 # ) # # SCE_obj$leiden0.3 <- factor(igraph::membership(clust_SCVI)) # SCE_obj <- scater::runUMAP(SCE_obj, dimred = "scVI", name = "UMAP") ## ----Clustering and UMAP, warning = FALSE------------------------------------- DO.UMAP(SCE_obj, group.by = "leiden0.3" ) DO.UMAP(SCE_obj, group.by = "condition", legend.position = "right", label = FALSE ) ## ----annotation, warning = FALSE---------------------------------------------- SCE_obj <- DO.CellTypist(SCE_obj, modelName = "Healthy_COVID19_PBMC.pkl", runCelltypistUpdate = TRUE, over_clustering = "leiden0.3" ) DO.UMAP(SCE_obj, group.by = "autoAnnot", legend.position = "right") ## ----Manual annotation, fig.width=12, fig.height=7---------------------------- markers_list <- scran::findMarkers( SCE_obj, test.type = "t", groups = SingleCellExperiment::colData(SCE_obj)$autoAnnot, direction = "up", lfc = 0.25, pval.type = "any" ) # pick top 5 per cluster, naming adjustments annotation_Markers <- lapply(names(markers_list), function(cluster) { df <- as.data.frame(markers_list[[cluster]]) df$gene <- rownames(df) df$cluster <- cluster df %>% rename( avg_log2FC = summary.logFC, p_val = p.value, p_val_adj = FDR ) %>% dplyr::select(gene, cluster, avg_log2FC, p_val, p_val_adj) }) %>% bind_rows() # or with seurat if preferred Seu_obj <- as.Seurat(SCE_obj) annotation_Markers <- FindAllMarkers( object = Seu_obj, assay = "RNA", group.by = "autoAnnot", min.pct = 0.25, logfc.threshold = 0.25 ) annotation_Markers <- annotation_Markers %>% arrange(desc(avg_log2FC)) %>% distinct(gene, .keep_all = TRUE) %>% group_by(cluster) %>% slice_head(n = 5) p1 <- DO.Dotplot( sce_object = SCE_obj, Feature = annotation_Markers, group.by.x = "leiden0.3", plot.margin = c(1, 1, 1, 1), annotation_x = TRUE, point_stroke = 0.1, annotation_x_rev = TRUE, textSize = 14, hjust = 0.5, vjust = 0, textRot = 0, segWidth = 0.3, lwd = 3 ) # manual set of markers annotation_Markers <- data.frame( cluster = c( "ImmuneCells", rep("B_cells", 3), rep("T_cells", 3), rep("NK", 2), rep("Myeloid", 3), rep("pDC", 3) ), genes = c( "PTPRC", "CD79A", "BANK1", "MS4A1", "CD3E", "CD4", "IL7R", "NKG7", "KLRD1", "CD68", "CD14", "ITGAM", "LILRA4", "CLEC4C", "LRRC26" ) ) p2 <- DO.Dotplot( sce_object = SCE_obj, Feature = annotation_Markers, group.by.x = "leiden0.3", plot.margin = c(1, 1, 1, 1), annotation_x = TRUE, point_stroke = 0.1, annotation_x_rev = TRUE, textSize = 14, hjust = 0.5, vjust = 0, textRot = 0, segWidth = 0.3, lwd = 3 ) # Visualise marker expression in UMAP DO.UMAP(SCE_obj, FeaturePlot = TRUE, features = "NKG7", group.by = "leiden0.3", legend.position = "right" ) ## ----renaming----------------------------------------------------------------- SCE_obj$annotation <- plyr::revalue(SCE_obj$leiden0.3, c( `1` = "T_cells", `2` = "T_cells", `3` = "NK", `4` = "B_cells", `5` = "Monocytes", `6` = "NK", `7` = "T_cells", `8` = "pDC" )) DO.UMAP(SCE_obj, group.by = "annotation", legend.position = "right") ## ----cell populations, warning=FALSE------------------------------------------ DO.CellComposition(SCE_obj, assay_normalized = "RNA", cluster_column = "annotation", sample_column = "orig.ident", condition_column = "condition", transform_method = "arcsin", n_reps = 3 ) ## ----Recluster, warning=FALSE------------------------------------------------- SCE_obj <- DO.FullRecluster(SCE_obj, over_clustering = "annotation") DO.UMAP(SCE_obj, group.by = "annotation_recluster") T_cells <- DO.Subset(SCE_obj, ident = "annotation_recluster", ident_name = grep("T_cells", unique(SCE_obj$annotation_recluster), value = TRUE ) ) T_cells <- DO.CellTypist(T_cells, modelName = "Healthy_COVID19_PBMC.pkl", runCelltypistUpdate = FALSE, over_clustering = "annotation_recluster", SeuV5 = FALSE ) T_cells$annotation <- plyr::revalue( T_cells$annotation_recluster, c( `T_cells_1` = "CD4_T_cells", `T_cells_2` = "CD4_T_cells", `T_cells_3` = "CD4_T_cells", `T_cells_4` = "CD8_T_cells" ) ) ## ----re-annotate-------------------------------------------------------------- SCE_obj <- DO.TransferLabel(SCE_obj, Subset_obj = T_cells, annotation_column = "annotation", subset_annotation = "annotation" ) DO.UMAP(SCE_obj, group.by = "annotation", legend.position = "right") ## ----GO analysis, warning=FALSE----------------------------------------------- # this data set contains only one sample per condition # we introduce replicates for showing the pseudo bulk approach set.seed(123) SCE_obj$orig.ident2 <- sample(rep(c("A", "B", "C", "D", "E", "F"), length.out = ncol(SCE_obj) )) CD4T_cells <- DO.Subset(SCE_obj, ident = "annotation", ident_name = "CD4_T_cells" ) DGE_result <- DO.MultiDGE(CD4T_cells, sample_col = "orig.ident2", method_sc = "wilcox", ident_ctrl = "healthy" ) head(DGE_result, 10) %>% kable(format = "html", table.attr = "style='width:100%;'") %>% kable_styling(bootstrap_options = c( "striped", "hover", "condensed", "responsive" )) ## ----GO analysis2, warning=FALSE---------------------------------------------- result_GO <- DO.enrichR( df_DGE = DGE_result, gene_column = "gene", pval_column = "p_val_adj_SC_wilcox", log2fc_column = "avg_log2FC_SC_wilcox", pval_cutoff = 0.05, log2fc_cutoff = 0.25, path = NULL, filename = "", species = "Human", go_catgs = "GO_Biological_Process_2023" ) head(result_GO, 5) %>% kable(format = "html", table.attr = "style='width:100%;'") %>% kable_styling(bootstrap_options = c( "striped", "hover", "condensed", "responsive" )) ## ----GO visualisation, fig.width=10, fig.height=8----------------------------- result_GO_sig <- result_GO[result_GO$Adjusted.P.value < 0.05, ] result_GO_sig$celltype <- "CD4T_cells" DO.SplitBarGSEA( df_GSEA = result_GO_sig, term_col = "Term", col_split = "Combined.Score", cond_col = "State", pos_cond = "enriched", showP = FALSE, path = paste0(base, "/") ) GSEA_plot <- list.files( path = base, pattern = "SplitBar.*\\.svg$", full.names = TRUE, recursive = TRUE ) plot(magick::image_read_svg(GSEA_plot)) ## ----dotplot, fig.width=5, fig.height=5--------------------------------------- DO.Dotplot( sce_object = SCE_obj, group.by.x = "condition", group.by.y = "annotation", Feature = "NKG7", stats_y = TRUE ) ## ----Heat map----------------------------------------------------------------- path_file <- tempfile("dotools_plots_") dir.create(path_file, recursive = TRUE, showWarnings = FALSE) DO.Heatmap(SCE_obj, group_by = "leiden0.3", features = rownames(SCE_obj)[1:10], xticks_rotation = 45, path = path_file, stats_x_size = 20, showP = FALSE ) Heatmap_plot <- list.files( path = path_file, pattern = "Heatmap*\\.svg$", full.names = TRUE, recursive = TRUE ) plot(magick::image_read_svg(Heatmap_plot)) ## ----Violin, fig.width=8, fig.height=5.5, warning = FALSE--------------------- SCE_obj_sub <- DO.Subset(SCE_obj, ident = "annotation", ident_name = c("NK", "CD4_T_cells", "B_cells") ) DO.VlnPlot(SCE_obj_sub, Feature = "NKG7", group.by = "condition", group.by.2 = "annotation", ctrl.condition = "healthy" ) ## ----Bar, fig.width=5, fig.height=6, warning = FALSE-------------------------- SCE_obj_NK <- DO.Subset(SCE_obj, ident = "annotation", ident_name = "NK" ) DO.BarplotWilcox(SCE_obj_NK, group.by = "condition", ctrl.condition = "healthy", Feature = "NKG7", x_label_rotation = 0 ) ## ----Box, fig.width=5, fig.height=6, warning = FALSE-------------------------- set.seed(123) SCE_obj$rdm_sample <- sample(rep(c("A", "B", "C"), length.out = ncol(SCE_obj) )) DO.BoxPlot(SCE_obj, group.by = "rdm_sample", ctrl.condition = "A", Feature = "nCount_RNA", step_mod = 50, stat_pos_mod = 1.05, plot_sample = FALSE ) ## ----session_info, echo=FALSE----------------------------------------------------------------------------------------- options(width = 120) sessioninfo::session_info()