## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", ## see https://stat.ethz.ch/pipermail/bioc-devel/2020-April/016656.html crop = NULL ) ## ----vignetteSetup, echo=FALSE, message=FALSE, warning = FALSE---------------- ## Bib setup library("RefManageR") ## Write bibliography information bib <- c( R = citation(), BiocStyle = citation("BiocStyle")[1], knitr = citation("knitr")[1], RefManageR = citation("RefManageR")[1], rmarkdown = citation("rmarkdown")[1], sessioninfo = citation("sessioninfo")[1], testthat = citation("testthat")[1], StatescopeR = citation("StatescopeR")[1] ) ## ----"install", eval = FALSE-------------------------------------------------- # if (!requireNamespace("BiocManager", quietly = TRUE)) { # install.packages("BiocManager") # } # # BiocManager::install("StatescopeR") # # ## Check that you have a valid Bioconductor installation # BiocManager::valid() ## ----'Prepare scRNAseq data'-------------------------------------------------- ## Load StatescopeR & scRNAseq (for example data) suppressMessages(library(StatescopeR)) suppressMessages(library(scRNAseq)) suppressMessages(library(scuttle)) ## Load SegerstolpePancreas data set scRNAseq <- SegerstolpePancreasData() ## remove duplicate genes scRNAseq <- scRNAseq[!duplicated(rownames(scRNAseq)), ] ## Subset to 1 healthy and type 2 diabetes samples scRNAseq <- scRNAseq[, scRNAseq$individual %in% c("H2", "T2D2", "T2D3")] ## remove cells with no cell type label scRNAseq <- scRNAseq[, !is.na(scRNAseq$`cell type`)] ## remove very rare cell types (<100 cells in total data set) celltypes_to_remove <- names(table(scRNAseq$`cell type`)[(table(scRNAseq$`cell type`) < 100)]) scRNAseq <- scRNAseq[, !scRNAseq$`cell type` %in% celltypes_to_remove] ## ----'Prepare StatescopeR input'---------------------------------------------- ## Normalize (cp10k) and logtransform scRNAseq cpm(scRNAseq) <- calculateCPM(scRNAseq) logcounts(scRNAseq) <- log1p(cpm(scRNAseq) / 100) ## Create scRNAseq reference/signature with 5 hvg for quick example signature <- create_signature(scRNAseq, hvg_genes = TRUE, n_hvg_genes = 5L, labels = scRNAseq$`cell type` ) ## select subset of genes for deconvolution (3/5 hvg to make it quick) selected_genes <- select_genes(scRNAseq, 3L, 5L, labels = scRNAseq$`cell type` ) ## Create pseudobulk and normalize to cp10k (logging is done within Statescope) pseudobulk <- aggregateAcrossCells(scRNAseq, ids = scRNAseq$individual) normcounts(pseudobulk) <- calculateCPM(pseudobulk) / 100 pseudobulk <- as(pseudobulk, "SummarizedExperiment") rownames(pseudobulk) <- rownames(scRNAseq) ## (optional) Create prior expectation prior <- gather_true_fractions(scRNAseq, ids = scRNAseq$individual, label_col = "cell type") # Use True sc fractions for this prior[rownames(prior) != "ductal cell", ] <- NA # Keep only ductal cells prior <- t(prior) # Transpose it to nSample x nCelltype ## ----'Run StatescopeR'-------------------------------------------------------- ## Run Deconvolution module Statescope <- BLADE_deconvolution(signature, pseudobulk, selected_genes, prior, cores = 1L, Nrep = 1L) ## Run Refinement module Statescope <- Refinement(Statescope, signature, pseudobulk, cores = 2L) ## Run State Discovery module (pick k=2 for quick example) Statescope <- StateDiscovery(Statescope, k = 2L, Ncores = 2L) ## ----'Evaluate Statescope'---------------------------------------------------- ## Count fractions of cells per sample in the pseudobulk true_fractions <- gather_true_fractions(scRNAseq, ids = scRNAseq$individual, label_col = "cell type") ## Plot correlation and RMSE with true fractions per celltype fraction_eval(Statescope, true_fractions) ## ----'Downstream analysis/visualizations'------------------------------------- ## Show predicted fractions metadata(Statescope)$fractions ## Show predicted ductal cell type specific gene expression profiles assay(metadata(Statescope)$ct_specific_gep$`ductal cell`, "weighted_gep") ## Show ductal cell state scores per sample metadata(Statescope)$statescores$`ductal cell` ## Show ductal cell state loadings metadata(Statescope)$stateloadings$`ductal cell` ## Plot heatmap of fractions fraction_heatmap(Statescope) ## Plot barplot of top loadings barplot_stateloadings(Statescope) ## ----'Group expectations'----------------------------------------------------- ## define cell types in one group grouped_cts <- c("alpha cell", "acinar cell") ## initialize group assignment matrix (nCelltype x nGroup) with 0's celltype_names <- colnames(signature$mu) group_names <- c("Group", setdiff(celltype_names, grouped_cts)) nCelltype <- length(celltype_names) nGroup <- length(group_names) group <- matrix(0, nrow = nGroup, ncol = nCelltype, dimnames = list(group_names, celltype_names) ) ## initialize prior fraction matrix (nSamples x nCelltypes) with NA's nSamples <- ncol(pseudobulk) sample_names <- colnames(pseudobulk) prior <- matrix( nrow = nSamples, ncol = nGroup, dimnames = list(sample_names, group_names) ) ## Assign cell types to groups for (ct in celltype_names) { if (ct %in% grouped_cts) { ## assign cell type to group group["Group", ct] <- 1 } else { ## Assign cell type to its own group group[ct, ct] <- 1 } } ## Add grouped prior fractions to prior prior[names(true_fractions), "Group"] <- colSums(as.matrix( true_fractions[grouped_cts, ])) ## init group prior group_prior <- list("Group" = group, "Expectation" = prior) ## Now you can just run Statescope as with a normal prior. Note we do not give ## a ductal cell prior this time Statescope <- BLADE_deconvolution(signature, pseudobulk, selected_genes, group_prior, cores = 1L, Nrep = 1L ) ## ----"citation"--------------------------------------------------------------- ## Citation info citation("StatescopeR") ## ----reproduce3, echo=FALSE--------------------------------------------------- ## Session info library("sessioninfo") options(width = 80) session_info()