## ----setup, include=FALSE----------------------------------------------------- library(BiocStyle) knitr::opts_chunk$set(eval = TRUE, echo = TRUE, collapse = TRUE) ## ----install-cran, echo=TRUE, eval=FALSE-------------------------------------- # install.packages(c("Rcpp", "RcppArmadillo", "parallel")) ## ----install-bioc, echo=TRUE, eval=FALSE-------------------------------------- # # Install Bioconductor dependencies # if (!requireNamespace("BiocManager", quietly = TRUE)) { # install.packages("BiocManager") # } # BiocManager::install(c("rhdf5", "strawr", "rtracklayer", "GenomicRanges", "BSgenome", "Biostrings")) ## ----load-package, echo=TRUE, eval=TRUE--------------------------------------- # Loading the package: # install.packages(HiCPotts) library(HiCPotts) ## ----simulate-data, include=FALSE, eval=TRUE---------------------------------- set.seed(123) N <- 10 bins <- seq(1, 20000, by = 2000) data <- expand.grid(start = bins, start.j. = bins) data$end.i. <- data$start + 1999 data$end <- data$start.j. + 1999 data$chrom <- "chr2L" data$GC <- runif(nrow(data), 0.3, 0.7) # Simulated GC content data$ACC <- runif(nrow(data), 0, 1) # Simulated accessibility data$TES <- rpois(nrow(data), 2) # Simulated TE counts data$interactions <- rpois(nrow(data), lambda = 5) # Simulated interaction counts data <- as.data.frame(data) head(data) ## ----get_data, include=FALSE, eval=FALSE-------------------------------------- # # Example (not run) # # data <- prepare_data( # file_path = "path/to/hic_file.h5", # chr = "chr2L", # start = 1, # end = 20000, # resolution = 2000, # genome_package = "BSgenome.Dmelanogaster.UCSC.dm6", # acc_wig = "path/to/dnase.wig", # chain_file = "path/to/chain.file", # te_granges = "path/to/te.gtf" # ) ## ----process_data, include=FALSE, eval=TRUE----------------------------------- processed <- process_data(data, N = N, scale_max = 500, standardization_y = TRUE) x_vars <- processed[["x_vars"]] # Single matrix for this example y <- processed[["y"]] str(x_vars) # Lists of distance, GC, TEs, ACC matrices ## ----setup_MCMC, include=FALSE, eval=TRUE------------------------------------- # Set MCMC parameters gamma_prior <- 0.3 iterations <- 20 theta_start <- c(0.5) # size_start <- c(1, 1, 1) dist <- "ZIP" ## ----run_MCMC, include=FALSE, eval=TRUE--------------------------------------- # Run MCMC results <- run_chain_betas( N = N, gamma_prior = gamma_prior, iterations = iterations, x_vars = x_vars, y = y, theta_start = theta_start, use_data_priors = TRUE, # user_fixed_priors = FALSE, # epsilon = NULL, distance_metric = "manhattan", dist = dist, # size_start = size_start, mc_cores = 1 ) ## ----MCMC_results, include=FALSE, eval=TRUE----------------------------------- # Extract results chains <- results[["chains"]] gamma <- results[["gamma"]] theta <- results[["theta"]] # size <- results[["size]] ## ----compute_result, include=FALSE, eval=TRUE--------------------------------- # Compute probabilities probs <- compute_HMRFHiC_probabilities( data = data, chain_betas = chains, iterations = iterations, dist = "ZIP" ) probs$prob1 # This produces matrices of probabilities for the first component, which translate to assigning interactions to biological states (e.g., active vs. inactive chromatin). ## ----User_prior, include=FALSE, eval=TRUE------------------------------------- user_priors <- list( list(mean = c(0, 0, 0, 0, 0), sd = c(1, 1, 1, 1, 1)), list(mean = c(0, 0, 0, 0, 0), sd = c(1, 1, 1, 1, 1)), list(mean = c(0, 0, 0, 0, 0), sd = c(1, 1, 1, 1, 1)) ) # Run MCMC results <- run_chain_betas( N = N, gamma_prior = gamma_prior, iterations = iterations, x_vars = x_vars, y = y, theta_start = theta_start, # use_data_priors = TRUE, user_fixed_priors = user_priors, # epsilon = NULL, distance_metric = "manhattan", dist = dist, # size_start = size_start, mc_cores = 1 ) ## ----multiple_matrices, include=FALSE, eval=FALSE----------------------------- # y_list <- list(y, y) # Example with two matrices # # results <- run_chain_betas( # N = N, # gamma_prior = gamma_prior, # iterations = iterations, # x_vars = x_vars, # y = y_list, # theta_start = theta_start, # size_start = size_start, # use_data_priors = TRUE, # user_fixed_priors = NULL, # epsilon = NULL, # distance_metric = "manhattan", # dist = dist, # mc_cores = 2 # ) ## ----Additional_distributions, include=FALSE, eval=TRUE----------------------- # Run MCMC results <- run_chain_betas( N = N, gamma_prior = gamma_prior, iterations = iterations, x_vars = x_vars, y = y, theta_start = theta_start, # use_data_priors = TRUE, user_fixed_priors = user_priors, # epsilon = NULL, distance_metric = "manhattan", dist = "Poisson", # size_start = size_start, mc_cores = 1 ) ## ----echo = FALSE------------------------------------------------------------- sessionInfo()