## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set(echo = TRUE) ## ----eval = FALSE------------------------------------------------------------- # install.packages("polyqtlR") ## ---- eval = FALSE------------------------------------------------------------ # install.packages("Rcpp") # install.packages("foreach") # install.packages("doParallel") ## ----------------------------------------------------------------------------- library(polyqtlR) ## ----------------------------------------------------------------------------- data("phased_maplist.4x", "SNP_dosages.4x", "Phenotypes_4x") ## ----eval = FALSE------------------------------------------------------------- # IBD_4x <- estimate_IBD(phased_maplist = phased_maplist.4x, # genotypes = SNP_dosages.4x, # ploidy = 4, # bivalent_decoding = FALSE, # ncores = 4) ## ----eval = FALSE------------------------------------------------------------- # nc <- parallel::detectCores() - 1 ## ----echo = FALSE------------------------------------------------------------- nc <- 2 #to pass CRAN checks ## ---- eval = FALSE------------------------------------------------------------ # IBD_4x <- import_IBD(method = "TO", #TO for TetraOrigin # folder = "TetraOrigin", # filename = paste0("TetraOrigin_Output_bivs_LinkageGroup",1:5,"_Summary"), # bivalent_decoding = TRUE) ## ----eval = FALSE------------------------------------------------------------- # Importing map data under description inferTetraOrigin-Summary,Genetic map of biallelic markers # Importing parental phasing under description inferTetraOrigin-Summary,MAP of parental haplotypes # Importing IBD data under description inferTetraOrigin-Summary,Conditonal genotype probability ## ---- eval = FALSE------------------------------------------------------------ # IBD_4x <- import_IBD(method = "PO", #PO for PolyOrigin # folder = "PolyOrigin", # filename = "PolyOrigin_Output") ## ----eval=FALSE--------------------------------------------------------------- # IBD_4x <- estimate_IBD(phased_maplist = phased_maplist.4x, # genotypes = SNP_dosages.4x, # method = "heur", # ploidy = 4) ## ---- eval = FALSE------------------------------------------------------------ # thinned_maplist.4x <- thinmap(maplist = phased_maplist.4x, # dosage_matrix = SNP_dosages.4x) ## ---- echo = FALSE------------------------------------------------------------ cat("87 markers from a possible 93 on LG1 were included. 89 markers from a possible 93 on LG2 were included.") ## ---- out.width = "780px", echo = FALSE--------------------------------------- knitr::include_graphics("figures/thinmap.png") ## ----eval = FALSE------------------------------------------------------------- # IBD_4x.spl <- spline_IBD(IBD_list = IBD_4x, # gap = 1) #grid at 1 cM spacing ## ----echo = FALSE------------------------------------------------------------- IBD_4x <- readRDS(file = "IBD_4x_DR.RDS") ## ---- fig.width = 8, fig.height = 7------------------------------------------- visualiseHaplo(IBD_list = IBD_4x, display_by = "name", linkage_group = 1, select_offspring = colnames(SNP_dosages.4x)[3:11], #cols 1+2 are parents multiplot = c(3,3)) #plot layout in 3x3 grid ## ---- echo = FALSE------------------------------------------------------------ IBDnoisy <- readRDS("noisyIBD.RDS") ## ---- fig.width = 8, fig.height = 7, echo = FALSE----------------------------- visualiseHaplo(IBDnoisy, display_by = "name", select_offspring = "all", multiplot = c(3,3)) ## ----echo = FALSE------------------------------------------------------------- data("IBD_4x") ## ----------------------------------------------------------------------------- GIC_4x <- estimate_GIC(IBD_list = IBD_4x) ## ----------------------------------------------------------------------------- visualiseGIC(GIC_list = GIC_4x) ## ----------------------------------------------------------------------------- dim(Phenotypes_4x) head(Phenotypes_4x) ## ----eval = FALSE------------------------------------------------------------- # qtl_LODs.4x <- QTLscan(IBD_list = IBD_4x, # Phenotype.df = Phenotypes_4x, # genotype.ID = "geno", # trait.ID = "pheno", # block = "year") ## ----echo = FALSE------------------------------------------------------------- qtl_LODs.4x <- readRDS("qtl_LODs.4x.RDS") ## ---- fig.align='center',fig.height=10---------------------------------------- plotQTL(LOD_data = qtl_LODs.4x, layout = "g", colour = "darkgreen") ## ----------------------------------------------------------------------------- plotQTL(LOD_data = qtl_LODs.4x, colour = "dodgerblue") ## ----eval = FALSE------------------------------------------------------------- # qtl_LODs.4x <- QTLscan(IBD_list = IBD_4x, # Phenotype.df = Phenotypes_4x, # genotype.ID = "geno", # trait.ID = "pheno", # block = "year", # perm_test = TRUE, # ncores = nc) ## ----echo = FALSE------------------------------------------------------------- qtl_LODs.4x <- readRDS("qtl_LODs.4x_perm.RDS") ## ----------------------------------------------------------------------------- plotQTL(LOD_data = qtl_LODs.4x, colour = "dodgerblue") ## ----------------------------------------------------------------------------- blues <- BLUE(data = Phenotypes_4x, model = pheno~geno, random = ~1|year, genotype.ID = "geno") ## ----eval = FALSE------------------------------------------------------------- # qtl_LODs.4x <- QTLscan(IBD_list = IBD_4x, # Phenotype.df = blues, # genotype.ID = "geno", # trait.ID = "blue", # perm_test = TRUE, # ncores = nc) ## ----echo = FALSE------------------------------------------------------------- qtl_LODs.4x <- readRDS("qtl_LODs.4x_fastpermute.RDS") ## ----------------------------------------------------------------------------- plotQTL(LOD_data = qtl_LODs.4x, colour = "dodgerblue") ## ----------------------------------------------------------------------------- findPeak(qtl_LODs.4x, linkage_group = 1) ## ----eval = FALSE------------------------------------------------------------- # qtl_LODs.4x_cofactor <- QTLscan(IBD_list = IBD_4x, # Phenotype.df = Phenotypes_4x, # genotype.ID = "geno", # trait.ID = "pheno", # block = "year", # cofactor_df = data.frame("LG" = 1, # "cM" = 12.3), # perm_test = FALSE, # ncores = nc)#nc is the number of cores, defined earlier ## ----echo = FALSE------------------------------------------------------------- qtl_LODs.4x_cofactor <- readRDS("qtl_LODs.4x_cofactor.RDS") ## ----------------------------------------------------------------------------- new_pheno <- qtl_LODs.4x_cofactor$Residuals head(new_pheno) colnames(new_pheno) ## ----------------------------------------------------------------------------- blues <- BLUE(data = new_pheno, model = Pheno~geno, random = ~1|Block, genotype.ID = "geno") ## ----eval = FALSE------------------------------------------------------------- # qtl_LODs.4x_cofactor <- QTLscan(IBD_list = IBD_4x, # Phenotype.df = blues, # genotype.ID = "geno", # trait.ID = "blue", # perm_test = TRUE, # ncores = nc) ## ----echo = FALSE------------------------------------------------------------- qtl_LODs.4x_cofactor <- readRDS("qtl_LODs.4x_cof_fastpermute.RDS") ## ----------------------------------------------------------------------------- plotQTL(LOD_data = qtl_LODs.4x_cofactor, colour = "red") ## ----eval = FALSE------------------------------------------------------------- # blues <- BLUE(data = Phenotypes_4x, # model = pheno~geno, # random = ~1|year, # genotype.ID = "geno") # # QTLmodel <- check_cofactors(IBD_list = IBD_4x, # Phenotype.df = blues, # genotype.ID = "geno", # trait.ID = "blue", # LOD_data = qtl_LODs.4x, # ncores = nc) # # # QTLmodel ## ----echo = FALSE------------------------------------------------------------- blues <- BLUE(data = Phenotypes_4x, model = pheno~geno, random = ~1|year, genotype.ID = "geno") QTLmodel <- readRDS(file = "QTLmodel.RDS") QTLmodel ## ----------------------------------------------------------------------------- PVE(IBD_list = IBD_4x, Phenotype.df = blues, genotype.ID = "geno", trait.ID = "blue", QTL_df = QTLmodel) ## ----------------------------------------------------------------------------- plotQTL(LOD_data = list(qtl_LODs.4x, qtl_LODs.4x_cofactor)) ## ----------------------------------------------------------------------------- blues <- BLUE(data = Phenotypes_4x, model = pheno~geno, random = ~1|year, genotype.ID = "geno") ## ----------------------------------------------------------------------------- qtl_explored <- exploreQTL(IBD_list = IBD_4x, Phenotype.df = blues, genotype.ID = "geno", trait.ID = "blue", linkage_group = 1, LOD_data = qtl_LODs.4x) ## ----------------------------------------------------------------------------- default_QTLconfig <- get("segList_4x",envir = getNamespace("polyqtlR")) default_QTLconfig[1:2] length(default_QTLconfig) ## ----------------------------------------------------------------------------- default_QTLconfig[[1]] #replace 1 with 27 to see the second most-likely model ## ----eval = FALSE------------------------------------------------------------- # visualiseQTLeffects(IBD_list = IBD_4x, # Phenotype.df = blues, # genotype.ID = "geno", # trait.ID = "blue", # linkage_group = 1, # LOD_data = qtl_LODs.4x) ## ---- out.width = "780px", echo = FALSE--------------------------------------- knitr::include_graphics("figures/qtl_effects.png") ## ----------------------------------------------------------------------------- hist(x = blues$blue) ## ----eval = FALSE------------------------------------------------------------- # visualiseHaplo(IBD_list = IBD_4x, # display_by = "phenotype", # Phenotype.df = blues, # genotype.ID = "geno", # trait.ID = "blue", # pheno_range = c(44,max(blues$blue)), # linkage_group = 1, # multiplot = c(3,3)) ## ---- out.width = "780px", echo = FALSE--------------------------------------- knitr::include_graphics("figures/fish_hom1.png") ## ----eval = FALSE------------------------------------------------------------- # qtl_SMR.4x <- singleMarkerRegression(dosage_matrix = SNP_dosages.4x, # Phenotype.df = blues, # genotype.ID = "geno", # trait.ID = "blue", # return_R2 = TRUE, # perm_test = TRUE, # ncores = nc, # maplist = phased_maplist.4x) ## ----echo = FALSE------------------------------------------------------------- qtl_SMR.4x <- readRDS("qtl_SMR.4x.RDS") ## ----------------------------------------------------------------------------- plotQTL(LOD_data = list(qtl_LODs.4x, qtl_SMR.4x), plot_type = c("lines","points"), #IBD results as lines, SMR results as points.. pch = 19, colour = c("darkgreen","navyblue")) ## ----------------------------------------------------------------------------- PVE(IBD_list = IBD_4x, Phenotype.df = Phenotypes_4x, genotype.ID = "geno", trait.ID = "pheno", block = "year", QTL_df = data.frame("LG" = 1,"cM" = 12.3)) ## ----eval = FALSE------------------------------------------------------------- # mr <- meiosis_report(IBD_list = IBD_4x) ## ---- out.width = "500px", echo = FALSE--------------------------------------- knitr::include_graphics("figures/mr1.png") ## ---- out.width = "500px", echo = FALSE--------------------------------------- knitr::include_graphics("figures/mr2.png") ## ----echo = FALSE------------------------------------------------------------- mr <- readRDS("mr.RDS") ## ----------------------------------------------------------------------------- par(mfrow = c(1,2)) visualisePairing(meiosis_report.ls = mr, parent = "P1", datawidemax = 5) ## ----------------------------------------------------------------------------- recom.ls <- count_recombinations(IBD_4x) ## ----------------------------------------------------------------------------- layout(matrix(c(1,3,2,3),nrow=2)) #make tidy layout RLS_summary <- plotRecLS(recom.ls) #capture the function output as well ## ---- fig.width = 8, fig.height = 7------------------------------------------- visualiseHaplo(IBD_list = IBD_4x, display_by = "name", linkage_group = 1, select_offspring = colnames(SNP_dosages.4x)[3:11], #cols 1+2 are parents multiplot = c(3,3), #plot layout in 3x3 grid recombination_data = recom.ls) ## ----eval = FALSE------------------------------------------------------------- # visHap1 <- visualiseHaplo(IBD_list = IBD_4x, # display_by = "name", # select_offspring = "all", # linkage_group = 1, # cM_range = c(1.56,50), # recombinant_scan = c(1,3), # multiplot = c(4,2)) ## ----------------------------------------------------------------------------- visHap1 ## ---- fig.width = 8, fig.height = 7------------------------------------------- visualiseHaplo(IBD_list = IBD_4x, display_by = "name", select_offspring = visHap1$recombinants, linkage_group = 1, cM_range = c(1.56,50),#note: start position = first marker @1.56 cM recombinant_scan = c(1,3), multiplot = c(3,2), recombination_data = recom.ls) # we can add this track for clarity ## ----eval = FALSE------------------------------------------------------------- # visHap2 <- visualiseHaplo(IBD_list = IBD_4x, # display_by = "name", # select_offspring = "all", # linkage_group = 1, # cM_range = c(1.56,15), # allele_fish = 1, # multiplot = c(4,2)) ## ----------------------------------------------------------------------------- visHap2 ## ----fig.width = 7, fig.height = 7-------------------------------------------- visualiseHaplo(IBD_list = IBD_4x, display_by = "name", select_offspring = visHap2$allele_fish$h1, linkage_group = 1, cM_range = c(1.56,15), multiplot = c(4,4)) ## ---- eval = FALSE------------------------------------------------------------ # IBD_multiError <- maxL_IBD(phased_maplist = phased_maplist.4x, # genotypes = SNP_dosages.4x, # ploidy = 4, # bivalent_decoding = FALSE, # errors = c(0.01,0.02,0.05,0.1,0.2), # ncores = 4) ## ---- eval = FALSE------------------------------------------------------------ # IBD_4x <- IBD_multiError$maxL_IBD ## ---- eval = FALSE------------------------------------------------------------ # # Copy our dosage matrix to a new matrix: # error_dosages <- SNP_dosages.4x # # # Only work with offspring errors for now: # F1cols <- 3:ncol(error_dosages) # # # Count number of (offspring) dosage scores: # ndose <- length(error_dosages[,F1cols]) # # # Generate a vector of random positions (10% of total nr): # set.seed(42) # error.pos <- sample(1:ndose,round(ndose*0.1)) # # # Replace real values with these random scores (simple error simulation): # error_dosages[,F1cols][error.pos] <- sapply(error_dosages[,F1cols][error.pos], function(x) sample(setdiff(0:4,x),1)) # # # Re-estimate IBD probabilities, using the maxL_IBD function: # IBD_multiError <- maxL_IBD(phased_maplist = phased_maplist.4x, # genotypes = error_dosages, # ploidy = 4, # errors = c(0.01, 0.02, 0.05, 0.1, 0.3), # ncores = nc) # # IBD_4x.err <- IBD_multiError$maxL_IBD # # # Re-impute marker dosages # new_dosages <- impute_dosages(IBD_list=IBD_4x.err,dosage_matrix=error_dosages, # min_error_prior = 0.01, # rounding_error = 0.2) # ## ---- echo = FALSE------------------------------------------------------------ cat("____________________________________________________________________________ 186 out of a total possible 186 markers were imputed In the original dataset there were 0 % missing values among the 186 markers In the imputed dataset there are now 1.4 % missing values among these, using a rounding threshold of 0.2 ____________________________________________________________________________ The % of markers with changed dosage scores is as follows (0 = no change): error.matrix 0 1 2 3 4 89.44 4.18 2.88 1.72 0.35 ") new_dosages <- readRDS(file = "new_dosages.RDS") error_dosages <- readRDS(file = "error_dosages.RDS") F1cols <- 3:ncol(error_dosages); ndose <- length(error_dosages[,F1cols]) ## ----------------------------------------------------------------------------- # Check the results: round(length(which(error_dosages[,F1cols] != SNP_dosages.4x[,F1cols])) / ndose,2) round(length(which(new_dosages[,F1cols] != SNP_dosages.4x[,F1cols])) / ndose,2)