## ---- include = FALSE--------------------------------------------------------- knitr::opts_chunk$set( collapse = FALSE, comment = ">"#, # fig.width = 7, fig.height = 4, fig.fullwidth = TRUE ) ## ----setup-------------------------------------------------------------------- library(dcifer) pardef <- par(no.readonly = TRUE) ## ----------------------------------------------------------------------------- sfile <- system.file("extdata", "MozParagon.csv", package = "dcifer") dsmp <- readDat(sfile, svar = "sampleID", lvar = "locus", avar = "allele") str(dsmp, list.len = 2) ## ---- eval = FALSE------------------------------------------------------------ # dsmp <- formatDat(dlong, svar = "sampleID", lvar = "locus", avar = "allele") ## ----------------------------------------------------------------------------- # optionally, extract location information meta <- unique(read.csv(sfile)[c("sampleID", "province")]) meta <- meta[match(names(dsmp), meta$sampleID), ] # order samples as in dsmp ## ----------------------------------------------------------------------------- lrank <- 2 coi <- getCOI(dsmp, lrank = lrank) ## ----------------------------------------------------------------------------- afreq <- calcAfreq(dsmp, coi, tol = 1e-5) str(afreq, list.len = 2) ## ----------------------------------------------------------------------------- afile <- system.file("extdata", "MozAfreq.csv", package = "dcifer") afreq2 <- readAfreq(afile, lvar = "locus", avar = "allele", fvar = "freq") ## ---- eval = FALSE------------------------------------------------------------ # # alternatively, if allele frequencies are provided in an R data frame (aflong): # afreq2 <- formatAfreq(aflong, lvar = "locus", avar = "allele", fvar = "freq") ## ----------------------------------------------------------------------------- dsmp2 <- matchAfreq(dsmp, afreq2) ## ----------------------------------------------------------------------------- provinces <- c("Maputo", "Inhambane") nsite <- table(meta$province)[provinces] ord <- order(factor(meta$province, levels = provinces)) dsmp <- dsmp[ord] coi <- coi[ ord] ## ---- eval = FALSE------------------------------------------------------------ # dres <- ibdDat(dsmp, coi, afreq, pval = TRUE, confint = TRUE, rnull = 0, # alpha = 0.05, nr = 1e3) ## ----------------------------------------------------------------------------- dres[9, 5, ] ## ---- fig.width = 6, fig.height = 6------------------------------------------- par(mar = c(3, 3, 1, 1)) nsmp <- length(dsmp) atsep <- cumsum(nsite)[-length(nsite)] # create symmetric matrix dmat <- dres[, , "estimate"] dmat[upper.tri(dmat)] <- t(dmat)[upper.tri(t(dmat))] # determine significant, indices for upper triangle alpha <- 0.05 # significance level isig <- which(dres[, , "p_value"] <= alpha, arr.ind = TRUE) col_id <- rep(c("plum4", "lightblue4"), nsite) plotRel(dmat, isig = isig[, 2:1], draw_diag = TRUE, alpha = alpha, idlab = TRUE, col_id = col_id) abline(v = atsep, h = atsep, col = "gray45", lty = 5) ## ---- fig.height = 3.6-------------------------------------------------------- par(mfrow = c(1, 2), mar = c(1, 0, 1, 0.2)) plotRel(dres, draw_diag = TRUE, alpha = alpha) mtext("p-values", 3, 0.2) # p-values for upper triangle pmat <- matrix(NA, length(dsmp), length(dsmp)) pmat[upper.tri(pmat)] <- t(log(dres[, , "p_value"]))[upper.tri(pmat)] pmat[pmat == -Inf] <- min(pmat[is.finite(pmat)]) plotRel(pmat, rlim = NULL, draw_diag = TRUE, col = hcl.colors(101, "Red-Purple"), sig = FALSE, add = TRUE, col_diag = "white", border_diag = "gray45") abline(v = atsep, h = atsep, col = "gray45", lty = 5) # number of non-missing loci for upper triangle dmiss <- lapply(dsmp, function(lst) sapply(lst, function(v) all(!v))) nmat <- matrix(NA, nsmp, nsmp) for (jsmp in 2:nsmp) { for (ismp in 1:(jsmp - 1)) { nmat[ismp, jsmp] <- sum(!dmiss[[ismp]] & !dmiss[[jsmp]]) } } nrng <- range(nmat, na.rm = TRUE) par(mar = c(1, 0.2, 1, 0)) plotRel(dres, draw_diag = TRUE, alpha = alpha) mtext("number of loci", 3, 0.2) coln <- hcl.colors(diff(nrng)*2.4, "Purple-Blue", rev = TRUE)[1:(diff(nrng) + 1)] plotRel(nmat, rlim = NA, col = coln, add = TRUE, draw_diag = TRUE, col_diag = "gray45", border_diag = "white") par(pardef) ## ---- fit.width = 7, fig.height = 5.8----------------------------------------- layout(matrix(1:2, 1), width = c(7, 1)) par(mar = c(1, 1, 2, 1)) plotRel(dmat, draw_diag = TRUE, isig = rbind(isig, isig[, 2:1])) atclin <- cumsum(nsite) - nsite/2 abline(v = atsep, h = atsep, col = "gray45", lty = 5) mtext(provinces, side = 3, at = atclin, line = 0.2) mtext(provinces, side = 2, at = atclin, line = 0.2) par(mar = c(1, 0, 2, 3)) plotColorbar() par(pardef) ## ---- fig.width = 6.2, fig.height = 6----------------------------------------- # horizontal colorbar par(mar = c(1, 1, 1, 3)) border_sig <- "darkviolet" plotRel(dres, draw_diag = TRUE, border_diag = border_sig, alpha = alpha, border_sig = border_sig, lwd_sig = 2) legend(32, 20, pch = 0, col = border_sig, pt.lwd = 2, pt.cex = 1.4, box.col = "gray", legend = expression("significant at" ~~ alpha == 0.05)) text(1:nsmp + 0.3, 1:nsmp - 0.5, labels = names(dsmp), col = col_id, adj = 0, cex = 0.6, xpd = TRUE) par(fig = c(0.25, 1, 0.81, 0.92), mar = c(1, 1, 1, 1), new = TRUE) plotColorbar(at = c(0.2, 0.4, 0.6, 0.8), horiz = TRUE) ncol <- 301 lines(c(0, ncol, ncol, 0, 0), c(0, 0, 1, 1, 0), col = "gray") par(pardef) ## ---- eval = FALSE------------------------------------------------------------ # # First, create a grid of r values to evaluate over # revals <- mapply(generateReval, 1:5, nr = c(1e3, 1e2, 32, 16, 12)) ## ----------------------------------------------------------------------------- sig1 <- sig2 <- vector("list", nrow(isig)) for (i in 1:nrow(isig)) { sig1[[i]] <- ibdEstM(dsmp[isig[i, ]], coi[isig[i, ]], afreq, Mmax = 5, equalr = FALSE, reval = revals) } for (i in 1:nrow(isig)) { sig2[[i]] <- ibdEstM(dsmp[isig[i, ]], coi[isig[i, ]], afreq, equalr = TRUE) } M1 <- sapply(sig1, function(r) sum(r > 0)) M2 <- sapply(sig2, length) rtotal1 <- sapply(sig1, sum) rtotal2 <- sapply(sig2, sum) cor(M1, M2) cor(rtotal1, rtotal2) ## ----------------------------------------------------------------------------- samples <- names(dsmp) sig <- as.data.frame(isig, row.names = FALSE) sig[c("id1", "id2")] <- list(samples[isig[, 1]], samples[isig[, 2]]) sig[c("M1", "M2")] <- list(M1, M2) sig[c("rtotal1", "rtotal2")] <- list(round(rtotal1, 3), round(rtotal2, 3)) head(sig) ## ----------------------------------------------------------------------------- i <- 18 pair <- dsmp[isig[i, ]] coii <- coi[isig[i, ]] res1 <- ibdPair(pair, coii, afreq, M = M1[i], pval = TRUE, equalr = FALSE, reval = revals[[M1[i]]]) res2 <- ibdPair(pair, coii, afreq, M = M2[i], pval = TRUE, equalr = TRUE, confreg = TRUE, llik = TRUE, reval = revals[[1]]) res1$rhat # estimate with equalr = FALSE rep(res2$rhat, M2[i]) # estimate with equalr = TRUE ## ---- fig.width = 6, fig.height = 4.5----------------------------------------- CI <- range(res2$confreg) llikCI <- max(res2$llik) - qchisq(1 - alpha, df = 1)/2 llrng <- range(res2$llik[is.finite(res2$llik)]) yCI <- llrng + diff(llrng)*0.25 yLR <- (res2$llik[1] + max(res2$llik))/2 cols <- c("purple", "cadetblue3", "gray60") par(mar = c(3, 2.5, 2, 0.1), mgp = c(1.5, 0.3, 0)) plot(revals[[1]], res2$llik, type = "l", xlab = "r", ylab = "log-likelihood", yaxt = "n") abline(v = res2$rhat, lty = 1, col = cols[1]) abline(h = c(max(res2$llik), llikCI, res2$llik[[1]]), lty = 2, col = cols[2]) abline(v = CI, col = cols[1], lty = 5) arrows(CI[1], yCI, CI[2], yCI, angle = 20, length = 0.1, code = 3, col = cols[3]) arrows(0.15, res2$llik[1], 0.15, max(res2$llik), angle = 20, length = 0.1, code = 3, col = cols[3]) text(0.165, yLR, adj = 0, "0.5 LR statistic", col = cols[3]) text(mean(CI), yCI + 0.05*diff(llrng), "confidence interval", col = cols[3]) text(res2$rhat - 0.025, yLR, "MLE", col = cols[3], srt = 90) par(pardef)