## ----setup, include=FALSE----------------------------------------------------- # del /s /a:h desktop.ini vignette_input <- knitr::current_input(dir = TRUE) vignette_dir <- if (is.null(vignette_input)) getwd() else dirname(vignette_input) fig_dir <- tempdir() knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.path = file.path(fig_dir, "getting-started-") ) library(cosmic) library(knitr) library(kableExtra) library(ggplot2) library(MASS) ## ----load-seattle------------------------------------------------------------- path <- system.file("extdata", "dataSPD.RData", package = "cosmic") if (!nzchar(path)) { path_candidates <- c( file.path(vignette_dir, "..", "inst", "extdata", "dataSPD.RData"), file.path("..", "inst", "extdata", "dataSPD.RData"), file.path("inst", "extdata", "dataSPD.RData") ) path <- path_candidates[file.exists(path_candidates)][1] } if (is.na(path) || !nzchar(path)) { stop("Could not find 'dataSPD.RData' in the installed package or source tree.") } load(path) ## ----seattle-overview--------------------------------------------------------- seattle_overview <- data.frame( quantity = c("Officer-incident rows", "Incidents", "Officers", "Force levels"), value = c(nrow(d), length(unique(d$id)), length(unique(d$idOff)), length(unique(d$y)))) seattle_overview ## ----force-distribution------------------------------------------------------- force_distribution <- data.frame( force_level = as.integer(names(table(d$y))), count = as.integer(table(d$y))) force_distribution ## ----incident-size------------------------------------------------------------ officers_per_incident <- table(table(d$id)) incident_size_distribution <- data.frame( officers_in_incident = as.integer(names(officers_per_incident)), number_of_incidents = as.integer(officers_per_incident) ) incident_size_distribution ## ----install-cmdstanr, eval = FALSE------------------------------------------- # install.packages("cmdstanr", # repos = c("https://stan-dev.r-universe.dev", # getOption("repos"))) # cmdstanr::install_cmdstan() ## ----fit-seattle, eval = FALSE------------------------------------------------ # # 5-6 hours? # fit <- cosmic(d, # incidentID = id, # officerID = idOff, # y = y, # iter = 10000, # chains = 4, # cores = 1, # threads = 8) ## ----loadPrecompute, echo=FALSE, results='hide'------------------------------- if(FALSE) { SPDdiag <- diagnose(fit) future::plan(future::multisession, workers = 4) flag_officer_tail_pct <- 0.05 off_summary <- officer_summary(fit, pct_threshold = 0.25, pct_tail = flag_officer_tail_pct) SPDoff_summary <- off_summary SPDdraws <- posterior(fit, pars="sDelta") save(SPDdiag, SPDoff_summary, SPDdraws, file="vignettes/SPDprecompute.RData") } # load pre-computed results fit_path_candidates <- c(file.path(vignette_dir, "SPDprecompute.RData"), "SPDprecompute.RData", file.path("vignettes", "SPDprecompute.RData")) fit_path <- fit_path_candidates[file.exists(fit_path_candidates)][1] if (is.na(fit_path)) { stop("Could not find 'SPDprecompute.RData' next to the vignette source.") } load(fit_path) ## ----diagnose-fit, eval = FALSE----------------------------------------------- # diagnose(fit) ## ----echo = FALSE------------------------------------------------------------- # taken from pre-computed results print(SPDdiag) ## ----officer-summary, eval = FALSE-------------------------------------------- # flag_officer_tail_pct <- 0.05 # off_summary <- officer_summary(fit, # pct_threshold = 0.25, # pct_tail = flag_officer_tail_pct) # # head(off_summary) ## ----echo = FALSE------------------------------------------------------------- # taken from pre-computed results flag_officer_tail_pct <- 0.05 head(SPDoff_summary) ## ----outlier-report, eval = FALSE--------------------------------------------- # outliers <- outlier_report(off_summary, prob_outlier = 0.9) # head(outliers, 10) ## ----echo = FALSE------------------------------------------------------------- # taken from pre-computed results outliers <- outlier_report(SPDoff_summary, prob_outlier = 0.9) head(outliers, 10) ## ----------------------------------------------------------------------------- outliers_display <- outliers prob_cols <- names(outliers_display) %in% c("pRankToppct", "pRankBotpct") tail_pct_label <- formatC(100 * flag_officer_tail_pct, format = "fg", digits = 3) names(outliers_display)[names(outliers_display) == "pRankToppct"] <- paste0("Prob. top ", tail_pct_label, "%") names(outliers_display)[names(outliers_display) == "pRankBotpct"] <- paste0("Prob. bottom ", tail_pct_label, "%") outliers_display[prob_cols] <- lapply(outliers_display[prob_cols], function(x) sprintf("%.2f", x)) digits <- rep(0, ncol(outliers_display)) digits[seq.int(length(digits)-4,length(digits))] <- 2L knitr::kable( outliers_display, format = "html", digits = digits, caption = "Officers with strong posterior evidence of being in the tails of the peer distribution", align = rep("r", ncol(outliers_display))) |> kableExtra::kable_styling( bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = FALSE, position = "center") |> kableExtra::row_spec(0, bold = TRUE, color = "white", background = "#2C3E50") |> kableExtra::column_spec(1, bold = TRUE) |> kableExtra::scroll_box(width = "100%", height = "320px") ## ----posterior-draws, eval = FALSE-------------------------------------------- # sDelta_draws <- posterior(fit, pars = "sDelta")$sDelta ## ----posterior-draws-precompute, echo = FALSE--------------------------------- sDelta_draws <- SPDdraws$sDelta ## ----------------------------------------------------------------------------- score_summary <- data.frame( parameter = c("s[2]", "s[3]"), mean = c( mean(1 + sDelta_draws[, 1]), mean(1 + sDelta_draws[, 1] + sDelta_draws[, 2]) ), lower_95 = c( unname(stats::quantile(1 + sDelta_draws[, 1], probs = 0.025)), unname(stats::quantile(1 + sDelta_draws[, 1] + sDelta_draws[, 2], probs = 0.025)) ), upper_95 = c( unname(stats::quantile(1 + sDelta_draws[, 1], probs = 0.975)), unname(stats::quantile(1 + sDelta_draws[, 1] + sDelta_draws[, 2], probs = 0.975)) ) ) ## ----posterior-draw-summary, echo = FALSE------------------------------------- knitr::kable( score_summary, digits = 3, caption = "Posterior summaries for the estimated score parameters" ) ## ----posterior-joint-plot, fig.width=10, fig.height=6.5, out.width='100%', fig.align='center'---- ggplot(data.frame(x = sDelta_draws[,1], y = sDelta_draws[,2]), aes(x=x, y=y)) + geom_point(alpha = 0.05, size = 0.5) + geom_density_2d(color = "red") + labs(x = expression(s[2] - 1), y = expression(s[3] - s[2])) + theme_minimal() ## ----posterior-s2-hist, fig.width=10, fig.height=5.5, out.width='100%', fig.align='center'---- hist(1+sDelta_draws[,1], xlab=expression(s[2]), main="") abline(v=1+mean(sDelta_draws[,1])) ## ----posterior-s3-hist, fig.width=10, fig.height=5.5, out.width='100%', fig.align='center'---- hist(1+sDelta_draws[,1]+sDelta_draws[,2], xlab=expression(s[3]), main="") abline(v=1+mean(sDelta_draws[,1])+mean(sDelta_draws[,2]))