## ----setup, include = FALSE---------------------------------------------------
# Chunk output options used for every chunk in this document.
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

# Packages attached for the rest of the script.
library(data.table)
library(dplyr)
library(future)
library(ggplot2)
library(whatifbandit)

## ----data---------------------------------------------------------------------
# Load the `tanf` dataset and preview its columns.
data(tanf)
tanf |>
  glimpse()

## ----singlesimnoeval, eval=FALSE----------------------------------------------
# sim <- single_mab_simulation(
#   data = tanf,
#   assignment_method = "Batch",
#   period_length = 1000,
#   algorithm = "UCB1",
#   whole_experiment = TRUE, perfect_assignment = TRUE,
#   prior_periods = "All",
#   blocking = FALSE,
#   data_cols = c(
#     id_col = "ic_case_id",
#     success_col = "success",
#     condition_col = "condition"
#   )
# )

## ----singlesimeval------------------------------------------------------------
# Single simulation that is actually evaluated: date-based assignment with a
# period of one month, the "Thompson" algorithm, and blocking on
# `service_center`. The resulting `sim` object is inspected in later chunks.
sim <- single_mab_simulation(
  data = tanf,
  assignment_method = "Date",
  time_unit = "Month",
  period_length = 1,
  algorithm = "Thompson",
  whole_experiment = FALSE,
  perfect_assignment = TRUE,
  prior_periods = "All",
  blocking = TRUE,
  block_cols = "service_center",
  # Maps the roles the simulation needs onto the column names in `tanf`.
  data_cols = c(
    id_col = "ic_case_id",
    date_col = "appt_date",
    success_col = "success",
    condition_col = "condition",
    month_col = "recert_month",
    success_date_col = "date_of_recert",
    assignment_date_col = "letter_sent_date"
  )
)

## ----singlesimcontrolaug, eval=FALSE------------------------------------------
# tanf <- arrange(tanf, appt_date)
# conditions <- setNames(levels(tanf$condition), c("Control", "T1", "T2"))
#
# sim <- single_mab_simulation(
#   data = tanf,
#   assignment_method = "Individual",
#   algorithm = "Thompson",
#   whole_experiment = FALSE, perfect_assignment = TRUE,
#   prior_periods = 500,
#   blocking = TRUE, block_cols = c("service_center"),
#   data_cols = c(
#     id_col = "ic_case_id",
#     success_col = "success",
#     condition_col = "condition"
#   ),
#   control_augment = 0.2,
#   control_condition = "no_letter"
# )

## -----------------------------------------------------------------------------
# Inspect the single-simulation result: its class and its print method.
class(sim)
sim

## -----------------------------------------------------------------------------
# Summarise the simulation once and keep the result for the by-hand interval
# calculation further down.
sim_summary <- summary(sim)
print(sim_summary, width = Inf)

## -----------------------------------------------------------------------------
# Inside Summary Call
summary(sim, level = 0.8) |>
  select(estimated_probability_of_success, SE, lower_bound, upper_bound, level) |>
  print(width = Inf)

# By hand
# Two-sided normal critical value for level 0.8 (0.2 split across both tails).
quantile <- qnorm(0.2 / 2, lower.tail = FALSE)
sim_summary |>
  mutate(
    lower_bound = estimated_probability_of_success - SE * quantile,
    upper_bound = estimated_probability_of_success + SE * quantile
  ) |>
  select(estimated_probability_of_success, SE, lower_bound, upper_bound)

## -----------------------------------------------------------------------------
plot(sim, type = "arm")

## -----------------------------------------------------------------------------
plot(sim, type = "assign")

## -----------------------------------------------------------------------------
plot(sim, type = "estimate", level = 0.9, height = 0.4) +
  scale_x_continuous(breaks = seq(0, 1, .1), limits = range(0, 1))

## -----------------------------------------------------------------------------
# Draw 100 distinct seeds reproducibly, one per repetition, and time the
# sequential run of 100 simulations.
set.seed(532454)
seeds <- sample.int(1000000, 100, replace = FALSE)
time <- system.time(
  multiple_sims <- multiple_mab_simulation(
    data = tanf,
    assignment_method = "Date",
    time_unit = "Month",
    period_length = 1,
    algorithm = "Thompson",
    whole_experiment = FALSE, perfect_assignment = TRUE,
    prior_periods = "All",
    blocking = TRUE, block_cols = c("service_center"),
    data_cols = c(
      id_col = "ic_case_id",
      date_col = "appt_date",
      success_col = "success",
      condition_col = "condition",
      month_col = "recert_month",
      success_date_col = "date_of_recert",
      assignment_date_col = "letter_sent_date"
    ),
    keep_data = TRUE, times = 100, seeds = seeds
  )
)

## -----------------------------------------------------------------------------
# Report the in-memory size of an object as a plain number.
#
# x:    any R object.
# unit: unit string passed to `format()` for the object size, e.g. "KiB" or
#       "MiB".
#
# Returns the numeric value printed by `format(object.size(x), units = unit)`,
# without the unit suffix.
get_size <- function(x, unit) {
  size_label <- format(object.size(x), units = unit)
  # Match the decimal part as well: "[0-9]+" alone stops at the decimal point
  # and would turn "12.9 MiB" into 12 (and "0.4 MiB" into 0).
  size_match <- regexpr("[0-9]+(\\.[0-9]+)?", size_label)
  as.numeric(regmatches(size_label, size_match))
}

# Size with the per-simulation data kept, in both units, then the size after
# dropping the `final_data_nest` element.
full_size <- get_size(multiple_sims, unit = "MiB")
full_size_kib <- get_size(multiple_sims, unit = "KiB")
multiple_sims$final_data_nest <- NULL
reduced_size <- get_size(multiple_sims, unit = "KiB")

## -----------------------------------------------------------------------------
# NOTE(review): presumably holds the saved results of the commented-out
# parallel run below (e.g. `parallel_time`) - confirm against the .RData file.
load("parallel.RData")

## -----------------------------------------------------------------------------
# set.seed(532454)
# seeds <- sample.int(1000000, 100, replace = FALSE)
# plan("multisession", workers = 4)
# parallel_time <- system.time(
#   multiple_sims <- multiple_mab_simulation(
#     data = tanf,
#     assignment_method = "Date",
#     time_unit = "Month",
#     period_length = 1,
#     algorithm = "Thompson",
#     whole_experiment = FALSE, perfect_assignment = TRUE,
#     prior_periods = "All",
#     blocking = TRUE, block_cols = c("service_center"),
#     data_cols = c(
#       id_col = "ic_case_id",
#       date_col = "appt_date",
#       success_col = "success",
#       condition_col = "condition",
#       month_col = "recert_month",
#       success_date_col = "date_of_recert",
#       assignment_date_col = "letter_sent_date"
#     ),
#     keep_data = FALSE, times = 100, seeds = seeds
#   )
# )
# plan("sequential")

## -----------------------------------------------------------------------------
class(multiple_sims)
print(multiple_sims)

## -----------------------------------------------------------------------------
summary(multiple_sims) |> print(width = Inf)

## -----------------------------------------------------------------------------
summary(multiple_sims, level = 0.75) |> print(width = Inf)

## -----------------------------------------------------------------------------
plot(multiple_sims, type = "summary")

## -----------------------------------------------------------------------------
plot(multiple_sims, type = "hist", quantity = "estimate", geom = list(bins = 50))
plot(multiple_sims, type = "hist", quantity = "assignment", facet = list(switch = "x"))
## -----------------------------------------------------------------------------
plot(multiple_sims,
  type = "estimate", cdf = "empirical",
  level = 0.99, height = 0.4
) +
  scale_x_continuous(breaks = seq(0, 1, .1), limits = range(0, 1))

## -----------------------------------------------------------------------------
# NOTE(review): presumably holds the saved timings (`dataframe_time`,
# `datatable_time`) from the commented-out benchmark below - confirm against
# the .RData file.
load("datatable.RData")

## -----------------------------------------------------------------------------
# # Prepare the dataset:
# tanf_large <- tanf
# setDT(tanf_large)
# for (i in 1:9) {
#   tanf_large <- rbindlist(list(tanf_large, tanf_large))
# }
#
# setorder(tanf_large, appt_date)
#
# # Set id to be the row number for uniqueness:
# tanf_large[, id := .I]

## -----------------------------------------------------------------------------
# Both timed calls below are identical except for the class of `data`
# (data.frame vs data.table), so the timings compare the data structure only.
# The algorithm name was previously misspelled in the first call and differed
# in the second.
#
# set.seed(523432453)
# dataframe_time <- system.time(single_mab_simulation(
#   data = as.data.frame(tanf_large),
#   assignment_method = "Batch",
#   period_length = 3000,
#   algorithm = "Thompson",
#   whole_experiment = FALSE, perfect_assignment = TRUE,
#   prior_periods = "All",
#   blocking = FALSE,
#   data_cols = c(
#     id_col = "id",
#     success_col = "success",
#     condition_col = "condition"
#   ),
#   ndraws = 5000
# ))
#
# datatable_time <- system.time(single_mab_simulation(
#   data = tanf_large,
#   assignment_method = "Batch",
#   period_length = 3000,
#   algorithm = "Thompson",
#   whole_experiment = FALSE, perfect_assignment = TRUE,
#   prior_periods = "All",
#   blocking = FALSE,
#   data_cols = c(
#     id_col = "id",
#     success_col = "success",
#     condition_col = "condition"
#   ),
#   ndraws = 5000
# ))