## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.height = 5, fig.width = 7, warning = FALSE, message = FALSE ) ## ----------------------------------------------------------------------------- library(aorsf) library(ggplot2) ## ----------------------------------------------------------------------------- set.seed(329) index_train <- sample(nrow(penguins_orsf), 150) penguins_orsf_train <- penguins_orsf[index_train, ] penguins_orsf_test <- penguins_orsf[-index_train, ] fit_clsf <- orsf(data = penguins_orsf_train, formula = species ~ .) ## ----------------------------------------------------------------------------- pred_spec <- list(flipper_length_mm = c(190, 210)) pd_oob <- orsf_pd_oob(fit_clsf, pred_spec = pred_spec) pd_oob ## ----------------------------------------------------------------------------- sum(pd_oob[flipper_length_mm == 190, mean]) ## ----------------------------------------------------------------------------- sum(pd_oob[flipper_length_mm == 190, medn]) ## ----------------------------------------------------------------------------- set.seed(329) index_train <- sample(nrow(penguins_orsf), 150) penguins_orsf_train <- penguins_orsf[index_train, ] penguins_orsf_test <- penguins_orsf[-index_train, ] fit_regr <- orsf(data = penguins_orsf_train, formula = bill_length_mm ~ .) ## ----------------------------------------------------------------------------- pred_spec <- list(flipper_length_mm = c(190, 210)) pd_new <- orsf_pd_new(fit_regr, pred_spec = pred_spec, new_data = penguins_orsf_test) pd_new ## ----------------------------------------------------------------------------- pred_spec = pred_spec_auto(species, island, body_mass_g) pd_new <- orsf_pd_new(fit_regr, pred_spec = pred_spec, new_data = penguins_orsf_test) pd_new ## ----------------------------------------------------------------------------- pd_new <- orsf_pd_new(fit_regr, expand_grid = FALSE, pred_spec = pred_spec, new_data = penguins_orsf_test) pd_new ## ----------------------------------------------------------------------------- custom_pred_spec <- data.frame(species = 'Adelie', island = 'Biscoe') pd_new <- orsf_pd_new(fit_regr, pred_spec = custom_pred_spec, new_data = penguins_orsf_test) pd_new ## ----------------------------------------------------------------------------- set.seed(329) index_train <- sample(nrow(pbc_orsf), 150) pbc_orsf_train <- pbc_orsf[index_train, ] pbc_orsf_test <- pbc_orsf[-index_train, ] fit_surv <- orsf(data = pbc_orsf_train, formula = Surv(time, status) ~ . - id, oobag_pred_horizon = 365.25 * 5) ## ----------------------------------------------------------------------------- pd_train <- orsf_pd_inb(fit_surv, pred_spec = list(bili = 1:5)) pd_train ## ----------------------------------------------------------------------------- pd_train <- orsf_pd_inb(fit_surv, pred_spec_auto(bili)) pd_train ## ----------------------------------------------------------------------------- pd_train <- orsf_pd_inb(fit_surv, pred_spec_auto(bili), pred_horizon = seq(500, 3000, by = 500)) pd_train ## ----------------------------------------------------------------------------- # a rare case of modify_in_place = TRUE orsf_update(fit_surv, data = pbc_orsf, modify_in_place = TRUE) fit_surv ## ----------------------------------------------------------------------------- pd_sex_tv <- orsf_pd_oob(fit_surv, pred_spec = pred_spec_auto(sex), pred_horizon = seq(365, 365*5)) ggplot(pd_sex_tv) + aes(x = pred_horizon, y = mean, color = sex) + geom_line() + labs(x = 'Time since baseline', y = 'Expected risk') ## ----------------------------------------------------------------------------- library(data.table) ratio_tv <- pd_sex_tv[ , .(ratio = mean[sex == 'm'] / mean[sex == 'f']), by = pred_horizon ] ggplot(ratio_tv, aes(x = pred_horizon, y = ratio)) + geom_line(color = 'grey') + geom_smooth(color = 'black', se = FALSE) + labs(x = 'time since baseline', y = 'ratio in expected risk for males versus females') ## ----------------------------------------------------------------------------- pd_smry <- orsf_summarize_uni(fit_surv, n_variables = 4) pd_smry ## ----------------------------------------------------------------------------- head(as.data.table(pd_smry)) ## ----------------------------------------------------------------------------- pred_spec = pred_spec_auto(bili, edema, trt) pd_bili_edema <- orsf_pd_oob(fit_surv, pred_spec) ggplot(pd_bili_edema) + aes(x = bili, y = medn, col = trt, linetype = edema) + geom_line() + labs(y = 'Expected predicted risk') ## ----------------------------------------------------------------------------- library(survival) pbc_orsf$edema_05 <- ifelse(pbc_orsf$edema == '0.5', 'yes', 'no') fit_cph <- coxph(Surv(time,status) ~ edema_05 * bili, data = pbc_orsf) anova(fit_cph) ## ----echo = FALSE------------------------------------------------------------- # drop so it won't interfere with downstream code pbc_orsf$edema_05 <- NULL ## ----------------------------------------------------------------------------- # use just the continuous variables preds <- names(fit_surv$get_means()) vint_scores <- orsf_vint(fit_surv, predictors = preds) vint_scores ## ----------------------------------------------------------------------------- # top scoring interaction pd_top <- vint_scores$pd_values[[1]] # center pd values so it's easier to see the interaction effect pd_top[, mean := mean - mean[1], by = var_2_value] ggplot(pd_top) + aes(x = var_1_value, y = mean, color = factor(var_2_value), group = factor(var_2_value)) + geom_line() + labs(x = "albumin", y = "predicted mortality (centered)", color = "protime") ## ----------------------------------------------------------------------------- # test the top score (expect strong interaction) fit_cph <- coxph(Surv(time,status) ~ albumin * protime, data = pbc_orsf) anova(fit_cph) ## ----------------------------------------------------------------------------- pred_spec <- list(flipper_length_mm = c(190, 210)) ice_oob <- orsf_ice_oob(fit_clsf, pred_spec = pred_spec) ice_oob ## ----eval=FALSE--------------------------------------------------------------- # # ice_oob %>% # .[flipper_length_mm == 190] %>% # .[id_row == 1] %>% # .[['pred']] %>% # sum() # ## ----echo=FALSE--------------------------------------------------------------- 1 ## ----------------------------------------------------------------------------- pred_spec <- list(flipper_length_mm = c(190, 210)) ice_new <- orsf_ice_new(fit_regr, pred_spec = pred_spec, new_data = penguins_orsf_test) ice_new ## ----------------------------------------------------------------------------- pred_spec = pred_spec_auto(species, island, body_mass_g) ice_new <- orsf_ice_new(fit_regr, pred_spec = pred_spec, new_data = penguins_orsf_test) ice_new ## ----------------------------------------------------------------------------- ice_new <- orsf_ice_new(fit_regr, expand_grid = FALSE, pred_spec = pred_spec, new_data = penguins_orsf_test) ice_new ## ----------------------------------------------------------------------------- custom_pred_spec <- data.frame(species = 'Adelie', island = 'Biscoe') ice_new <- orsf_ice_new(fit_regr, pred_spec = custom_pred_spec, new_data = penguins_orsf_test) ice_new ## ----------------------------------------------------------------------------- ice_train <- orsf_ice_inb(fit_surv, pred_spec = list(bili = 1:5)) ice_train ## ----------------------------------------------------------------------------- ice_train <- orsf_ice_inb(fit_surv, pred_spec_auto(bili)) ice_train ## ----------------------------------------------------------------------------- ice_train <- orsf_ice_inb(fit_surv, pred_spec_auto(bili), pred_horizon = seq(500, 3000, by = 500)) ice_train ## ----------------------------------------------------------------------------- pred_spec <- list(bili = seq(1, 10, length.out = 25)) ice_oob <- orsf_ice_oob(fit_surv, pred_spec, boundary_checks = FALSE) ice_oob ## ----------------------------------------------------------------------------- ice_oob[, pred_subtract := rep(pred[id_variable==1], times=25)] ice_oob[, pred := pred - pred_subtract] ## -----orsf_ice---------------------------------------------------------------- ggplot(ice_oob, aes(x = bili, y = pred, group = id_row)) + geom_line(alpha = 0.15) + labs(y = 'Change in predicted risk') + geom_smooth(se = FALSE, aes(group = 1))