## ----include=FALSE------------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----message=FALSE, warning=FALSE---------------------------------------------
library(gsDesignNB)
library(gsDesign)
library(data.table)
library(ggplot2)
library(gt)

## ----params--------------------------------------------------------------------
# Design parameters (time unit: years; event rates are per patient-year)
n_total <- 200
enroll_duration <- 1 # 1 year of enrollment
max_followup <- 1 # 1 year of planned follow-up per patient

enroll_rate <- data.frame(
  rate = n_total / enroll_duration,
  duration = enroll_duration
)

fail_rate <- data.frame(
  treatment = c("Control", "Experimental"),
  rate = c(0.5, 0.35), # Events per patient-year
  dispersion = c(0.5, 0.5) # Negative binomial dispersion
)

dropout_rate <- data.frame(
  treatment = c("Control", "Experimental"),
  rate = c(0.05, 0.05),
  duration = c(100, 100)
)
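## ----expected-events-----------------------------------------------------------
# Optional sanity check (a rough sketch under the parameters above, ignoring
# dropout and assuming full follow-up): the expected number of events per
# patient is rate * max_followup, regardless of the dispersion, so the expected
# event counts per arm under 1:1 allocation are roughly as shown below.
expected_events_per_patient <- fail_rate$rate * max_followup
data.frame(
  treatment = fail_rate$treatment,
  events_per_patient = expected_events_per_patient,
  events_per_arm = expected_events_per_patient * n_total / 2
)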
## ----simulation----------------------------------------------------------------
set.seed(2024)
n_sims <- 50

results <- data.frame(
  sim_id = integer(n_sims),
  interim_date = numeric(n_sims),
  interim_z = numeric(n_sims),
  interim_n = integer(n_sims),
  interim_info = numeric(n_sims),
  final_date = numeric(n_sims),
  final_z = numeric(n_sims),
  final_n = integer(n_sims),
  final_info = numeric(n_sims),
  info_frac = numeric(n_sims)
)

# Target number of completers that triggers the interim analysis (40% of n_total)
target_completers <- 0.4 * n_total

for (i in 1:n_sims) {
  # 1. Simulate trial data
  sim_data <- nb_sim(
    enroll_rate = enroll_rate,
    fail_rate = fail_rate,
    dropout_rate = dropout_rate,
    max_followup = max_followup,
    n = n_total
  )

  # 2. Interim analysis
  # Find the calendar date at which the target number of completers is reached,
  # then cut the data to completers at that date.
  interim_date <- cut_date_for_completers(sim_data, target_completers)
  data_interim <- cut_completers(sim_data, interim_date)

  # Analyze with the Mütze test; extract the Z-statistic and the statistical
  # information (the inverse of the squared standard error).
  res_interim <- mutze_test(data_interim)
  z_interim <- res_interim$z
  info_interim <- 1 / res_interim$se^2

  # 3. Final analysis
  # Use all data collected up to the end of the study, i.e., the calendar date
  # at which the last patient reaches max_followup.
  date_final <- max(sim_data$calendar_time)

  # Cut at the final date: partial follow-up for dropouts, full follow-up for completers
  data_final <- cut_data_by_date(sim_data, date_final)

  res_final <- mutze_test(data_final)
  z_final <- res_final$z
  info_final <- 1 / res_final$se^2

  # Store results
  results$sim_id[i] <- i
  results$interim_date[i] <- interim_date
  results$interim_z[i] <- z_interim
  results$interim_n[i] <- nrow(data_interim)
  results$interim_info[i] <- info_interim
  results$final_date[i] <- date_final
  results$final_z[i] <- z_final
  results$final_n[i] <- nrow(data_final)
  results$final_info[i] <- info_final
  results$info_frac[i] <- info_interim / info_final
}

# Compare the simulated information with the asymptotic information at the
# average interim and final analysis times.
info_asymp_interim <- compute_info_at_time(
  analysis_time = mean(results$interim_date),
  accrual_rate = n_total / enroll_duration,
  accrual_duration = enroll_duration,
  lambda1 = fail_rate$rate[1],
  lambda2 = fail_rate$rate[2],
  dispersion = fail_rate$dispersion[1],
  ratio = 1,
  dropout_rate = dropout_rate$rate[1] # Dropout is equal in both arms
)

info_asymp_final <- compute_info_at_time(
  analysis_time = mean(results$final_date),
  accrual_rate = n_total / enroll_duration,
  accrual_duration = enroll_duration,
  lambda1 = fail_rate$rate[1],
  lambda2 = fail_rate$rate[2],
  dispersion = fail_rate$dispersion[1],
  ratio = 1,
  dropout_rate = dropout_rate$rate[1]
)

message("Asymptotic Information (Interim): ", round(info_asymp_interim, 2))
message("Mean Simulated Information (Interim): ", round(mean(results$interim_info), 2))
message("Asymptotic Information (Final): ", round(info_asymp_final, 2))
message("Mean Simulated Information (Final): ", round(mean(results$final_info), 2))

## ----summary------------------------------------------------------------------
summary(results[, c(
  "interim_date", "interim_z", "interim_info",
  "final_date", "final_z", "final_info", "info_frac"
)])

## ----plot, fig.width=6, fig.height=6------------------------------------------
# Correlation between interim and final Z-scores
cor_z <- cor(results$interim_z, results$final_z)
message("Correlation between interim and final Z-scores: ", round(cor_z, 3))

ggplot(results, aes(x = interim_z, y = final_z)) +
  geom_point(alpha = 0.7) +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "gray") +
  labs(
    title = paste0("Z-Scores: Interim vs Final Analysis (Cor = ", round(cor_z, 3), ")"),
    x = "Interim Z-Score",
    y = "Final Z-Score (Full Data)"
  ) +
  theme_minimal()
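## ----correlation-check---------------------------------------------------------
# Under the canonical joint distribution of group sequential test statistics,
# Cor(Z1, Z2) = sqrt(I1 / I2), the square root of the information fraction.
# A rough benchmark (sketch only; with n_sims = 50 the empirical correlation is
# noisy): compare the observed correlation with this theoretical value.
message("Theoretical correlation, sqrt(mean info fraction): ",
        round(sqrt(mean(results$info_frac)), 3))
message("Empirical correlation: ", round(cor_z, 3))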
## ----evaluation----------------------------------------------------------------
# Design parameters for the group sequential test
alpha <- 0.025
k <- 2
sfu <- sfLDOF # Lan-DeMets O'Brien-Fleming spending function

# Compute the rejection decision for each simulated trial, spending alpha
# according to the information fraction observed in that trial.
# Note: in practice, boundaries are often pre-specified or recalculated at each
# analysis; here we recompute them per trial from info_frac[i].
reject <- logical(n_sims)

for (i in 1:n_sims) {
  # Error-spending approach:
  # - spend alpha_1 = sfu(alpha, info_frac[i]) at the interim
  # - spend the remaining alpha at the final analysis (total alpha = 0.025)

  # Interim spending and interim bound on the Z-scale: P(Z1 > b1) = spend_interim
  spend_interim <- sfu(alpha, t = results$info_frac[i])$spend
  b1 <- qnorm(1 - spend_interim)

  if (results$interim_z[i] > b1) {
    # Rejection at the interim analysis
    reject[i] <- TRUE
  } else {
    # Final analysis: find b2 such that P(Z1 < b1, Z2 > b2) = alpha - spend_interim.
    # This requires integrating over the joint distribution of (Z1, Z2), whose
    # asymptotic correlation is sqrt(info_frac[i]). gsDesign performs this
    # computation given the observed information fraction.
    gs_des <- gsDesign(
      k = 2, test.type = 1, alpha = alpha, sfu = sfu,
      timing = results$info_frac[i]
    )
    b2 <- gs_des$upper$bound[2]

    if (results$final_z[i] > b2) {
      reject[i] <- TRUE
    }
  }
}

message("Power (Empirical Rejection Rate): ", mean(reject))
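## ----interim-bounds------------------------------------------------------------
# Optional summary (a sketch using the quantities computed above): the per-trial
# interim bounds implied by the observed information fractions. With the
# Lan-DeMets O'Brien-Fleming spending function, little alpha is spent early, so
# these bounds sit well above the fixed-design critical value qnorm(1 - alpha).
interim_bounds <- qnorm(1 - sfu(alpha, t = results$info_frac)$spend)
summary(interim_bounds)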