## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 7, fig.height = 5 ) ## ----load-packages, message=FALSE--------------------------------------------- # Libraries library(NlsyLinks) library(discord) library(utils) library(tidyverse) library(ggplot2) # Set random seed for reproducibility set.seed(1492) # Disable scientific notation for clarity options(scipen = 999) conditions <- expand.grid( total_pairs = c(100, 250, 500, 750, 1000), relatedness = c(1, .5), cov_a = c(0, 0.25), cov_c = c(0, 0.25), cov_e = c(0, 0.25), ace_a = c(1), ace_c = c(1), ace_e = c(1) ) ## ----------------------------------------------------------------------------- set.seed(1492) # Set seed for reproducibility n_trials <- 100 FAST <- FALSE # Set to FALSE for slower, more detailed analysis results_list <- list() name.results <- c("coef_xdiff", "p_xdiff", "r.squared") for (cond in seq_along(conditions)) { current <- conditions[cond, ] temp_results <- matrix(NA, nrow = n_trials, ncol = length(name.results)) colnames(temp_results) <- name.results for (i in 1:n_trials) { trial <- kinsim( r_vector = rep(current$relatedness, each = current$total_pairs), npg_all = current$total_pairs, ace_all = c(current$ace_a, current$ace_c, current$ace_e), cov_a = current$cov_a, cov_c = current$cov_c, cov_e = current$cov_e, variables = 2 ) extract <- data.frame( id = trial$id, r = trial$r, y_s1 = trial$y1_1, y_s2 = trial$y1_2, x_s1 = trial$y2_1, x_s2 = trial$y2_2 ) if (FAST == TRUE) { # faster # double enter the data extract2 <- rbind( transform(extract, y_s1 = y_s2, y_s2 = y_s1, x_s1 = x_s2, x_s2 = x_s1 ), extract ) extract2$y_diff <- extract2$y_s1 - extract2$y_s2 extract2$x_diff <- extract2$x_s1 - extract2$x_s2 extract2$x_bar <- (extract2$x_s1 + extract2$x_s2) / 2 extract2$y_bar <- (extract2$y_s1 + extract2$y_s2) / 2 # select pair with ydiff > 0 extract3 <- extract2[extract2$y_diff > 0, ] fit <- tryCatch( lm(y_diff ~ x_bar + y_bar + x_diff, data = extract3), error = function(e) { return(NULL) } ) } # slower if (FAST == FALSE) { fit <- tryCatch( discord_regression( data = extract, outcome = "y", predictors = "x", id = "id", sex = NULL, race = NULL, fast = TRUE ), error = function(e) { return(NULL) } ) } if (!is.null(fit)) { sm <- summary(fit) temp_results[i, "coef_xdiff"] <- coef(sm)["x_diff", "Estimate"] temp_results[i, "p_xdiff"] <- coef(sm)["x_diff", "Pr(>|t|)"] temp_results[i, "r.squared"] <- sm$r.squared } } results_list[[cond]] <- as.data.frame(temp_results) } ## ----summarize-power---------------------------------------------------------- power_summary <- lapply(results_list, function(res) { data.frame( power_xdiff = mean(res$p_xdiff < 0.05, na.rm = TRUE), median_r2 = median(res$r.squared, na.rm = TRUE) ) }) final_results <- cbind(conditions, do.call(rbind, power_summary)) ## ----echo=FALSE, message=FALSE------------------------------------------------ # Define custom labels for each facet variable facet_labels <- list( cov_a = c("0" = "No Cov_A", "1" = "With Cov_A"), cov_c = c("0" = "No Cov_C", "1" = "With Cov_C"), cov_e = c("0" = "No Cov_E", "1" = "With Cov_E") ) # Ensure factors with exact string levels final_results$cov_a <- factor(final_results$cov_a, levels = c(0, 0.25), labels = c( "No Covariate A", "Covariate A (0.25)" ) ) final_results$cov_c <- factor(final_results$cov_c, levels = c(0, 0.25), labels = c( "No Covariate C", "Covariate C (0.25)" ) ) final_results$cov_e <- factor(final_results$cov_e, levels = c(0, 0.25), labels = c( "No Covariate E", "Covariate E (0.25)" ) ) ## ----plot-power, echo=FALSE--------------------------------------------------- final_results_noc <- final_results[final_results$cov_c == "No Covariate C", ] p_noc <- ggplot(final_results_noc, aes( x = total_pairs, y = power_xdiff, fill = factor(relatedness), color = factor(relatedness) )) + geom_bar(stat = "identity", position = "dodge") + labs( title = "Power Analysis for Discordant Sibling Designs", x = "Total Pairs", y = "Power (p < 0.05)", color = "Relatedness", fill = "Relatedness" ) + theme_minimal() + geom_text( x = 600, y = 0.55, label = "False Positive Rate", data = data.frame(cov_a = "No Covariate A", cov_e = "No Covariate E"), fontface = "italic", size = 3.5, inherit.aes = FALSE ) + geom_text( x = 600, y = 0.85, label = "Genetic Confounding", data = data.frame(cov_a = "Covariate A (0.25)", cov_e = "No Covariate E"), fontface = "italic", size = 3.5, inherit.aes = FALSE ) + facet_grid(cov_e ~ cov_a, labeller = labeller(facet_labels)) p_noc final_results_noa <- final_results[final_results$cov_a == "No Covariate A", ] p_noa <- ggplot(final_results_noa, aes( x = total_pairs, y = power_xdiff, fill = factor(relatedness), color = factor(relatedness) )) + geom_bar(stat = "identity", position = "dodge") + labs( title = "Power Analysis for Discordant Sibling Designs", x = "Total Pairs", y = "Power (p < 0.05)", color = "Relatedness", fill = "Relatedness" ) + theme_minimal() + geom_text( x = 600, y = 0.55, label = "False Positive Rate", data = data.frame(cov_c = "No Covariate C", cov_e = "No Covariate E"), fontface = "italic", size = 3.5, inherit.aes = FALSE ) + geom_text( x = 600, y = 0.85, label = "Shared-Environmental Confounding", data = data.frame(cov_c = "Covariate C (0.25)", cov_e = "No Covariate E"), fontface = "italic", size = 3.5, inherit.aes = FALSE ) + facet_grid(cov_e ~ cov_c, labeller = labeller(facet_labels)) p_noa ## ----echo=FALSE, message=FALSE------------------------------------------------ knitr::kable(final_results)