Type: Package
Title: Regression Calibration Using Reliability Studies
Version: 0.2.0
Maintainer: Bowen Liu <bowenliu@hsph.harvard.edu>
Description: Implements regression calibration methods for correcting measurement error in regression models using external or internal reliability studies. Methods are described in Carroll, Ruppert, Stefanski, and Crainiceanu (2006) "Measurement Error in Nonlinear Models: A Modern Perspective" <doi:10.1201/9781420010138>.
License: MIT + file LICENSE
Encoding: UTF-8
Imports: stats, sandwich
Suggests: mgcv, knitr, rmarkdown
VignetteBuilder: knitr
RoxygenNote: 7.3.2
URL: https://lbw080526.github.io/RegCalReliab/, https://github.com/lbw080526/RegCalReliab
BugReports: https://github.com/lbw080526/RegCalReliab/issues
NeedsCompilation: no
Packaged: 2025-09-29 20:44:26 UTC; liubowen
Author: Bowen Liu [aut, cre, cph], Yu Lu [aut], Molin Wang [aut]
Repository: CRAN
Date/Publication: 2025-10-06 08:00:19 UTC

Unified Regression Calibration Wrapper (External Reliability Study)

Description

A single formula interface for regression calibration in external reliability studies. The user simply specifies 'link = "linear"', '"logistic"', or '"log"', and the wrapper selects the appropriate model: * '"linear"' → Gaussian (identity link) * '"logistic"' → Binomial (logit link) * '"log"' → Poisson (log link)

Usage

RC_ExReliab(
  formula,
  main_data,
  rep_data,
  link = c("linear", "logistic", "log"),
  return_details = FALSE
)

Arguments

formula

A formula or character string such as 'Y ~ sbp(sbp2, sbp3) + chol(chol2, chol3) + age + weight'. Terms of the form 'var(rep1, rep2, ...)' are treated as error-prone exposures with replicates in 'rep_data'; other terms are treated as covariates W.

main_data

Data frame holding the outcome, error-prone exposures, and covariates.

rep_data

Data frame holding replicate columns referenced in 'formula'.

link

Character; one of '"linear"', '"logistic"', or '"log"'.

return_details

Logical; if 'TRUE', return parsed, prepared, and RC internals.

Value

A list with: * 'uncorrected': naive regression estimates * 'corrected' : sandwich-corrected regression calibration estimates * optional 'details' if 'return_details = TRUE'

Examples

library(mgcv)
set.seed(123)
add_err <- function(v, sd = sqrt(0.4)) v + rnorm(length(v), 0, sd)

## --- Example 1: External 1Z 0W ---
x <- rnorm(3000)
z.main <- x[1:1500] + rnorm(1500, 0, sqrt(0.4))
z_rep <- rbind(
  cbind(add_err(x[1501:2000]), add_err(x[1501:2000]), NA, NA),
  cbind(add_err(x[2001:2400]), add_err(x[2001:2400]), add_err(x[2001:2400]), NA),
  cbind(add_err(x[2401:3000]), add_err(x[2401:3000]),
        add_err(x[2401:3000]), add_err(x[2401:3000]))
)
colnames(z_rep) <- paste0("z_", 1:4)
Y <- rbinom(1500, 1, plogis(-2.3 + log(1.5) * x[1:1500]))
main_data <- data.frame(Y = Y, z = z.main)
rep_data  <- data.frame(z_rep, check.names = FALSE)
res1 <- RC_ExReliab(Y ~ z(z_1, z_2, z_3, z_4), main_data, rep_data, link = "logistic")
res1$corrected

## --- Example 2: External 1Z 1W ---
x <- rnorm(3000)
W_main <- rnorm(1500)
W_rep  <- rnorm(1500)
z.main <- x[1:1500] + rnorm(1500, 0, sqrt(0.4))
z_rep <- rbind(
  cbind(add_err(x[1501:2000]), add_err(x[1501:2000]), NA, NA),
  cbind(add_err(x[2001:2400]), add_err(x[2001:2400]), add_err(x[2001:2400]), NA),
  cbind(add_err(x[2401:3000]), add_err(x[2401:3000]),
        add_err(x[2401:3000]), add_err(x[2401:3000]))
)
colnames(z_rep) <- paste0("z_", 1:4)
Y <- rbinom(1500, 1, plogis(-2.3 + log(1.5) * x[1:1500] + 0.5 * W_main))
main_data <- data.frame(Y = Y, z = z.main, W = W_main)
rep_data  <- data.frame(z_rep, W = W_rep, check.names = FALSE)
res2 <- RC_ExReliab(Y ~ z(z_1, z_2, z_3, z_4) + W, main_data, rep_data, link = "logistic")
res2$corrected

## --- Example 3: External 2Z 0W ---
x <- mgcv::rmvn(3000, c(0, 0), matrix(c(1, 0.3, 0.3, 1), 2))
z.main <- x[1:1500, ] + matrix(rnorm(1500 * 2, 0, sqrt(0.4)), 1500, 2)
colnames(z.main) <- c("z1", "z2")
z1_rep <- rbind(
  cbind(add_err(x[1501:2000, 1]), add_err(x[1501:2000, 1]), NA, NA),
  cbind(add_err(x[2001:2400, 1]), add_err(x[2001:2400, 1]), add_err(x[2001:2400, 1]), NA),
  cbind(add_err(x[2401:3000, 1]), add_err(x[2401:3000, 1]),
        add_err(x[2401:3000, 1]), add_err(x[2401:3000, 1]))
)
colnames(z1_rep) <- paste0("z1_", 1:4)
z2_rep <- rbind(
  cbind(add_err(x[1501:2000, 2]), add_err(x[1501:2000, 2]), NA, NA),
  cbind(add_err(x[2001:2400, 2]), add_err(x[2001:2400, 2]), add_err(x[2001:2400, 2]), NA),
  cbind(add_err(x[2401:3000, 2]), add_err(x[2401:3000, 2]),
        add_err(x[2401:3000, 2]), add_err(x[2401:3000, 2]))
)
colnames(z2_rep) <- paste0("z2_", 1:4)
Y <- rbinom(1500, 1, plogis(-2.3 + log(1.5) * rowSums(x[1:1500, ])))
main_data <- data.frame(Y = Y, z1 = z.main[, 1], z2 = z.main[, 2])
rep_data  <- data.frame(z1_rep, z2_rep, check.names = FALSE)
res3 <- RC_ExReliab(
  Y ~ z1(z1_1, z1_2, z1_3, z1_4) + z2(z2_1, z2_2, z2_3, z2_4),
  main_data, rep_data, link = "logistic"
)
res3$corrected


Unified Regression Calibration Wrapper (Internal Reliability Study)

Description

A single formula interface for regression calibration in internal reliability studies. The user simply specifies 'link = "linear"', '"logistic"', or '"log"', and the wrapper selects the appropriate model: * '"linear"' → Gaussian (identity link) * '"logistic"' → Binomial (logit link) * '"log"' → Poisson (log link)

Usage

RC_InReliab(
  formula,
  main_data,
  link = c("linear", "logistic", "log"),
  return_details = FALSE
)

Arguments

formula

A formula or character string such as 'Y ~ sbp(sbp2, sbp3) + chol(chol2, chol3) + age + weight'. Terms of the form 'var(rep1, rep2, ...)' are treated as error-prone exposures with replicates in 'main_data'; other terms are treated as covariates W.

main_data

Data frame holding the outcome, replicate error-prone exposures, and any covariates.

link

Character; one of '"linear"', '"logistic"', or '"log"'.

return_details

Logical; if 'TRUE', return parsed, prepared, and RC internals.

Value

A list with: * 'uncorrected': naive regression estimates * 'corrected' : sandwich-corrected regression calibration estimates * optional 'details' if 'return_details = TRUE'

Examples

set.seed(123)
add_err <- function(v, sd = sqrt(0.4)) v + rnorm(length(v), 0, sd)

## --- Example 1: Internal 1Z 0W ---
x <- rnorm(3000)
z <- rbind(
  cbind(add_err(x[1:1500]), NA, NA, NA),
  cbind(add_err(x[1501:2000]), add_err(x[1501:2000]), NA, NA),
  cbind(add_err(x[2001:2400]), add_err(x[2001:2400]), add_err(x[2001:2400]), NA),
  cbind(add_err(x[2401:3000]), add_err(x[2401:3000]),
        add_err(x[2401:3000]), add_err(x[2401:3000]))
)
colnames(z) <- paste0("z_", 1:4)
Y <- rbinom(3000, 1, plogis(-2.65 + log(1.5) * x))
main_data <- data.frame(Y, z)
res1 <- RC_InReliab(Y ~ myz(z_1, z_2, z_3, z_4),
                    main_data = main_data,
                    link = "logistic")
res1$corrected

## --- Example 2: Internal 1Z 1W ---
x  <- rnorm(3000)
W1 <- rnorm(3000)
z <- rbind(
  cbind(add_err(x[1:1500]), NA, NA, NA),
  cbind(add_err(x[1501:2000]), add_err(x[1501:2000]), NA, NA),
  cbind(add_err(x[2001:2400]), add_err(x[2001:2400]), add_err(x[2001:2400]), NA),
  cbind(add_err(x[2401:3000]), add_err(x[2401:3000]),
        add_err(x[2401:3000]), add_err(x[2401:3000]))
)
colnames(z) <- paste0("z_", 1:4)
Y <- rbinom(3000, 1, plogis(-2.65 + log(1.5) * x + 0.5 * W1))
main_data <- data.frame(Y, z, W1)
res2 <- RC_InReliab(Y ~ myz(z_1, z_2, z_3, z_4) + W1,
                    main_data = main_data,
                    link = "logistic")
res2$corrected

## --- Example 3: Internal 2Z 0W ---
x <- mgcv::rmvn(3000, c(0,0), matrix(c(1,0.3,0.3,1), 2))
z1 <- rbind(
  cbind(add_err(x[1:1500, 1]), NA, NA, NA),
  cbind(add_err(x[1501:2000, 1]), add_err(x[1501:2000, 1]), NA, NA),
  cbind(add_err(x[2001:2400, 1]), add_err(x[2001:2400, 1]), add_err(x[2001:2400, 1]), NA),
  cbind(add_err(x[2401:3000, 1]), add_err(x[2401:3000, 1]),
        add_err(x[2401:3000, 1]), add_err(x[2401:3000, 1]))
)
colnames(z1) <- paste0("z1_", 1:4)
z2 <- rbind(
  cbind(add_err(x[1:1500, 2]), NA, NA, NA),
  cbind(add_err(x[1501:2000, 2]), add_err(x[1501:2000, 2]), NA, NA),
  cbind(add_err(x[2001:2400, 2]), add_err(x[2001:2400, 2]), add_err(x[2001:2400, 2]), NA),
  cbind(add_err(x[2401:3000, 2]), add_err(x[2401:3000, 2]),
        add_err(x[2401:3000, 2]), add_err(x[2401:3000, 2]))
)
colnames(z2) <- paste0("z2_", 1:4)
Y <- rbinom(3000, 1, plogis(-2.65 + log(1.5) * rowSums(x)))
main_data <- data.frame(Y, z1, z2)
res3 <- RC_InReliab(
  Y ~ myz1(z1_1, z1_2, z1_3, z1_4) + myz2(z2_1, z2_2, z2_3, z2_4),
  main_data = main_data,
  link = "logistic")
res3$corrected