## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) library(ksformat) ## ----discrete----------------------------------------------------------------- fnew( "M" = "Male", "F" = "Female", .missing = "Unknown", .other = "Other Gender", name = "sex" ) gender_codes <- c("M", "F", "M", NA, "X", "F") formatted_genders <- fput(gender_codes, "sex") data.frame( code = gender_codes, label = formatted_genders ) fprint("sex") ## ----ranges------------------------------------------------------------------- fparse(text = ' VALUE age (numeric) [0, 18) = "Child" [18, 65) = "Adult" [65, HIGH] = "Senior" .missing = "Age Unknown" ; ') ages <- c(5, 15.3, 17.9, 18, 45, 64.99, 65, 85, NA) age_groups <- fputn(ages, "age") data.frame( age = ages, group = age_groups ) ## ----bmi---------------------------------------------------------------------- fparse(text = ' VALUE bmi (numeric) [0, 18.5) = "Underweight" [18.5, 25) = "Normal" [25, 30) = "Overweight" [30, HIGH] = "Obese" .missing = "No data" ; ') bmi_values <- c(16.2, 18.5, 22.7, 25, 29.9, 35.1, NA) bmi_labels <- fputn(bmi_values, "bmi") data.frame( bmi = bmi_values, category = bmi_labels ) ## ----bounds------------------------------------------------------------------- fparse(text = ' VALUE score (numeric) (0, 50] = "Low" (50, 100] = "High" .other = "Out of range" ; ') scores <- c(0, 1, 50, 51, 100, 101) score_labels <- fputn(scores, "score") data.frame( score = scores, label = score_labels ) ## ----invalue------------------------------------------------------------------ finput( "Male" = 1, "Female" = 2, name = "sex_inv" ) labels <- c("Male", "Female", "Male", "Unknown", "Female") codes <- finputn(labels, "sex_inv") data.frame( label = labels, code = codes ) ## ----bidirectional------------------------------------------------------------ status_bi <- fnew_bid( "A" = "Active", "I" = "Inactive", "P" = "Pending", name = "status" ) # Forward: code -> label status_codes <- c("A", "I", "P", "A") status_labels <- fputc(status_codes, "status") data.frame(code = status_codes, label = status_labels) # Reverse: label -> code test_labels <- c("Active", "Pending", "Inactive") test_codes <- finputc(test_labels, "status_inv") data.frame(label = test_labels, code = test_codes) ## ----multiparse--------------------------------------------------------------- fparse(text = ' // Study format definitions VALUE race (character) "W" = "White" "B" = "Black" "A" = "Asian" .missing = "Unknown" ; INVALUE race_inv "White" = 1 "Black" = 2 "Asian" = 3 ; ') flist() # character vector of names fprint() ## ----export------------------------------------------------------------------- bmi_fmt <- format_get("bmi") cat(fexport(bmi = bmi_fmt)) ## ----sas-put-input------------------------------------------------------------ # fputn — apply numeric format by name fputn(c(5, 30, 70), "age") # fputc — apply character format by name fputc(c("M", "F"), "sex") # finputn — apply numeric invalue by name finputn(c("White", "Black"), "race_inv") ## ----df-format---------------------------------------------------------------- df <- data.frame( id = 1:6, sex = c("M", "F", "M", "F", NA, "X"), age = c(15, 25, 45, 70, 35, NA), stringsAsFactors = FALSE ) sex_f <- format_get("sex") age_f <- format_get("age") df_formatted <- fput_df( df, sex = sex_f, age = age_f, suffix = "_label" ) df_formatted ## ----missing------------------------------------------------------------------ # With .missing label fput(c("M", "F", NA), "sex") # With keep_na = TRUE fput(c("M", "F", NA), sex_f, keep_na = TRUE) # is_missing() checks is_missing(NA) is_missing(NaN) is_missing("") # TRUE — empty strings are treated as missing ## ----date-formats------------------------------------------------------------- today <- Sys.Date() data.frame( format = c("DATE9.", "MMDDYY10.", "DDMMYY10.", "YYMMDD10.", "MONYY7.", "WORDDATE.", "YEAR4.", "QTR."), result = c( fputn(today, "DATE9."), fputn(today, "MMDDYY10."), fputn(today, "DDMMYY10."), fputn(today, "YYMMDD10."), fputn(today, "MONYY7."), fputn(today, "WORDDATE."), fputn(today, "YEAR4."), fputn(today, "QTR.") ) ) # Multiple dates dates <- as.Date(c("2020-01-15", "2020-06-30", "2020-12-25")) fputn(dates, "DATE9.") ## ----date-numeric------------------------------------------------------------- r_days <- as.numeric(as.Date("2025-01-01")) r_days fputn(r_days, "DATE9.") fputn(r_days, "MMDDYY10.") ## ----time-formats------------------------------------------------------------- seconds <- c(0, 3600, 45000, 86399) data.frame( seconds = seconds, TIME8 = fputn(seconds, "TIME8."), TIME5 = fputn(seconds, "TIME5."), HHMM = fputn(seconds, "HHMM.") ) ## ----datetime-formats--------------------------------------------------------- now <- Sys.time() data.frame( format = c("DATETIME20.", "DATETIME13.", "DTDATE.", "DTYYMMDD."), result = c( fputn(now, "DATETIME20."), fputn(now, "DATETIME13."), fputn(now, "DTDATE."), fputn(now, "DTYYMMDD.") ) ) # From numeric R-epoch seconds r_secs <- as.numeric(as.POSIXct("2025-06-15 14:30:00", tz = "UTC")) fputn(r_secs, "DATETIME20.") ## ----fnew-date---------------------------------------------------------------- # SAS-named format fnew_date("DATE9.", name = "bday_fmt") birthdays <- as.Date(c("1990-03-25", "1985-11-03", "2000-07-14")) fput(birthdays, "bday_fmt") # Custom strftime pattern (e.g. DD.MM.YYYY) fnew_date("%d.%m.%Y", name = "ru_date", type = "date") fput(birthdays, "ru_date") # Custom pattern with missing label fnew_date("MMDDYY10.", name = "us_date", .missing = "NO DATE") mixed <- c(as.Date("2025-01-01"), NA, as.Date("2025-12-31")) fput(mixed, "us_date") fprint("bday_fmt") ## ----date-df------------------------------------------------------------------ patients <- data.frame( id = 1:4, visit_date = as.Date(c("2025-01-10", "2025-02-15", "2025-03-20", NA)), stringsAsFactors = FALSE ) visit_fmt <- fnew_date("DATE9.", name = "visit_fmt", .missing = "NOT RECORDED") fput_df(patients, visit_date = visit_fmt) ## ----date-parse--------------------------------------------------------------- fparse(text = ' VALUE enrldt (date) pattern = "DATE9." .missing = "Not Enrolled" ; VALUE visit_time (time) pattern = "TIME8." ; VALUE stamp (datetime) pattern = "DATETIME20." ; ') fput(as.Date("2025-03-01"), "enrldt") fput(36000, "visit_time") fput(as.POSIXct("2025-03-01 10:00:00", tz = "UTC"), "stamp") # Export back to text enrl_obj <- format_get("enrldt") cat(fexport(enrldt = enrl_obj)) fclear() ## ----multilabel-basic--------------------------------------------------------- fnew( "0,5,TRUE,TRUE" = "Infant", "6,11,TRUE,TRUE" = "Child", "12,17,TRUE,TRUE" = "Adolescent", "0,17,TRUE,TRUE" = "Pediatric", "18,64,TRUE,TRUE" = "Adult", "65,Inf,TRUE,TRUE" = "Elderly", "18,Inf,TRUE,TRUE" = "Non-Pediatric", name = "age_categories", type = "numeric", multilabel = TRUE ) ages <- c(3, 14, 25, 70) # fput returns first match only fput(ages, "age_categories") # fput_all returns ALL matching labels all_labels <- fput_all(ages, "age_categories") for (i in seq_along(ages)) { cat("Age", ages[i], "->", paste(all_labels[[i]], collapse = ", "), "\n") } ## ----multilabel-missing------------------------------------------------------- fnew( "0,100,TRUE,TRUE" = "Valid Score", "0,49,TRUE,TRUE" = "Below Average", "50,100,TRUE,TRUE" = "Above Average", "90,100,TRUE,TRUE" = "Excellent", .missing = "No Score", .other = "Out of Range", name = "score_ml", type = "numeric", multilabel = TRUE ) scores <- c(95, 45, NA, 150) ml_result <- fput_all(scores, "score_ml") for (i in seq_along(scores)) { cat("Score", ifelse(is.na(scores[i]), "NA", scores[i]), "->", paste(ml_result[[i]], collapse = ", "), "\n") } ## ----multilabel-parse--------------------------------------------------------- fparse(text = ' VALUE risk (numeric, multilabel) [0, 3] = "Low Risk" [0, 7] = "Monitored" (3, 7] = "Medium Risk" (7, 10] = "High Risk" ; ') risk_scores <- c(2, 5, 9) risk_labels <- fput_all(risk_scores, "risk") for (i in seq_along(risk_scores)) { cat("Score", risk_scores[i], "->", paste(risk_labels[[i]], collapse = " | "), "\n") } ## ----multilabel-export-------------------------------------------------------- risk_obj <- format_get("risk") cat(fexport(risk = risk_obj)) fprint("risk") ## ----ae-grading--------------------------------------------------------------- fnew( "1,1,TRUE,TRUE" = "Mild", "2,2,TRUE,TRUE" = "Moderate", "3,3,TRUE,TRUE" = "Severe", "4,4,TRUE,TRUE" = "Life-threatening", "5,5,TRUE,TRUE" = "Fatal", "3,5,TRUE,TRUE" = "Serious", "1,2,TRUE,TRUE" = "Non-serious", name = "ae_grade", type = "numeric", multilabel = TRUE ) grades <- c(1, 2, 3, 4, 5) ae_labels <- fput_all(grades, "ae_grade") for (i in seq_along(grades)) { cat("Grade", grades[i], ":", paste(ae_labels[[i]], collapse = " + "), "\n") } fclear() ## ----nocase------------------------------------------------------------------- sex_nc <- fnew( "M" = "Male", "F" = "Female", .missing = "Unknown", name = "sex_nc", type = "character", ignore_case = TRUE ) input <- c("m", "F", "M", "f", NA) fput(input, sex_nc) # Note the [nocase] flag fprint("sex_nc") # Also works with fputc fputc("m", "sex_nc") fclear() ## ----expr-sprintf------------------------------------------------------------- stat_fmt <- fnew( "n" = "sprintf('%s', .x1)", "pct" = "sprintf('%.1f%%', .x1 * 100)", name = "stat", type = "character" ) types <- c("n", "pct", "n", "pct") values <- c(42, 0.053, 100, 0.255) fput(types, stat_fmt, values) ## ----expr-twoargs------------------------------------------------------------- ratio_fmt <- fnew( "ratio" = "sprintf('%s/%s', .x1, .x2)", name = "ratio", type = "character" ) fput("ratio", ratio_fmt, 3, 10) fput(c("ratio", "ratio"), ratio_fmt, c(3, 7), c(10, 20)) ## ----expr-ifelse-------------------------------------------------------------- sign_fmt <- fnew( "val" = "ifelse(.x1 > 0, paste0('+', .x1), as.character(.x1))", name = "sign", type = "character" ) nums <- c(5, 0, -3) fput(rep("val", 3), sign_fmt, nums) ## ----expr-mixed--------------------------------------------------------------- mixed_fmt <- fnew( "header" = "HEADER", "n" = "sprintf('N=%s', .x1)", "pct" = "sprintf('%.1f%%', .x1 * 100)", name = "mixed", type = "character" ) keys <- c("header", "n", "pct", "header", "n") vals <- c(0, 42, 0.15, 0, 100) fput(keys, mixed_fmt, vals) ## ----expr-other--------------------------------------------------------------- known_fmt <- fnew( "ok" = "OK", .other = "sprintf('Error(%s)', .x1)", name = "err_fmt", type = "character" ) codes <- c("ok", "E01", "ok", "E99") details <- c("", "timeout", "", "overflow") fput(codes, known_fmt, details) ## ----expr-recycle------------------------------------------------------------- label_fmt <- fnew( "val" = "sprintf('%s (N=%s)', .x1, .x2)", name = "recycle", type = "character" ) fput(c("val", "val"), label_fmt, c(42, 55), 100) ## ----expr-stat-fnew----------------------------------------------------------- # Population counts used as denominators n.trt <- data.frame(pop = c("fas","pps","saf"), ntot = c(34, 30, 36)) get_n <- function(pop) { n.trt$ntot[n.trt$pop == pop] } fnew( "n_fas" = e("get_n('fas')"), "n_pps" = e("get_n('pps')"), "n_saf" = e("get_n('saf')"), "n" = "sprintf('%d', .x1)", "n_pct_fas" = "sprintf('%d (%5.1f%%)', .x1, .x1 * 100 / get_n('fas'))", "n_pct_pps" = "sprintf('%d (%5.1f%%)', .x1, .x1 * 100 / get_n('pps'))", "n_pct_saf" = "sprintf('%d (%5.1f%%)', .x1, .x1 * 100 / get_n('saf'))", "pct" = "dplyr::case_when( .x1>0 & .x1<0.1 ~ sprintf('%5s', ' <0.1%'), .x1>=0.1 | .x1==0 ~ sprintf(paste0('%5.', 1 ,'f%%'), .x1) )", "pval" = "dplyr::case_when( .x1>=0 & .x1<0.001 ~ sprintf('%s', '<0.001'), .x1>=0.001 & .x1<=0.999 ~ sprintf(paste0('%.', 3 ,'f'), .x1), .x1>0.999 ~ sprintf('%s', '>0.999'), .default = '--' )", name = "stat", type = "character" ) ## ----expr-stat-fparse--------------------------------------------------------- fmt <- ' VALUE stat_01 (character) "n_fas" = "get_n(\'fas\')" (eval) "n_pps" = "get_n(\'pps\')" (eval) "n_saf" = "get_n(\'saf\')" (eval) "n" = "sprintf(\'%d\', .x1)" "pct" = "dplyr::case_when(.x1>0 & .x1<0.1 ~ sprintf(\'%5s\', \' <0.1%\'), .x1>=0.1 | .x1==0 ~ sprintf(paste0(\'%5.\', 1 ,\'f%%\'), .x1))" "n_pct_fas" = "sprintf(\'%d (%5.1f%%)\', .x1, .x1 * 100 / get_n(\'fas\'))" "n_pct_pps" = "sprintf(\'%d (%5.1f%%)\', .x1, .x1 * 100 / get_n(\'pps\'))" "n_pct_saf" = "sprintf(\'%d (%5.1f%%)\', .x1, .x1 * 100 / get_n(\'saf\'))" "pval" = "dplyr::case_when(.x1>=0 & .x1<0.001 ~ sprintf(\'%s\', \'<0.001\'), .x1>=0.001 & .x1<=0.999 ~ sprintf(paste0(\'%.\', 3 ,\'f\'), .x1), .x1>0.999 ~ sprintf(\'%s\', \'>0.999\'), .default = \'--\')" ;' fparse(fmt) ## ----expr-stat-apply---------------------------------------------------------- df <- data.frame( types = c("n_fas", "n_pps", "n_saf", "n", "pct", "pct", "n", "pval", "pval", "n_pct_fas", "n_pct_pps", "n_pct_saf"), values = c(NA, NA, NA, 42, 0.053, 0.0008, 100, 0.255, 0.0003, 22, 22, 22) ) df$fmt <- fput(df$types, "stat", df$values) df$fmt_01 <- fput(df$types, "stat_01", df$values) print(df) fclear() ## ----vectorized--------------------------------------------------------------- # Dispatch format: maps type code to format name fnew("1" = "groupx", "2" = "groupy", "3" = "groupz", name = "typefmt", type = "numeric") # Per-group character formats fnew("positive" = "agree", "negative" = "disagree", "neutral" = "notsure", name = "groupx", type = "character") fnew("positive" = "accept", "negative" = "reject", "neutral" = "possible", name = "groupy", type = "character") fnew("positive" = "pass", "negative" = "fail", "neutral" = "retest", name = "groupz", type = "character") type <- c(1, 1, 1, 2, 2, 2, 3, 3, 3) response <- c("positive", "negative", "neutral", "positive", "negative", "neutral", "positive", "negative", "neutral") # Step 1: map type -> format name respfmt <- fput(type, "typefmt") # Step 2: apply per-element format word <- fputc(response, respfmt) data.frame(type = type, response = response, respfmt = respfmt, word = word) fclear() ## ----dates-putn--------------------------------------------------------------- # Format that maps key codes to date format names fnew("1" = "date9.", "2" = "mmddyy10.", name = "writfmt", type = "numeric") fnew_date("date9.") fnew_date("mmddyy10.") # Input data (R date numbers = days since 1970-01-01) number <- c(12103, 10899) key <- c(1, 2) # Look up format name per observation datefmt <- fputn(key, "writfmt") # Apply per-element date format date <- fputn(number, datefmt) data.frame(number = number, key = key, datefmt = datefmt, date = date) fclear() ## ----cntlout-import----------------------------------------------------------- csv_path <- system.file("extdata", "test_cntlout.csv", package = "ksformat") ## ----cntlout-use-------------------------------------------------------------- imported <- fimport(csv_path) names(imported) flist() fprint() ## ----cntlout-apply------------------------------------------------------------ # Character format (GENDER) gender_codes <- c("M", "F", NA, "X") data.frame( code = gender_codes, label = fputc(gender_codes, "GENDER") ) # Numeric format (AGEGRP) ages <- c(5, 17, 18, 45, 65, 100, NA, -1) data.frame( age = ages, group = fputn(ages, "AGEGRP") ) # Numeric format (BMICAT) bmi_values <- c(15.0, 18.5, 22.3, 25.0, 28.7, 30.0, 35.5) data.frame( bmi = bmi_values, category = fputn(bmi_values, "BMICAT") ) # Invalue (RACEIN) race_labels <- c("White", "Black", "Asian", "Other") data.frame( label = race_labels, code = finputn(race_labels, "RACEIN") ) ## ----cntlout-df--------------------------------------------------------------- df <- data.frame( id = 1:5, sex = c("M", "F", "M", NA, "F"), age = c(10, 30, 70, NA, 50), stringsAsFactors = FALSE ) gender_fmt <- imported[["GENDER"]] age_fmt <- imported[["AGEGRP"]] fput_df(df, sex = gender_fmt, age = age_fmt, suffix = "_label") ## ----cntlout-export----------------------------------------------------------- cat(fexport(AGEGRP = age_fmt)) cat(fexport(GENDER = gender_fmt)) ## ----cntlout-manual----------------------------------------------------------- fclear() manual <- fimport(csv_path, register = FALSE) # Library should be empty flist() fprint() # Use directly from returned list fput(c("M", "F"), manual[["GENDER"]]) fclear() ## ----bilingual---------------------------------------------------------------- # Single format, language selected via .x1 extra argument sex_bi <- fnew( "M" = "ifelse(.x1 == 'en', 'Male', 'Homme')", "F" = "ifelse(.x1 == 'en', 'Female', 'Femme')", .missing = "Unknown", name = "sex_bi" ) # .x1 = language code per observation fput(c("M", "F", "M"), sex_bi, c("en", "fr", "en")) # -> "Male" "Femme" "Male" # Alternative: one format per language, selected at apply-time fnew("M" = "Male", "F" = "Female", .missing = "Unknown", name = "sex_en") fnew("M" = "Homme", "F" = "Femme", .missing = "Inconnu", name = "sex_fr") lang <- "fr" fput(c("M", "F", NA), paste0("sex_", lang)) # -> "Homme" "Femme" "Inconnu" fclear() ## ----fputk-setup-------------------------------------------------------------- # Simulate a Subject Visits (SV) domain SV <- data.frame( USUBJID = c("SUBJ-001", "SUBJ-001", "SUBJ-001", "SUBJ-002", "SUBJ-002"), VISITNUM = c(1, 2, 3, 1, 2), SVSTDTC = c("2025-01-15", "2025-02-20", "2025-03-10", "2025-01-18", "2025-02-25"), stringsAsFactors = FALSE ) # Simulate a Questionnaires (QS) domain QS <- data.frame( USUBJID = c("SUBJ-001", "SUBJ-001", "SUBJ-002", "SUBJ-002", "SUBJ-002"), VISITNUM = c(1, 2, 1, 2, 3), QSTESTCD = c("SCORE1", "SCORE1", "SCORE1", "SCORE1", "SCORE1"), QSSTRESN = c(85, 90, 72, 78, NA), stringsAsFactors = FALSE ) SV QS ## ----fputk-register----------------------------------------------------------- # Create composite key -> date string mapping from SV fnew( fmap(paste(SV$USUBJID, SV$VISITNUM, sep = "|"), SV$SVSTDTC), .other = "NOT FOUND", name = "svdtc", type = "character", ignore_case = TRUE ) fprint("svdtc") ## ----fputk-apply-------------------------------------------------------------- QS$SVSTDTC <- fputk(QS$USUBJID, QS$VISITNUM, format = "svdtc") QS class(QS$SVSTDTC) # character fclear() ## ----fputk-date--------------------------------------------------------------- # Create composite key -> Date mapping from SV fnew( fmap( paste(SV$USUBJID, SV$VISITNUM, sep = "|"), as.Date(SV$SVSTDTC, format = "%Y-%m-%d") ), .other = NA, name = "svdtn", type = "Date", ignore_case = TRUE ) fprint("svdtn") ## ----fputk-date-apply--------------------------------------------------------- QS$SVSTDTC_DT <- fputk(QS$USUBJID, QS$VISITNUM, format = "svdtn") QS class(QS$SVSTDTC_DT) # Date # Typed NA for unmatched keys (SUBJ-002 Visit 3 not in SV) is.na(QS$SVSTDTC_DT[5]) # Date arithmetic works directly QS$SVSTDTC_DT + 7 # add 7 days fclear() ## ----fmap-setup--------------------------------------------------------------- library(ksformat) dm <- data.frame( USUBJID = c("SUBJ-001", "SUBJ-002", "SUBJ-003"), SUBJID = c("001", "002", "003"), RFICDTC = c("2023-03-09T08:45", "2024-08-13T09:53", "2025-06-17T09:03"), stringsAsFactors = FALSE ) # Composite key for both formats keys <- paste(dm$USUBJID, dm$SUBJID, sep = "|") ## ----fmap-date---------------------------------------------------------------- # Date lookup fnew( fmap(keys, as.Date(dm$RFICDTC, format = "%Y-%m-%d")), .other = NA, type = "Date", ignore_case = TRUE, name = "icdtn" ) # Character lookup — same fmap(keys, values) pattern! fnew( fmap(keys, dm$RFICDTC), .other = "NOT FOUND", type = "character", ignore_case = TRUE, name = "icdtc" ) fprint("icdtn") fprint("icdtc") ## ----fmap-apply--------------------------------------------------------------- # Both return the expected results fputk("SUBJ-001", "001", format = "icdtn") class(fputk("SUBJ-001", "001", format = "icdtn")) fputk("SUBJ-001", "001", format = "icdtc") class(fputk("SUBJ-001", "001", format = "icdtc")) fclear() ## ----fmap-default------------------------------------------------------------- # These are equivalent — both map "M" -> "Male" fmt_a <- fnew(c(Male = "M", Female = "F")) fmt_b <- fnew("M" = "Male", "F" = "Female") identical(fput(c("M", "F"), fmt_a), fput(c("M", "F"), fmt_b)) fclear() ## ----fparse-date-char--------------------------------------------------------- fparse(text = ' VALUE svdtc (character, nocase) "SUBJ-001|1" = "2025-01-15" "SUBJ-001|2" = "2025-02-20" "SUBJ-001|3" = "2025-03-10" "SUBJ-002|1" = "2025-01-18" "SUBJ-002|2" = "2025-02-25" .other = "NOT FOUND" ; ') fprint("svdtc") ## ----fparse-date-char-apply--------------------------------------------------- QS <- data.frame( USUBJID = c("SUBJ-001", "SUBJ-001", "SUBJ-002", "SUBJ-002", "SUBJ-002"), VISITNUM = c(1, 2, 1, 2, 3), QSSTRESN = c(85, 90, 72, 78, NA), stringsAsFactors = FALSE ) QS$SVSTDTC <- fputk(QS$USUBJID, QS$VISITNUM, format = "svdtc") QS fclear() ## ----fparse-date-native------------------------------------------------------- fparse(text = ' VALUE svdtn (Date, format: %Y-%m-%d, nocase) "SUBJ-001|1" = "2025-01-15" "SUBJ-001|2" = "2025-02-20" "SUBJ-001|3" = "2025-03-10" "SUBJ-002|1" = "2025-01-18" "SUBJ-002|2" = "2025-02-25" ; ') fprint("svdtn") ## ----fparse-date-native-apply------------------------------------------------- QS$SVSTDTC_DT <- fputk(QS$USUBJID, QS$VISITNUM, format = "svdtn") QS class(QS$SVSTDTC_DT) # Date is.na(QS$SVSTDTC_DT[5]) # TRUE — no match for SUBJ-002 Visit 3 # Date arithmetic works directly QS$SVSTDTC_DT + 7 ## ----fparse-date-roundtrip---------------------------------------------------- fmt_obj <- format_get("svdtn") txt <- fexport(svdtn = fmt_obj) cat(txt) ## ----fparse-date-reimport----------------------------------------------------- # Re-parse the exported text fclear() fparse(text = txt) # Verify it still works fputk("SUBJ-001", 2, format = "svdtn") fclear() ## ----franges-basic------------------------------------------------------------ fparse(text = ' VALUE age (numeric) [0, 18) = "Child" [18, 65) = "Adult" [65, HIGH] = "Senior" .missing = "Unknown" ; ') franges("age") ## ----franges-filter----------------------------------------------------------- df <- franges("age") # Which ranges have a finite upper bound? df[is.finite(df$high), ] ## ----franges-discrete--------------------------------------------------------- fnew("M" = "Male", "F" = "Female", .missing = "Unknown", name = "sex") franges("sex") # 0 rows ## ----franges-cleanup, include=FALSE------------------------------------------- fclear() ## ----fmap-to-ranges----------------------------------------------------------- fparse(text = ' VALUE visit_ther (numeric) [LOW, 1] = 0 [ 8, 22] = 2 [22, 36] = 4 [37, 50] = 6 [51, 63] = 8 [64, 78] = 10 [79, 91] = 12 ; ') coded_weeks <- c(0, 2, 4, 6, 8, 10, 12) fmap_to_ranges(coded_weeks, "visit_ther") ## ----fmap-to-ranges-na-------------------------------------------------------- fmap_to_ranges(c(2, 99, 4), "visit_ther") ## ----fmap-to-ranges-cleanup, include=FALSE------------------------------------ fclear() ## ----date-range-basic--------------------------------------------------------- fnew( "2023-01-01,2024-01-01,TRUE,FALSE" = "FY23", "2024-01-01,2025-01-01,TRUE,FALSE" = "FY24", "2025-01-01,2026-01-01,TRUE,FALSE" = "FY25", type = "date_range", name = "fiscal_year" ) dates <- as.Date(c("2023-06-15", "2024-03-01", "2024-12-31", "2025-07-04", "2022-01-01", NA)) data.frame( date = dates, fy = fput(dates, "fiscal_year") ) ## ----date-range-fparse-------------------------------------------------------- fparse(text = ' VALUE quarter (date_range) [2024-01-01, 2024-04-01) = "Q1-2024" [2024-04-01, 2024-07-01) = "Q2-2024" [2024-07-01, 2024-10-01) = "Q3-2024" [2024-10-01, 2025-01-01) = "Q4-2024" .other = "Outside 2024" ; ') sample_dates <- as.Date(c("2024-02-14", "2024-05-20", "2024-08-08", "2024-11-30", "2025-03-01")) data.frame( date = sample_dates, quarter = fput(sample_dates, "quarter") ) ## ----date-range-low-high------------------------------------------------------ fparse(text = ' VALUE era (date_range) [LOW, 2000-01-01) = "Pre-2000" [2000-01-01, 2010-01-01) = "2000s" [2010-01-01, 2020-01-01) = "2010s" [2020-01-01, HIGH] = "2020+" ; ') event_dates <- as.Date(c("1985-07-04", "2005-12-25", "2015-06-01", "2023-11-11")) data.frame( date = event_dates, era = fput(event_dates, "era") ) ## ----date-range-export-------------------------------------------------------- q_obj <- format_get("quarter") cat(fexport(quarter = q_obj)) ## ----date-range-roundtrip----------------------------------------------------- # Re-parse the exported text txt <- fexport(quarter = q_obj) fclear() fparse(text = txt) fput(as.Date(c("2024-02-14", "2024-08-08")), "quarter") ## ----date-range-multilabel---------------------------------------------------- fparse(text = ' VALUE study_window (date_range, multilabel) [2024-01-01, 2024-07-01) = "First Half" [2024-04-01, 2024-10-01) = "Mid-Year" [2024-07-01, 2025-01-01) = "Second Half" ; ') checkup_dates <- as.Date(c("2024-02-15", "2024-05-20", "2024-09-01")) all_windows <- fput_all(checkup_dates, "study_window") for (i in seq_along(checkup_dates)) { cat(format(checkup_dates[i]), "->", paste(all_windows[[i]], collapse = " | "), "\n") } ## ----date-range-autodetect---------------------------------------------------- fparse(text = ' VALUE auto_fy [2024-01-01, 2025-01-01) = "2024" ; VALUE auto_shift [2024-01-15 08:00, 2024-01-15 16:00) = "Day shift" ; ') cat("auto_fy type :", format_get("auto_fy")$type, "\n") cat("auto_shift type:", format_get("auto_shift")$type, "\n") ## ----datetime-range----------------------------------------------------------- fparse(text = ' VALUE shift (datetime_range) [2024-01-15 00:00, 2024-01-15 08:00) = "Night" [2024-01-15 08:00, 2024-01-15 16:00) = "Day" [2024-01-15 16:00, 2024-01-16 00:00) = "Evening" ; ') timestamps <- as.POSIXct( c("2024-01-15 03:22:00", "2024-01-15 11:45:00", "2024-01-15 19:00:00"), tz = "UTC" ) data.frame( ts = format(timestamps, tz = "UTC"), shift = fput(timestamps, "shift") ) ## ----date-range-cleanup, include=FALSE---------------------------------------- fclear() ## ----strat-num---------------------------------------------------------------- visits <- fmap_strata( stratum = c("ARM_A", "ARM_A", "ARM_A", "ARM_B", "ARM_B"), low = c(0, 7, 28, 0, 14), high = c(7, 28, Inf, 14, Inf), label = c("Baseline", "Wk1-3", "Wk4+", "Baseline", "Wk2+"), inc_high = c(FALSE, FALSE, TRUE, FALSE, TRUE) ) fnew(visits, type = "stratified_range", ".other|ARM_A" = "A_outside", .other = "outside_window", name = "vw") df <- data.frame( arm = c("ARM_A", "ARM_A", "ARM_B", "ARM_B", "ARM_C"), day = c(3, 35, 5, 40, 10) ) df$visit <- fputk(df$arm, df$day, format = "vw") df ## ----strat-text--------------------------------------------------------------- fparse(text = ' VALUE vw_text (stratified_range, range_subtype: numeric) "ARM_A"|[0, 7) = "Baseline" "ARM_A"|[7, 28) = "Wk1-3" "ARM_A"|[28, HIGH]= "Wk4+" "ARM_B"|[0, 14) = "Baseline" "ARM_B"|[14, HIGH]= "Wk2+" ".other|ARM_A" = "A_outside" .other = "outside_window" ; ') fputk(df$arm, df$day, format = "vw_text") ## ----strat-date--------------------------------------------------------------- windows <- fmap_strata( stratum = c("S001", "S001", "S002", "S002"), low = as.Date(c("2024-01-01", "2024-01-15", "2024-02-01", "2024-02-20")), high = as.Date(c("2024-01-15", "2024-02-01", "2024-02-20", "2024-03-10")), label = c("Screen", "Treat", "Screen", "Treat") ) fnew(windows, type = "stratified_range", range_subtype = "date", .other = "off-window", name = "win") subj <- c("S001", "S001", "S002", "S002", "S003") visits <- as.Date(c("2024-01-05", "2024-01-20", "2024-02-10", "2024-03-01", "2024-01-01")) data.frame( subj = subj, date = visits, phase = fputk(subj, visits, format = "win") ) ## ----strat-roundtrip---------------------------------------------------------- txt <- fexport(format_get("vw")) cat(txt, "\n") fclear() fparse(text = txt) fputk(df$arm, df$day, format = "vw") ## ----strat-cleanup, include=FALSE--------------------------------------------- fclear() ## ----fmap-ranges-num---------------------------------------------------------- age_groups <- fmap_ranges( low = c(0, 18, 65), high = c(18, 65, Inf), label = c("Child", "Adult", "Senior"), inc_high = c(FALSE, FALSE, TRUE) ) fnew(age_groups, type = "numeric", name = "ag") fput(c(5, 25, 90), "ag") fclear() ## ----na-str-setup------------------------------------------------------------- # Source lab mapping (as received from a specification) lb_map <- data.frame( LBCAT = c("BLOOD CHEMISTRY", "COAGULOGRAM", "COAGULATION PANEL", "COAGULOGRAM"), LBSPEC = c("BLOOD", "BLOOD", "BLOOD", "BLOOD"), LBTESTCD = c("ALB", "FIBRINO", "INR", "INR"), LBSTRESU = c("g/L", "g/L", NA, NA), PARAMCD = c("ALB", "FIBRINO", "INR", "INR"), stringsAsFactors = FALSE ) lb_map ## ----na-str-build------------------------------------------------------------- with(lb_map, fmap(paste(LBCAT, LBSPEC, LBTESTCD, LBSTRESU, sep = "|"), PARAMCD) ) |> fnew(ignore_case = TRUE, .other = NA, type = "character", name = "lb_param") fprint("lb_param") ## ----na-str-default----------------------------------------------------------- lb_map$PARAMCD_default <- with(lb_map, fputk(LBCAT, LBSPEC, LBTESTCD, LBSTRESU, format = "lb_param") ) lb_map[, c("LBTESTCD", "LBSTRESU", "PARAMCD", "PARAMCD_default")] ## ----na-str-correct----------------------------------------------------------- lb_map$PARAMCD_back <- with(lb_map, fputk(LBCAT, LBSPEC, LBTESTCD, LBSTRESU, format = "lb_param", na_as_string = TRUE) ) lb_map[, c("LBTESTCD", "LBSTRESU", "PARAMCD", "PARAMCD_back")] ## ----na-str-cleanup, include=FALSE-------------------------------------------- fclear() ## ----finputk-basic------------------------------------------------------------ # Build an INVALUE from two-column composite labels finput( fmap(paste(c("BLOOD CHEMISTRY", "COAGULOGRAM", "COAGULATION PANEL"), c("ALB", "FIBRINO", "INR"), sep = "|"), c(1L, 2L, 3L)), target_type = "integer", name = "lb_code_inv" ) # Reverse lookup: two separate columns → integer code cat_vec <- c("BLOOD CHEMISTRY", "COAGULOGRAM", "COAGULATION PANEL", "OTHER") test_vec <- c("ALB", "FIBRINO", "INR", "X") finputk(cat_vec, test_vec, invalue_name = "lb_code_inv") # BLOOD CHEMISTRY|ALB → 1, COAGULOGRAM|FIBRINO → 2, # COAGULATION PANEL|INR → 3, OTHER|X → NA (no match → missing_value) fclear() ## ----finputk-na--------------------------------------------------------------- # INVALUE where LBSTRESU can be NA (like INR) finput( fmap( paste(lb_map$LBCAT, lb_map$LBTESTCD, lb_map$LBSTRESU, sep = "|"), seq_len(nrow(lb_map)) ), target_type = "integer", name = "lb_row_inv" ) # Reconstruct lb_map row indices — works even when LBSTRESU is NA finputk(lb_map$LBCAT, lb_map$LBTESTCD, lb_map$LBSTRESU, invalue_name = "lb_row_inv", na_as_string = TRUE) fclear()