## -----------------------------------------------------------------------------
knitr::opts_chunk$set(collapse = TRUE, comment = '#>')
options(rmarkdown.html_vignette.check_title = FALSE)

## ----clear_memory-------------------------------------------------------------
rm(list=ls()) 

## ----runchunks----------------------------------------------------------------
# Set whether or not the following chunks will be executed (run):
execute.vignette <- FALSE

## ----load_packages, eval = execute.vignette-----------------------------------
# #
# #
# # Setup
# #
# #
# library(ggplot2)
# library(httk)
# library(grid)
# library(scales)
# library(viridis)
# library(ggpubr)
# library(readxl)
# library(knitr)
# 
# LOC.WD <- "c:/users/jwambaug/git/httk-dev/work/HTTKPFAS/"
# PFAS.MODEL.USED <- "sumclearancespfas"
# NONPFAS.MODEL.USED <- "sumclearances"

## ----fig_properties, eval = execute.vignette----------------------------------
# FONT.SIZE <- 24
# POINT.SIZE <- 4

## ----scientific.notation, eval = execute.vignette-----------------------------
# scientific_10 <- function(x) {
#   out <- gsub("1e", "10^", scientific_format()(x))
#   out <- gsub("\\+","",out)
#   out <- gsub("10\\^01","10",out)
#   out <- parse(text=gsub("10\\^00","1",out))
# }

## ----RMSLE_function, eval = execute.vignette----------------------------------
# RMSLE <- function(tab,colx,coly)
# {
#     return(signif(mean(log10(tab[,colx]/tab[,coly])^2,na.rm=TRUE)^(1/2),3))
# }

## ----rb2peval, eval = execute.vignette----------------------------------------
# rb2p.table <- subset(chem.physical_and_invitro.data,
#                      Human.Rblood2plasma.Reference %in% "Poothong 2017")
# rb2p.table <- rb2p.table[,colnames(rb2p.table) %in%
#                                      c("Compound","DTXSID",
#                                        "Human.Rblood2plasma"
#                                      )]
# for (this.chem in rb2p.table$DTXSID)
# {
#   if (this.chem %in% get_2023pfasinfo(info="dtxsid", suppress.messages=TRUE,
#                                    model=PFAS.MODEL.USED))
#   {
#     rb2p.table[rb2p.table$DTXSID==this.chem,"HTTK.Rb2p"] <-
#       calc_rblood2plasma(dtxsid=this.chem,
#                          class.exclude=FALSE)
#   }
#   p <- try(parameterize_pfas1comp(dtxsid=this.chem))
#   if(!is(p, "try-error"))
#     rb2p.table[rb2p.table$DTXSID==this.chem,"ML.Rb2p"] <-
#     p$Rblood2plasma
# }
# kable(rb2p.table)

## ----MAeval, eval = execute.vignette------------------------------------------
# logma.table <- subset(chem.physical_and_invitro.data,
#                      logMA.Reference %in% "Droge 2019")
# logma.table <- logma.table[,colnames(logma.table) %in%
#                                      c("Compound","DTXSID",
#                                        "logMA"
#                                      )]
# for (this.chem in logma.table$DTXSID)
# {
#   logma.table[logma.table$DTXSID==this.chem,"HTTK.logMA"] <-
#       signif(log10(calc_ma(dtxsid=this.chem,
#                            pfas.calibration=FALSE,
#                            suppress.messages=TRUE)),3)
# }
# kable(logma.table)

## ----fit_logma, eval = execute.vignette---------------------------------------
# fit.logma <- lm(logMA~HTTK.logMA,data=logma.table)
# summary(fit.logma)
# 
# logma.table$Fit.logMA <- signif(fit.logma$coefficients[1] +
#   fit.logma$coefficients[2]*logma.table$HTTK.logMA,3)

## ----fig_logma, eval = execute.vignette---------------------------------------
# fig.logma  <- ggplot(data=logma.table) +
#   geom_point(aes(
#     x=HTTK.logMA,
#     y=logMA
#     ),
#     size=POINT.SIZE) +
#   geom_abline(slope=1,intercept=0,linetype="dashed")+
#   geom_line(aes(x=HTTK.logMA,
#     y=Fit.logMA), color="red")+
#   ylab("Droge (2019) logMA") +
#   xlab("HTTK log Membrane Affinity") +
#   scale_x_log10(label=scientific_10)+
#   scale_y_log10(label=scientific_10)+
#   theme_bw()  +
#   theme( text  = element_text(size=FONT.SIZE))
# 
# print(fig.logma)

## ----plot_clint_fup, eval = execute.vignette----------------------------------
# new.pfas <- get_2023pfasinfo(info="dtxsid",
#                                       model=PFAS.MODEL.USED,
#                                       suppress.messages=TRUE)
# # What chems does sumclearances model work for overall?
# good.chems <- get_cheminfo(info="dtxsid", model = PFAS.MODEL.USED)
# cat(paste("were incorporated from the scientific literature into R package \\“httk\\” for",
#           sum(new.pfas %in% good.chems),
#           "PFAS\n"))
# cat(paste("Out of",
#           length(new.pfas),
#           "PFAS with new in vitro TK data reported,",
#           sum(new.pfas %in% good.chems),
#           "had the Clint, fup, and physicochemical",
#           "property predictions needed to make HTTK predictions.\n"))
# 
# 
# 
# clint.fup <- subset(chem.physical_and_invitro.data,
#                     !is.na(Human.Funbound.plasma) &
#                     !is.na(Human.Clint) &
#                     (regexpr("PFAS",Chemical.Class)==-1 |
#                     DTXSID %in% new.pfas))
# clint.fup$Human.Clint <- sapply(clint.fup$Human.Clint,
#                                 function(x) as.numeric(strsplit(x,",")[[1]][1]))
# clint.fup$Human.Funbound.plasma <- sapply(clint.fup$Human.Funbound.plasma,
#                                 function(x) as.numeric(strsplit(x,",")[[1]][1]))
# clint.fup[clint.fup$Chemical.Class=="","Chemical.Class"] <- "Non-PFAS"
# clint.fup[clint.fup$Chemical.Class=="PFAS","Chemical.Class"] <- "Newly Measured PFAS"
# clint.fup.pfas <- subset(clint.fup,Chemical.Class!="Non-PFAS")
# clint.fup.pfas <- merge(clint.fup.pfas,subset(httk::dawson2023,Species=="Human" &
#                               Sex=="Female" &
#                               DosingAdj=="Oral")[,c("DTXSID","ClassPredFull")],
#       by="DTXSID",all.x=TRUE)
# colnames(clint.fup.pfas)[colnames(clint.fup.pfas)=="ClassPredFull"] <- "ML.Class"
# save(clint.fup,
#      clint.fup.pfas,
#      file=paste0(LOC.WD,"Clint-Fup-Values.RData"))
# write.table(clint.fup.pfas,
#             quote=FALSE,
#             row.names=FALSE,
#             sep="\t",
#             file=paste0(LOC.WD,"PFAS-Clint_Fup.txt"))
# cat(paste("(fraction unbound in plasma) in",
#           sum(!is.na(clint.fup.pfas$Human.Funbound.plasma)),
#           "PFAS\n"))
# cat(paste("(intrinsic hepatocyte clearance) in",
#           sum(!is.na(clint.fup.pfas$Human.Clint)),
#           "PFAS\n"))

## ----clint.fup.figures, eval = execute.vignette-------------------------------
# load(paste0(LOC.WD,"Clint-Fup-Values.RData"))
# fig.clint.fup  <- ggplot(data=clint.fup) +
#   geom_point(aes(
#     x=Human.Funbound.plasma,
#     y=Human.Clint,
#     colour=Chemical.Class,
#     shape=Chemical.Class),
#     size=POINT.SIZE) +
#   ylab(expression(italic("In Vitro")~"Measured Cl"[int]~"("*mu*"L/min/10"^'6'~"hep.)")) +
#   xlab(expression(italic("In Vitro")~"Measured f"[up])) +
#   scale_x_log10(label=scientific_10)+
#   scale_y_log10(label=scientific_10)+
#   theme_bw()  +
#   theme( text  = element_text(size=FONT.SIZE)) +
#         scale_color_viridis(discrete=2)
# 
# print(fig.clint.fup)
# 
# fig.fup.hist <- ggplot(data=clint.fup) +
#   geom_histogram(aes(
#     x=Human.Funbound.plasma,
#     colour=Chemical.Class,
#     fill=Chemical.Class)) +
#   xlab(expression(italic("In Vitro")~"Measured f"[up])) +
#   scale_x_log10(label=scientific_10)+
#   theme_bw()  +
#   theme( text  = element_text(size=FONT.SIZE))+
#         scale_fill_viridis(discrete=2)+
#         scale_color_viridis(discrete=2)
# 
# print(fig.fup.hist)
# 
# fig.clint.hist <- ggplot(data=clint.fup) +
#   geom_histogram(aes(
#     x=Human.Clint+10^-5,
#     colour=Chemical.Class,
#     fill=Chemical.Class)) +
#   xlab(expression(italic("In Vitro")~"Measured Cl"[int]~"("*mu*"L/min/10"^'6'~"hep.)")) +
#   scale_x_log10(label=scientific_10)+
#   theme_bw()  +
#   theme( text  = element_text(size=FONT.SIZE))+
#         scale_fill_viridis(discrete=2)+
#         scale_color_viridis(discrete=2)+
#   coord_flip()
# 
# print(fig.clint.hist)
# 
# fig.henry.hist <- ggplot(data=clint.fup) +
#   geom_histogram(aes(
#     x=10^logHenry,
#     colour=Chemical.Class,
#     fill=Chemical.Class)) +
#   xlab(expression("Henry's Law Constant (Atm.-m"^"3"*"/mole)")) +
#   scale_x_log10(label=scientific_10)+
#   geom_vline(xintercept =10^-4.5,linetype="dashed",color="blue")+
#   theme_bw()  +
#   theme( text  = element_text(size=FONT.SIZE))+
#         scale_fill_viridis(discrete=2)+
#         scale_color_viridis(discrete=2)
# 
# print(fig.henry.hist)

## ----multipanelnewclintfupdata, eval = execute.vignette-----------------------
# ggarrange(fig.clint.fup, fig.clint.hist, fig.fup.hist, fig.henry.hist,
#           labels = c("A", "B", "C", "D"),
#           ncol = 2, nrow = 2,
#           common.legend = TRUE, legend = "bottom")
# tiff(file = paste0(LOC.WD,"Fig_NewClintFup.tiff"),
#          height = 5*200, width = 5*200, compression = "lzw")
# ggarrange(fig.clint.fup, fig.clint.hist, fig.fup.hist, fig.henry.hist,
#           labels = c("A", "B", "C", "D"),
#           ncol = 2, nrow = 2,
#           common.legend = TRUE, legend = "bottom")
# dev.off()

## ----calc_Kbloodair, eval = execute.vignette----------------------------------
# pfas.data <- subset(clint.fup,Chemical.Class=="Newly Measured PFAS")
# pfas.data[,"logKblood2air"] <- apply(pfas.data,
#                                      1,
#                                      function(x) try(log10(calc_kair(
#                                        dtxsid=x["DTXSID"],
#                                        suppress.messages=TRUE)$Kblood2air)))

## ----kblood2air_results, eval = execute.vignette------------------------------
# good.Kblood2air <- pfas.data$logKblood2air
# good.logHenry <- pfas.data$logHenry
# good.logHenry <- good.logHenry[!is.na(good.Kblood2air)]
# good.Kblood2air <- good.Kblood2air[!is.na(good.Kblood2air)]
# cat(paste0("Out of ",
#           length(good.logHenry),
#           " PFAS for which the Henry's Law Constant could be predicted, ",
#           signif(sum(good.logHenry >
#                        -4.5)/length(good.logHenry)*100,2),
#           "% were more volatile than acetone.\n"))
# most.volatile <- pfas.data[pfas.data$logKblood2air %in% min(good.Kblood2air),
#                            "Compound"]
# cat(paste0("The most volatile chemical was ", most.volatile,
#           " with a predicted logKblood2air of ", signif(min(good.Kblood2air),3),
#           ".\n"))

## ----removeoldhttk, eval = execute.vignette-----------------------------------
# chem.physical_and_invitro.data$Rat.Clint <- NA
# chem.physical_and_invitro.data$Rat.Funbound.plasma <- NA

## ----load_bioavailability_preds, eval = execute.vignette----------------------
# load_honda2023()

## ----invivoeval, eval = execute.vignette--------------------------------------
# cl.table <- httk::pfas.clearance
# human.ML.table <- subset(httk::dawson2023,
#                          Species=="Human" &
#                          Sex=="Female" &
#                          DosingAdj=="Oral")
# 
# for (this.chem in unique(cl.table$DTXSID))
#   if (this.chem %in% get_2023pfasinfo(info="dtxsid",
#                                       model=PFAS.MODEL.USED,
#                                       suppress.messages=TRUE))
#   {
#     chem.subset <- subset(cl.table, DTXSID==this.chem)
#     for (this.species in unique(cl.table$Species))
#     {
#       httk.normal <- calc_total_clearance(dtxsid=this.chem,
#                                species=this.species,
#                                model=NONPFAS.MODEL.USED,
#                                default.to.human=TRUE,
#                                class.exclude=FALSE,
#                                physchem.exclude=FALSE,
#                                suppress.messages=TRUE)
#       httk.pfas <- calc_total_clearance(dtxsid=this.chem,
#                                species=this.species,
#                                model=PFAS.MODEL.USED,
#                                default.to.human=TRUE,
#                                class.exclude=FALSE,
#                                physchem.exclude=FALSE,
#                                suppress.messages=TRUE)
#       if (any(cl.table$DTXSID==this.chem &
#                cl.table$Species==this.species))
#       {
#         cl.table[cl.table$DTXSID==this.chem &
#                cl.table$Species==this.species,
#                "HTTK.Cltot.Lphpkgbw"] <- httk.normal
#         cl.table[cl.table$DTXSID==this.chem &
#                cl.table$Species==this.species,
#                "HTTKPFAS.Cltot.Lphpkgbw"] <- httk.pfas
#       } else {
#         new.row <- cl.table[1,]
#         new.row[] <- NA
#         new.row[,"DTXSID"] <- this.chem
#         new.row[,"Species"] <- this.species
#         new.row[,"Sex"] <- "Female"
#         new.row[,"HTTK.Cltot.Lphpkgbw"] <- httk.normal
#         new.row[,"HTTKPFAS.Cltot.Lphpkgbw"] <- httk.pfas
#         cl.table <- rbind(cl.table,new.row)
#         new.row[,"Sex"] <- "Male"
#         cl.table <- rbind(cl.table,new.row)
#       }
#     }
#   }
# cat(paste0(dim(cl.table)[1]," rows of in vivo clearance data.\n"))

## ----invivoeval2, eval = execute.vignette-------------------------------------
# outside.doa <- NULL
# for (this.chem in unique(cl.table$DTXSID))
#   {
#     chem.subset <- subset(cl.table, DTXSID==this.chem)
#     for (this.species in unique(cl.table$Species))
#     {
#       p <- try(parameterize_pfas1comp(dtxsid=this.chem,
#                                   species=this.species,
#                                   suppress.messages=TRUE))
#       # Clearance in units of L/h/kgBW
#       if(!is(p, "try-error"))
#       {
#         cl.table[cl.table$DTXSID==this.chem &
#                cl.table$Species==this.species,
#                "MachineLearning.Cltot.Lphpkgbw"] <- p$kelim*p$Vd
#       } else {
#         outside.doa <- c(outside.doa, this.chem)
#       }
# 
#     }
# }
# outside.doa <- unique(outside.doa)
# outside.doa <- subset(chem.physical_and_invitro.data,DTXSID%in%outside.doa)$Compound

## ----invivo_interspecies_eval_scaling, eval = execute.vignette----------------
# cl.table <- as.data.frame(cl.table)
# for (this.sex in c("Female","Male"))
# {
#   human.data <- subset(cl.table, Species=="Human" &
#                             Sex==this.sex)
#   for (this.species in unique(cl.table$Species))
#   if (this.species != "Human")
#   {
#     species.data <- subset(cl.table, Species==this.species &
#                                      Sex==this.sex)
#     for (this.chem in union(human.data$DTXSID, species.data$DTXSID))
#     {
#       cl.table.index <- cl.table$Sex==this.sex &
#                cl.table$Species==this.species &
#                cl.table$DTXSID==this.chem
#       human.table.index <- human.data$Sex==this.sex &
#                human.data$DTXSID==this.chem
#       species.table.index <- species.data$Sex==this.sex &
#                species.data$DTXSID==this.chem
#       if (any(human.table.index) & any(species.table.index))
#       {
#         cl.table[cl.table.index, "TK.Ratio.InVivo"] <-
#         signif(species.data[species.table.index, "ClLphpkgbw"] /
#         human.data[human.table.index, "ClLphpkgbw"], 4)
# 
#         cl.table[cl.table.index, "TK.Ratio.HTTK"] <-
#         signif(species.data[species.table.index, "HTTK.Cltot.Lphpkgbw"] /
#         human.data[human.table.index, "HTTK.Cltot.Lphpkgbw"], 4)
# 
#         cl.table[cl.table.index, "TK.Ratio.ML"] <-
#         signif(species.data[species.table.index, "MachineLearning.Cltot.Lphpkgbw"] /
#         human.data[human.table.index, "MachineLearning.Cltot.Lphpkgbw"], 4)
# 
#         cl.table[cl.table.index, "TK.Ratio.HTTKPFAS"] <-
#         signif(species.data[species.table.index, "HTTKPFAS.Cltot.Lphpkgbw"] /
#         human.data[human.table.index, "HTTKPFAS.Cltot.Lphpkgbw"], 4)
#       }
#     }
#   }
# }
# cat(paste0(dim(cl.table)[1],"rows of in vivo clearance data.\n"))

## ----cl_lit_Data, eval = execute.vignette-------------------------------------
# huh.data <- as.data.frame(read_excel(paste0(
#   LOC.WD,"Huh-2011-HumanCL.xlsx")))
# # Huh Data is in ml/min/kg:
# Human.CL.mLpminpkg <- sapply(huh.data[,4],function(x) signif(as.numeric(strsplit(x," ")[[1]][1]),3))
# huh.data$Human.CL.Lphpkg <- signif(Human.CL.mLpminpkg/1000*60, 3)
# huh.data$Rat.CL.Lphpkg <- signif(
#   as.numeric(huh.data$a)*(0.25)^as.numeric(huh.data$b)*60/1000, 3)
# 
# # List of chemicals where we have HTTK data:
# httk.chem.list <- get_cheminfo(model=NONPFAS.MODEL.USED,
#                                info="DTXSID",
#                                suppress.messages = TRUE)
# for (this.chem in sort(unique(huh.data[,"DTXSID"])))
#   if (!is.na(this.chem))
#   {
#     print(this.chem)
#     this.row <- which(huh.data[,"DTXSID"]==this.chem)
#     new.row <- cl.table[1,]
#     new.row[1,] <- NA
#     new.row[1, "DTXSID"] <- huh.data[this.row, "DTXSID"]
#     new.row[1, "Sex"] <- "Female"
#     new.row[1, "ClLphpkgbw"] <- huh.data[this.row, "Human.CL.Lphpkg"]
# 
#     new.row[1, "TK.Ratio.InVivo"] <-
#       huh.data[this.row, "Rat.CL.Lphpkg"] /
#       huh.data[this.row, "Human.CL.Lphpkg"]
#     if (this.chem %in% httk.chem.list)
#     {
#       print("found HTTK data")
#       # Calculate human clearance:
#       HTTK.Human.Cltot.Lphpkgbw <-
#         calc_total_clearance(dtxsid = this.chem,
#                              model=NONPFAS.MODEL.USED,
#                              species="Human",
#                              suppress.messages=TRUE)
#       # Add human row (with NA ratio):
#       new.row[1, "Species"] <- "Human"
#       new.row[1, "HTTK.Cltot.Lphpkgbw"] <- HTTK.Human.Cltot.Lphpkgbw
#       print("added human row")
#       cl.table <- rbind(cl.table,new.row)
#       # Make a row for rat:
#       new.row[1, "Species"] <- "Rat"
#       new.row[1, "ClLphpkgbw"] <- huh.data[this.row, "Rat.CL.Lphpkg"]
#       HTTK.Rat.Cltot.Lphpkgbw <-
#         calc_total_clearance(dtxsid = this.chem,
#                              model=NONPFAS.MODEL.USED,
#                              species="Rat",
#                              default.to.human=TRUE,
#                              suppress.messages=TRUE)
#       new.row[1, "HTTK.Cltot.Lphpkgbw"] <- HTTK.Rat.Cltot.Lphpkgbw
#       new.row[1, "TK.Ratio.HTTK"] <- signif(HTTK.Rat.Cltot.Lphpkgbw /
#                                               HTTK.Human.Cltot.Lphpkgbw, 4)
#       # Add rat row:
#       cl.table <- rbind(cl.table,new.row)
#       print("added rat row")
#     }
#   }
# cat(paste0(dim(cl.table)[1],"rows of in vivo clearance data.\n"))

## ----annotate_chemical_class, eval = execute.vignette-------------------------
# cat(paste0(dim(cl.table)[1],"rows of in vivo clearance data.\n"))
# cl.table$Class <- "PFAS"
# cl.table[!(cl.table$DTXSID %in%
#              subset(chem.physical_and_invitro.data,Chemical.Class=="PFAS")$DTXSID),
#          "Class"] <- "Non-PFAS"
# cl.table$Class <- factor(cl.table$Class, levels=c("PFAS","Non-PFAS"))
# cat(paste("Also in Figure 2 Panel B are clearances based upon",
#   sum(cl.table$Class!="PFAS")/2,
#   "non-PFAS chemicals\n"))
# cl.table <- merge(cl.table,subset(httk::dawson2023,DosingAdj=="Oral")[,
#                     c("DTXSID","Species","Sex","ClassPredFull","ClassModDomain")],
#                   by=c("DTXSID","Species","Sex"),
#                   all.x=TRUE)
# colnames(cl.table)[colnames(cl.table)=="ClassPredFull"] <- "ML.Class"
# save(cl.table, file=paste0(LOC.WD,"InVivo-CL-Table.RData"))
# write.table(cl.table, file=paste0(LOC.WD,"InVivo-CL-Table.txt"),sep="\t",row.names=FALSE,quote = FALSE)
# cat(paste0(dim(cl.table)[1],"rows of in vivo clearance data.\n"))

## ----fit_non_pfas_trend, eval = execute.vignette------------------------------
# load(paste0(LOC.WD,"InVivo-CL-Table.RData"))
# nonpfasratiotrend <- lm(log10(TK.Ratio.InVivo)~log10(TK.Ratio.HTTK),
#                         data=subset(cl.table, Species=="Rat" & Class!="PFAS"))
# summary(nonpfasratiotrend)
# cat(paste0("For typical organic chemicals, HTTK in vitro data combined with ",
#            "species-specific physiological parameters predicts the Rat:Human ",
#            "clearance ratio with a root mean squared log10 error (RMSLE) of ",
#            signif(RMSLE(subset(cl.table,
#                                Species=="Rat" & Class!="PFAS"),
#                         "TK.Ratio.InVivo", "TK.Ratio.HTTK"),2),
#            ".\n"))

## ----fig_clhttk, eval = execute.vignette--------------------------------------
# load(paste0(LOC.WD,"InVivo-CL-Table.RData"))
# fig.clhttk.invivo  <- ggplot(data=subset(cl.table, Class=="PFAS")) +
#   geom_point(aes(
#     x=HTTK.Cltot.Lphpkgbw,
#     y=ClLphpkgbw,
#     color=Species,
#     shape=Sex),
#     size=POINT.SIZE) +
#   geom_abline(slope=1,intercept=0,linetype="dashed")+
# #  geom_line(aes(x=logP,y=logPfit1),color="black") +
# #  geom_line(aes(x=logP,y=logPfit2),color="blue",linetype = "dashed") +
#   ylab("PFAS In Vivo Clearance (L/h/kg bw)") +
#   xlab("Default HTTK Clearance (L/h/kg bw)") +
#   scale_x_log10(label=scientific_10)+
#   scale_y_log10(label=scientific_10)+
#    scale_color_viridis(discrete=4)+
#   theme_bw()  +
#   theme( text  = element_text(size=FONT.SIZE))+
#   annotate("text", size=FONT.SIZE/3, x = 10^-4, y = 10^-1,
#            label = paste("RMSLE:",
#                          RMSLE(subset(cl.table, Class=="PFAS"),
#                                "HTTK.Cltot.Lphpkgbw","ClLphpkgbw")))+
#   theme(legend.title = element_text(size = FONT.SIZE/3),
#                legend.text = element_text(size = FONT.SIZE/2))
# 
# print(fig.clhttk.invivo)
# 

## ----fig_ratio_httk, eval = execute.vignette----------------------------------
# load(paste0(LOC.WD,"InVivo-CL-Table.RData"))
# fig.ratio.httk  <- ggplot(data=subset(cl.table, Species=="Rat")) +
#   geom_point(aes(
#     x=TK.Ratio.HTTK,
#     y=TK.Ratio.InVivo,
#     color=Class,
#     shape=Class),
#     size=POINT.SIZE) +
#   geom_abline(slope=1,intercept=0,linetype="dashed")+
# #  geom_line(aes(x=logP,y=logPfit1),color="black") +
# #  geom_line(aes(x=logP,y=logPfit2),color="blue",linetype = "dashed") +
#   ylab("In Vivo Derived Human:Rat Clearance Ratio") +
#   xlab("Default HTTK Clearance Ratio") +
#   scale_x_log10(label=scientific_10)+
#   scale_y_log10(label=scientific_10)+
#   theme_bw()  +
#   theme( text  = element_text(size=FONT.SIZE)) +
#   scale_color_viridis(discrete=2)
# 
# print(fig.ratio.httk)
# #  ggsave(paste0(LOC.WD,
# #    "Fig_rat_cl_ratio.tiff"),
# #         height =11, width =12, dpi = 300)

## ----multipanelclearancefigureforpaper, eval = execute.vignette---------------
# ggarrange(fig.clhttk.invivo, fig.ratio.httk,
#           labels = c("A", "B"),
#           ncol = 2, nrow = 1,
#           common.legend = FALSE, legend = "bottom")
# tiff(file = paste0(LOC.WD,"Fig_PFASRatioEval.tiff"),
#          height = 2.5*200, width = 5*200, compression = "lzw")
# ggarrange(fig.clhttk.invivo, fig.ratio.httk,
#           labels = c("A", "B"),
#           ncol = 2, nrow = 1,
#           hjust=-2.5,
#           common.legend = FALSE, legend = "top",
#           font.label=list(size=20))
# dev.off()

## ----summary_table, eval = execute.vignette-----------------------------------
# ratio.table <- data.frame()
# ratio.table["Non-PFAS In Vivo Clearance Ratio","Median"] <-
#   median(subset(cl.table,
#                 Species=="Rat" &
#                   Class != "PFAS")$TK.Ratio.InVivo,na.rm=TRUE)
# ratio.table["Non-PFAS In Vivo Clearance Ratio","Min"] <-
#   min(subset(cl.table,
#                 Species=="Rat" &
#                   Class != "PFAS")$TK.Ratio.InVivo,na.rm=TRUE)
# ratio.table["Non-PFAS In Vivo Clearance Ratio","Max"] <-
#   max(subset(cl.table,
#                 Species=="Rat" &
#                   Class != "PFAS")$TK.Ratio.InVivo,na.rm=TRUE)
# ratio.table["Non-PFAS Default HTTK Clearance Ratio","Median"] <-
#   median(subset(cl.table,
#                 Species=="Rat" &
#                   Class != "PFAS")$TK.Ratio.HTTK, na.rm=TRUE)
# ratio.table["Non-PFAS Default HTTK Clearance Ratio","Min"] <-
#   min(subset(cl.table,
#                 Species=="Rat" &
#                   Class != "PFAS")$TK.Ratio.HTTK, na.rm=TRUE)
# ratio.table["Non-PFAS Default HTTK Clearance Ratio","Max"] <-
#   max(subset(cl.table,
#                 Species=="Rat" &
#                   Class != "PFAS")$TK.Ratio.HTTK, na.rm=TRUE)
# ratio.table["Non-PFAS Default HTTK Clearance Ratio","RMSLE"] <-
#   RMSLE(subset(cl.table,
#                 Species=="Rat" &
#                   Class != "PFAS"),"TK.Ratio.HTTK", "TK.Ratio.InVivo")
# ratio.table["PFAS In Vivo Clearance Ratio","Median"] <-
#   median(subset(cl.table,
#                 Species=="Rat" &
#                   Class == "PFAS")$TK.Ratio.InVivo,na.rm=TRUE)
# ratio.table["PFAS In Vivo Clearance Ratio","Min"] <-
#   min(subset(cl.table,
#                 Species=="Rat" &
#                   Class == "PFAS")$TK.Ratio.InVivo,na.rm=TRUE)
# ratio.table["PFAS In Vivo Clearance Ratio","Max"] <-
#   max(subset(cl.table,
#                 Species=="Rat" &
#                   Class == "PFAS")$TK.Ratio.InVivo, na.rm=TRUE)
# ratio.table["PFAS Default HTTK Clearance Ratio","Median"] <-
#   median(subset(cl.table,
#                 Species=="Rat" &
#                   Class == "PFAS")$TK.Ratio.HTTK, na.rm=TRUE)
# ratio.table["PFAS Default HTTK Clearance Ratio","Min"] <-
#   min(subset(cl.table,
#                 Species=="Rat" &
#                   Class == "PFAS")$TK.Ratio.HTTK, na.rm=TRUE)
# ratio.table["PFAS Default HTTK Clearance Ratio","Max"] <-
#   max(subset(cl.table,
#                 Species=="Rat" &
#                   Class == "PFAS")$TK.Ratio.HTTK, na.rm=TRUE)
# ratio.table["PFAS Default HTTK Clearance Ratio","RMSLE"] <-
#   RMSLE(subset(cl.table,
#                 Species=="Rat" &
#                   Class == "PFAS"), "TK.Ratio.HTTK", "TK.Ratio.InVivo")
# ratio.table <- signif(ratio.table, 2)
# write.table(ratio.table, file=paste0(LOC.WD,"Ratio-Table.txt"),
#             sep="\t")
# kable(ratio.table)
# 
# cat(paste0("RMSLE is much larger for PFAS (",
#            ratio.table["PFAS Default HTTK Clearance Ratio","RMSLE"],
#            " = ",
#            signif(10^ratio.table["PFAS Default HTTK Clearance Ratio","RMSLE"],2),
#            "-fold error) than for non-PFAS chemicals (",
#            ratio.table["Non-PFAS Default HTTK Clearance Ratio","RMSLE"],
#            " = ",
#            signif(10^ratio.table["Non-PFAS Default HTTK Clearance Ratio","RMSLE"],2),
#            "-fold error).\n"))

## ----make_ml_clint_table, eval = execute.vignette-----------------------------
# load(paste0(LOC.WD,"Clint-Fup-Values.RData"))
# clint.fup.pfas[is.na(clint.fup.pfas$Human.Clint.pValue),"Human.Clint.pValue"] <- 0
# ml.clint.table <- data.frame()
# ml.clint.table["Class 1", "Description"] <- "Rapidly Cleared"
# ml.clint.table["Class 2", "Description"] <- "Moderate"
# ml.clint.table["Class 3", "Description"] <- "Slow"
# ml.clint.table["Class 4", "Description"] <- "Very Slow"
# ml.clint.table["Class 1", "Half-Life"] <- "< 12 h"
# ml.clint.table["Class 2", "Half-Life"] <- "< 1 week"
# ml.clint.table["Class 3", "Half-Life"] <- "< 2 months"
# ml.clint.table["Class 4", "Half-Life"] <- "> 2 months"
# ml.clint.table["Class 1", "Number Crizer (2024) Chemicals"] <-
#   dim(subset(clint.fup.pfas,
#              ML.Class==1))[1]
# ml.clint.table["Class 2", "Number Crizer (2024) Chemicals"] <-
#   dim(subset(clint.fup.pfas,
#              ML.Class==2))[1]
# ml.clint.table["Class 3", "Number Crizer (2024) Chemicals"] <-
#   dim(subset(clint.fup.pfas,
#              ML.Class==3))[1]
# ml.clint.table["Class 4", "Number Crizer (2024) Chemicals"] <-
#   dim(subset(clint.fup.pfas,
#              ML.Class==4))[1]
# ml.clint.table["Class 1", "Number Non-Zero Clint"] <-
#   dim(subset(clint.fup.pfas,
#              ML.Class==1 &
#              Human.Clint > 0 &
#              Human.Clint.pValue < 0.05))[1]
# ml.clint.table["Class 2", "Number Non-Zero Clint"] <-
#   dim(subset(clint.fup.pfas,
#              ML.Class==2 &
#              Human.Clint > 0 &
#              Human.Clint.pValue < 0.05))[1]
# ml.clint.table["Class 3", "Number Non-Zero Clint"] <-
#   dim(subset(clint.fup.pfas,
#              ML.Class==3 &
#              Human.Clint > 0 &
#              Human.Clint.pValue < 0.05))[1]
# ml.clint.table["Class 4", "Number Non-Zero Clint"] <-
#   dim(subset(clint.fup.pfas,
#              ML.Class==4 &
#              Human.Clint > 0 &
#              Human.Clint.pValue < 0.05))[1]
# write.table(ml.clint.table, file=paste0(LOC.WD,
#                                         "ML-Clint-table.txt"), sep="\t")
# kable(ml.clint.table)

## ----rat_human_scaling, eval = execute.vignette-------------------------------
# half.lives <- NULL
# for (this.chem in new.pfas)
# {
#   new.row <- data.frame(DTXSID=this.chem,
#                         Human.HL=try(calc_half_life(
#                           dtxsid=this.chem,
#                           species="human",
#                           model="sumclearances",
#                           class.exclude=FALSE,
#                           physchem.exclude=FALSE,
#                           suppress.messages=TRUE)),
#                         Rat.HL=try(calc_half_life(
#                           dtxsid=this.chem,
#                           species="rat",
#                           model="sumclearances",
#                           default.to.human=TRUE,
#                           class.exclude=FALSE,
#                           physchem.exclude=FALSE,
#                           suppress.messages=TRUE)))
#   new.row$Type <- "HTTK"
#   half.lives <- rbind(half.lives, new.row)
#   param.human <- try(parameterize_pfas1comp(dtxsid=this.chem,
#                                   species="human",
#                                   suppress.messages=TRUE))
#   param.rat <- try(parameterize_pfas1comp(dtxsid=this.chem,
#                                   species="rat",
#                                   suppress.messages=TRUE))
#   if ("kelim" %in% names(param.human) &
#       "kelim" %in% names(param.rat))
#     new.row <- data.frame(DTXSID=this.chem,
#                         Human.HL=log(2)/param.human$kelim,
#                         Rat.HL=log(2)/param.rat$kelim)
#   new.row$Type <- "ML"
#   half.lives <- rbind(half.lives, new.row)
# }
# half.lives$Ratio <- half.lives$Human.HL / half.lives$Rat.HL
# save(half.lives,file=paste0(LOC.WD,"All-Half-Lives.RData"))

## ----half_life_fig, eval = execute.vignette-----------------------------------
# load(paste0(LOC.WD,"All-Half-Lives.RData"))
# fig.species.scaling <- ggplot(data=half.lives) +
#   geom_histogram(aes(
#     x=Ratio,
#     colour=Type,
#     fill=Type),
#     bins=10) +
#   xlab("Human:Rat PK Half-Life Ratio") +
#   scale_x_log10(label=scientific_10)+
#   theme_bw()  +
#   theme( text  = element_text(size=FONT.SIZE))+
#         scale_fill_viridis(discrete=2)+
#         scale_color_viridis(discrete=2)
# 
# 
# print(fig.species.scaling)

## ----wallis_2023_halflives, eval = execute.vignette---------------------------
# Dawson.known.human.HLs <- c(
# "DTXSID1037303",
# "DTXSID1037303",
# "DTXSID3031860",
# "DTXSID3031860",
# "DTXSID3031862",
# "DTXSID3031862",
# "DTXSID3031864",
# "DTXSID3031864",
# "DTXSID4059916",
# "DTXSID4059916",
# "DTXSID5030030",
# "DTXSID5030030",
# "DTXSID7040150",
# "DTXSID7040150",
# "DTXSID70880215",
# "DTXSID70880215",
# "DTXSID8031863",
# "DTXSID8031863",
# "DTXSID8031865",
# "DTXSID8031865",
# "DTXSID80892506",
# "DTXSID80892506"
# )
# 
# wallis <- as.data.frame(read_excel(paste0(LOC.WD,"Wallis-2023-PFASHL.xlsx")))
# wallis$HL.Low <- sapply(wallis[,4],function(x) as.numeric(strsplit(x,", ")[[1]][1]))
# wallis$HL.High <- sapply(wallis[,4],function(x) as.numeric(strsplit(x,", ")[[1]][2]))
# 
# abraham <- as.data.frame(read_excel(paste0(LOC.WD,"Abraham-2024-PFASHL.xlsx")))
# abraham$HL.Low <- sapply(abraham[,"Halflife.95.d"],function(x) as.numeric(strsplit(x,"–")[[1]][1]))
# abraham$HL.High <- sapply(abraham[,"Halflife.95.d"],function(x) as.numeric(strsplit(x,"–")[[1]][2]))
# abraham$HL.High[is.na(abraham$HL.High)] <- abraham$HL.Low*100
# 
# wallis$Reference <- "Wallis 2023"
# abraham$Reference <- "Abraham 2024"
# 
# human.invivo.hl <- wallis[,c("PREFERRED_NAME","DTXSID","CASRN","Reference","HL.Low","HL.High")]
# human.invivo.hl <- rbind(human.invivo.hl,
#                          abraham[,c("PREFERRED_NAME","DTXSID","CASRN","Reference","HL.Low","HL.High")])
# human.invivo.hl$Status <- "Novel"
# human.invivo.hl[human.invivo.hl$DTXSID %in% Dawson.known.human.HLs, "Status"] <- "Known"
# 
# for (this.chem in human.invivo.hl$DTXSID)
# {
#   param.human <- try(parameterize_pfas1comp(dtxsid=this.chem,
#                                   species="human",
#                                   suppress.messages=TRUE,
#                                   restrict.doa="none"))
#   if ("kelim" %in% names(param.human))
#     human.invivo.hl[human.invivo.hl$DTXSID == this.chem, "HL.ML"] <- log(2)/param.human$kelim/24
# }

## ----results_from_new_model_run, eval = execute.vignette----------------------
# human.invivo.hl[human.invivo.hl$DTXSID=="DTXSID90723993","HL.ML"] <- 29000/24
# human.invivo.hl[human.invivo.hl$DTXSID=="DTXSID50723994","HL.ML"] <- 29000/24
# human.invivo.hl[human.invivo.hl$DTXSID=="DTXSID20892348","HL.ML"] <- 53/24
# for (this.col in c("HL.Low","HL.High","HL.ML"))
# {
#   temp.col <- human.invivo.hl[, this.col]
#   bad.rows <- is.na(temp.col)
#   temp.col[bad.rows] <- -1
#   new.col <- paste0(this.col,".Class")
#     human.invivo.hl[, new.col] <- 4
#   human.invivo.hl[temp.col < 60,
#        new.col] <- 3
#   human.invivo.hl[temp.col < 7,
#        new.col] <- 2
#   human.invivo.hl[temp.col< 0.5,
#        new.col] <- 1
#   human.invivo.hl[bad.rows, new.col] <- NA
# }
# human.invivo.hl$Agree <- human.invivo.hl$HL.Low.Class ==
#                            human.invivo.hl$HL.ML.Class |
#                          human.invivo.hl$HL.High.Class ==
#                            human.invivo.hl$HL.ML.Class
# human.invivo.hl$Underestimate <- human.invivo.hl$HL.High.Class <
#                            human.invivo.hl$HL.ML.Class

## ----human_hl_stats, eval = execute.vignette----------------------------------
# human.invivo.hl$HL.GeoMean <- exp((log(human.invivo.hl$HL.Low)+log(human.invivo.hl$HL.High))/2)
# fit.overall <- lm(log10(HL.GeoMean) ~ log10(HL.ML),
#                   data=human.invivo.hl)
# fit.known <- lm(log10(HL.GeoMean) ~ log10(HL.ML),
#                 data=subset(human.invivo.hl,Status=="Known"))
# fit.novel <- lm(log10(HL.GeoMean) ~log10(HL.ML),
#                 data=subset(human.invivo.hl,Status=="Novel"))
# hl.known <- subset(human.invivo.hl,Status=="Known")
# RMSLE.known <- (mean((log10(hl.known$HL.GeoMean) - log10(hl.known$HL.ML))^2,na.rm=TRUE))^(1/2)
# agree.known <- sum(hl.known$Agree)/dim(hl.known)[1]
# under.known <- sum(hl.known$Underestimate)/dim(hl.known)[1]
# hl.novel <- subset(human.invivo.hl,Status=="Novel" & !is.na(HL.ML))
# RMSLE.novel <- (mean((log10(hl.novel$HL.GeoMean) - log10(hl.novel$HL.ML))^2,na.rm=TRUE))^(1/2)
# agree.novel <- sum(hl.novel$Agree)/dim(hl.novel)[1]
# agree.count.novel <- sum(hl.novel$Agree)
# under.novel <- sum(hl.novel$Underestimate)/dim(hl.known)[1]
# under.count.novel <- sum(hl.novel$Underestimate)
# num.novel.pfas.hl <- dim(hl.novel)[1]
# cat(paste(num.novel.pfas.hl,"novel PFAS human half-lives.\n"))
# save(human.invivo.hl,
#      hl.known,
#      hl.novel,
#      file=paste0(LOC.WD,"Human-Invivo-HL.RData"))

## ----human_hl_eval_fig, eval = execute.vignette-------------------------------
# load(paste0(LOC.WD,"Human-Invivo-HL.RData"))
# set.seed(1234)
# human.invivo.hl$HL.ML.jitter <- human.invivo.hl$HL.ML*(1.0+runif(dim(human.invivo.hl)[1],-0.5,0.5))
# 
# human.invivo.hl.fig <- ggplot(data=human.invivo.hl) +
#   geom_segment(aes(
#     x=HL.ML.jitter,
#     xend=HL.ML.jitter,
#     y=HL.Low,
#     yend=HL.High,
#     colour=Status),
#     linewidth=3,
#     alpha=0.5) +
#   geom_point(aes(
#     x=HL.ML.jitter,
#     y=HL.GeoMean,
#     shape=Reference,
#     colour=Status),
#     size=2) +
#     geom_abline(slope=1,intercept=0,linetype="dashed")+
#   xlab("Machine Learning Half-Life Prediction") +
#   ylab("In Vivo Measurment") +
#   scale_x_log10(label=scientific_10)+
#   scale_y_log10(label=scientific_10)+
#   theme_bw()  +
#   annotate("text", x = 5, y = 10^4, size=6, hjust=0,
#            label = paste("RMSLE for ",
#                          num.novel.pfas.hl,
#                          " novel PFAS: ",
#                          signif(RMSLE.novel,2),"\n"#,
#                          #agree.count.novel,
#                          #" agree\n",
#                          #under.count.novel,
#                          #" underestimated",
#                          ,sep=""))+
#   theme( text  = element_text(size=FONT.SIZE))+
#         scale_color_viridis(discrete=2)+
#     theme(legend.title = element_text(size = FONT.SIZE/3),
#                legend.text = element_text(size = FONT.SIZE/2))
# 
# print(human.invivo.hl.fig)
# 
# #tiff(file = paste0(LOC.WD,"InVivoComparison.tiff"),
# #     width=9,
# #     height=7,
# #     units="in",
# #     res=300)
# #print(human.invivo.hl.fig)
# #dev.off()
# 
# subset(human.invivo.hl, is.na(HL.ML))

## ----multipanelMLfigureforpaper, eval = execute.vignette----------------------
# ggarrange(fig.species.scaling, human.invivo.hl.fig,
#           labels = c("A", "B"),
#           ncol = 2, nrow = 1,
#           common.legend = FALSE, legend = "bottom")
# tiff(file = paste0(LOC.WD,"Fig_MLModelEval.tiff"),
#          height = 2.5*200, width = 5*200, compression = "lzw")
# ggarrange(fig.species.scaling, human.invivo.hl.fig,
#           labels = c("A", "B"),
#           ncol = 2, nrow = 1,
#           hjust=-2.5,
#           common.legend = FALSE, legend = "top",
#           font.label=list(size=20))
# dev.off()

## ----fig_clml, eval = execute.vignette----------------------------------------
# load(paste0(LOC.WD,"InVivo-CL-Table.RData"))
# fig.clml.invivo  <- ggplot(data=subset(cl.table, Class=="PFAS")) +
#   geom_jitter(aes(
#     x=MachineLearning.Cltot.Lphpkgbw,
#     y=ClLphpkgbw,
#     color=Species,
#     shape=Sex),
#     size=POINT.SIZE,
#     height=0,width=0.1) +
#   geom_abline(slope=1,intercept=0,linetype="dashed")+
# #  geom_line(aes(x=logP,y=logPfit1),color="black") +
# #  geom_line(aes(x=logP,y=logPfit2),color="blue",linetype = "dashed") +
#   ylab("In Vivo Clearance (L/h/kg bw)") +
#   xlab("Machine Learning Clearance (L/h/kg bw)") +
#   scale_x_log10(label=scientific_10)+
#   scale_y_log10(label=scientific_10)+
#    scale_color_viridis(discrete=4)+
#   theme_bw()  +
#   theme( text  = element_text(size=FONT.SIZE))+
#   annotate("text", x = 3*10^-5, y = 10^-1,size=FONT.SIZE/3,
#            label = paste("RMSLE:",
#                          RMSLE(cl.table,
#                                "MachineLearning.Cltot.Lphpkgbw",
#                                "ClLphpkgbw")))
# 
# print(fig.clml.invivo)

## ----fig_ratio_ml, eval = execute.vignette------------------------------------
# load(paste0(LOC.WD,"InVivo-CL-Table.RData"))
# fig.ratio.ml  <- ggplot(data=subset(cl.table, Species!="Human" & Class=="PFAS")) +
#   geom_point(aes(
#     x=TK.Ratio.ML,
#     y=TK.Ratio.InVivo,
#     color=Species,
#     shape=Sex),
#     size=POINT.SIZE) +
#   geom_abline(slope=1,intercept=0,linetype="dashed")+
# #  geom_line(aes(x=logP,y=logPfit1),color="black") +
# #  geom_line(aes(x=logP,y=logPfit2),color="blue",linetype = "dashed") +
#   ylab("In Vivo Observed Clearance Ratio") +
#   xlab("Machine Learning Predicted Clearance Ratio") +
#   scale_x_log10(label=scientific_10)+
#   scale_y_log10(label=scientific_10)+
#    scale_color_viridis(discrete=3)+
#   theme_bw()  +
#   theme( text  = element_text(size=FONT.SIZE))
# 
# print(fig.ratio.ml)
#   ggsave(paste0(LOC.WD,
#     "Fig_ml_cl_ratio.tiff"),
#          height = 11, width = 12, dpi = 300)

## ----fig_clhttkpfas, eval = execute.vignette----------------------------------
# load(paste0(LOC.WD,"InVivo-CL-Table.RData"))
# fig.clhttkpfas.invivo  <- ggplot(data=subset(cl.table, Class=="PFAS")) +
#   geom_point(aes(
#     x=HTTKPFAS.Cltot.Lphpkgbw,
#     y=ClLphpkgbw,
#     color=Species,
#     shape=Sex),
#     size=POINT.SIZE) +
#   geom_abline(slope=1,intercept=0,linetype="dashed")+
# #  geom_line(aes(x=logP,y=logPfit1),color="black") +
# #  geom_line(aes(x=logP,y=logPfit2),color="blue",linetype = "dashed") +
#   ylab("PFAS In Vivo Clearance (L/h/kg bw)") +
#   xlab("HTTK-PFAS Clearance (L/h/kg bw)") +
#   scale_x_log10(label=scientific_10)+
#   scale_y_log10(label=scientific_10)+
#    scale_color_viridis(discrete=4)+
#   theme_bw()  +
#   theme( text  = element_text(size=FONT.SIZE))+
#   annotate("text", size=FONT.SIZE/3, x = 10^-6*10, y = 10^-1,
#            label = paste("RMSLE:",
#                          RMSLE(subset(cl.table, Class=="PFAS"),
#                                "HTTKPFAS.Cltot.Lphpkgbw","ClLphpkgbw")))+
#   theme(legend.title = element_text(size = FONT.SIZE/3),
#                legend.text = element_text(size = FONT.SIZE/2))
# 
# print(fig.clhttkpfas.invivo)
# 

## ----multipanelclearancenewmodelfigureforpaper, eval = execute.vignette-------
# multiclerancenewmodel <- ggarrange(fig.clml.invivo+ rremove("ylab") , fig.clhttkpfas.invivo+ rremove("ylab") ,
#           labels = c("A", "B"),
#           ncol = 2, nrow = 1,
#           common.legend = TRUE, legend = "bottom")
# multiclerancenewmodel <- annotate_figure(multiclerancenewmodel ,
#                 left = textGrob(expression(paste("PFAS ",
#                   italic("In Vivo"),
#                   "Clearance (L/h/kg bw)")),
#                                 rot = 90, vjust = 1, gp = gpar(cex = 1.75)),
#                 )
# print(multiclerancenewmodel)
# tiff(file = paste0(LOC.WD,"Fig_PFASClearanceEval.tiff"),
#          height = 2.5*200, width = 5*200, compression = "lzw")
# print(multiclerancenewmodel)
# dev.off()

## ----fig_ratio_httkpfas, eval = execute.vignette------------------------------
# load(paste0(LOC.WD,"InVivo-CL-Table.RData"))
# fig.ratio.httkpfas  <- ggplot(data=subset(cl.table, Species!="Human" & Class=="PFAS")) +
#   geom_point(aes(
#     x=TK.Ratio.HTTKPFAS,
#     y=TK.Ratio.InVivo,
#     color=Species,
#     shape=Sex),
#     size=POINT.SIZE) +
#   geom_abline(slope=1,intercept=0,linetype="dashed")+
# #  geom_line(aes(x=logP,y=logPfit1),color="black") +
# #  geom_line(aes(x=logP,y=logPfit2),color="blue",linetype = "dashed") +
#   ylab(expression(paste(italic("In Vivo"),
#     "Observed Clearance Ratio"))) +
#   xlab("HTTK-PFAS Predicted Clearance Ratio") +
#   scale_x_log10(label=scientific_10)+
#   scale_y_log10(label=scientific_10)+
#   scale_color_viridis(discrete=3)+
#   annotate("text", size=FONT.SIZE/3, x = 5*10^2, y = 3,
#            label = paste("RMSLE:",
#                          RMSLE(subset(cl.table, Species!="Human" & Class=="PFAS"),
#                                "TK.Ratio.HTTKPFAS","TK.Ratio.InVivo")))+
#   theme_bw()  +
#   theme( text  = element_text(size=FONT.SIZE))
# 
# print(fig.ratio.httkpfas)
#   ggsave(paste0(LOC.WD,
#     "Fig_httkpfas_cl_ratio.tiff"),
#          height = 5*1.5, width = 6*1.5, dpi = 300)

## ----MakeSupTableChange, eval = execute.vignette------------------------------
# pfas.httk.table <- get_2023pfasinfo(info="all")
# for (this.chem in pfas.httk.table$DTXSID)
# {
#   this.row <- pfas.httk.table$DTXSID==this.chem
#   pfas.httk.table[this.row, "Css.HTTK.Classic"] <- try(calc_analytic_css(
#                                                 dtxsid=this.chem,
#                                                 class.exclude=FALSE,
#                                                 physchem.exclude=FALSE,
#                                                 suppress.messages=TRUE,
#                                                 model="3compartmentss"))
#   pfas.httk.table[this.row, "Css.HTTK.Exhale"] <- try(calc_analytic_css(
#                                                 dtxsid=this.chem,
#                                                 class.exclude=FALSE,
#                                                 suppress.messages=TRUE,
#                                                 model="sumclearances"))
#   pfas.httk.table[this.row, "Css.HTTK.PFAS"] <- try(calc_analytic_css(
#                                                 dtxsid=this.chem,
#                                                 suppress.messages=TRUE,
#                                                 model="sumclearancespfas"))
#   pfas.httk.table[this.row, "Css.ML"] <- try(calc_analytic_css(
#                                                 dtxsid=this.chem,
#                                                 suppress.messages=TRUE,
#                                                 model="pfas1compartment"))
#     fractions <- try(calc_clearance_frac(dtxsid=this.chem,
#                                        model="sumclearancespfas",
#                                        suppress.messages=TRUE,
#                                       fraction.params = c("Clint",
#                                                           "Qgfrc",
#                                                           "Qalvc")))
#   if (!is(fractions, "try-error"))
#   {
#     pfas.httk.table[this.row, "Frac.Metabolism"] <- fractions[["Clint"]]
#     pfas.httk.table[this.row, "Frac.Glomerular"] <- fractions[["Qgfrc"]]
#     pfas.httk.table[this.row, "Frac.Exhalation"] <- fractions[["Qalvc"]]
#   }
# }
# pfas.httk.table$Css.HTTK.Classic <- as.numeric(pfas.httk.table$Css.HTTK.Classic)
# pfas.httk.table$Css.HTTK.Exhale <- as.numeric(pfas.httk.table$Css.HTTK.Exhale)
# pfas.httk.table$Css.HTTK.PFAS <- as.numeric(pfas.httk.table$Css.HTTK.PFAS)
# pfas.httk.table$Css.ML <- as.numeric(pfas.httk.table$Css.ML)
# pfas.httk.table$Css.Exhale.FoldChange <-
#   signif(pfas.httk.table$Css.HTTK.Exhale/pfas.httk.table$Css.HTTK.Classic, 3)
# pfas.httk.table$Css.PFAS.FoldChange <-
#   signif(pfas.httk.table$Css.HTTK.PFAS/pfas.httk.table$Css.HTTK.Classic, 3)
# pfas.httk.table <-
#   merge(pfas.httk.table,subset(httk::dawson2023,
#                                Species=="Human" &
#                                Sex=="Female" &
#                                DosingAdj=="Oral")[
#   ,c("DTXSID","ClassPredFull")],by="DTXSID",all.x=TRUE)
# colnames(pfas.httk.table)[colnames(pfas.httk.table)=="ClassPredFull"] <- "ML.Class"
# pfas.httk.table$InVitroMetabolism <- "No"
# clint.pvalue <- pfas.httk.table$Human.Clint.pValue
# clint.pvalue[is.na(clint.pvalue)] <- 0
# pfas.httk.table[pfas.httk.table$Human.Clint > 0 &
#                 clint.pvalue < 0.05,
#                 "InVitroMetabolism"] <- "Yes"
# 
# 
# write.table(pfas.httk.table,
#           row.names=FALSE,
#           quote=FALSE,
#           sep="\t",
#           file=paste(LOC.WD,"new-pfas-css-uM.txt",sep=""))
# save(pfas.httk.table,file=paste0(LOC.WD,"PFAS-Css-Table.RData"))

## ----cssfigpanela, eval = execute.vignette------------------------------------
# load(paste0(LOC.WD,"PFAS-Css-Table.RData"))
# pfas.httk.table$ML.Class <- as.factor(pfas.httk.table$ML.Class)
# fig.css.httk  <- ggplot(data=subset(pfas.httk.table,!is.na(ML.Class))) +
#   geom_point(aes(
#     x=Css.HTTK.Classic,
#     y=Css.HTTK.PFAS,
#     shape=InVitroMetabolism,
#     color=ML.Class
#     ),
#     size=POINT.SIZE) +
#   geom_abline(slope=1,intercept=0,linetype="dashed")+
#   geom_abline(slope=1,intercept=1,linetype="dotted", color="blue")+
#   geom_abline(slope=1,intercept=-1,linetype="dotted", color="blue") +
#   ylab(expression(paste("HTTK-PFAS",
#                          ~C[ss],
#                          " (",
#                          mu,
#                          "M)"))) +
#   xlab(expression(paste("Default HTTK",
#                          ~C[ss],
#                          " (",
#                          mu,
#                          "M)"))) +
#   scale_x_log10(limits=c(10^-2,10^6),label=scientific_10)+
#   scale_y_log10(limits=c(10^-2,10^6),label=scientific_10)+
#   theme_bw()  +
#   theme( text  = element_text(size=FONT.SIZE)) +
#   scale_color_viridis(discrete=3)+
#   guides(color=guide_legend(title="ML Halflife Class"),
#          shape=guide_legend(title="In Vitro Metabolism"))
# 
# print(fig.css.httk)
# #  ggsave(paste0(LOC.WD,
# #    "Fig_httkpfas_cl_ratio.tiff"),
# #         height = 11, width = 12, dpi = 300)

## ----cssfigpanelb, eval = execute.vignette------------------------------------
# load(paste0(LOC.WD,"PFAS-Css-Table.RData"))
# pfas.httk.table$ML.Class <- as.factor(pfas.httk.table$ML.Class)
# fig.css.ml  <- ggplot(data=subset(pfas.httk.table,!is.na(ML.Class))) +
#   geom_point(aes(
#     x=Css.ML,
#     y=Css.HTTK.PFAS,
#     shape=InVitroMetabolism,
#     color=ML.Class
#     ),
#     size=POINT.SIZE) +
#   geom_abline(slope=1,intercept=0,linetype="dashed")+
#   geom_abline(slope=1,intercept=1,linetype="dotted", color="blue")+
#   geom_abline(slope=1,intercept=-1,linetype="dotted", color="blue") +
#   ylab(expression(paste("HTTK-PFAS",
#                          ~C[ss],
#                          " (",
#                          mu,
#                          "M)"))) +
#   xlab(expression(paste("Machine Learning",
#                          ~C[ss],
#                          " (",
#                          mu,
#                          "M)"))) +
#   scale_x_log10(limits=c(10^-2,10^6),label=scientific_10)+
#   scale_y_log10(limits=c(10^-2,10^6),label=scientific_10)+
#   theme_bw()  +
#   theme( text  = element_text(size=FONT.SIZE)) +
#   scale_color_viridis(discrete=3)+
#   guides(color=guide_legend(title="ML Halflife Class"),
#          shape=guide_legend(title="In Vitro Metabolism"))
# 
# print(fig.css.ml)
# #  ggsave(paste0(LOC.WD,
# #    "Fig_httkpfas_cl_ratio.tiff"),
# #         height = 11, width = 12, dpi = 300)

## ----multipanelcssfigureforpaper, eval = execute.vignette---------------------
# multipanelcss <- ggarrange(fig.css.httk + rremove("ylab") ,
#                                    fig.css.ml + rremove("ylab") ,
#           labels = c("A", "B"),
#           ncol = 2, nrow = 1,
#           common.legend = TRUE, legend = "bottom")
# multipanelcss <- annotate_figure(multipanelcss,
#                 left = textGrob(expression(paste("HTTK-PFAS",
#                          ~C[ss],
#                          " (",
#                          mu,
#                          "M)")),
#                 rot = 90,
#                 vjust = 1,
#                 gp = gpar(cex = 1.75)),
#                 )
# print(multipanelcss)
# tiff(file = paste0(LOC.WD,"Fig_Css.tiff"),
#          height = 2.5*200, width = 5*200, compression = "lzw")
# print(multipanelcss)
# dev.off()

## ----text_for_paper, eval = execute.vignette----------------------------------
# cat("\nAbstract\n")
# cat(paste0("For typical organic chemicals, HTTK in vitro data combined with ",
#            "species-specific physiological parameters predicts the Rat:Human ",
#            "clearance ratio with a root mean squared log10 error (RMSLE) of ",
#            signif(RMSLE(subset(cl.table,
#                                Species=="Rat" & Class!="PFAS"),
#                         "TK.Ratio.InVivo", "TK.Ratio.HTTK"),2),
#            ".\n"))
# cat("\n")
# cat(paste0("With this and other modifications, HTTK methods for PFAS achieve ",
#            "improved prediction of interspecies clearance ratios, with an ",
#            "RMSLE of ",
#            signif(RMSLE(subset(cl.table, Species!="Human" & Class=="PFAS"),
#                         "TK.Ratio.HTTKPFAS","TK.Ratio.InVivo"),2),
#            ".\n"))
# 
# cat("\nResults: 3.2\n")
# cat(paste0("RMSLE is much larger for PFAS (",
#            ratio.table["PFAS Default HTTK Clearance Ratio","RMSLE"],
#            " = ",
#            signif(10^ratio.table["PFAS Default HTTK Clearance Ratio","RMSLE"],2),
#            "-fold error) than for non-PFAS chemicals (",
#            ratio.table["Non-PFAS Default HTTK Clearance Ratio","RMSLE"],
#            " = ",
#            signif(10^ratio.table["Non-PFAS Default HTTK Clearance Ratio","RMSLE"],2),
#            "-fold error).\n"))
# 
# cat("\nResults: 3.3\n")
# cat(paste0("The ML performs well on the ",
#            num.novel.pfas.hl,
#            " novel chemicals, with a RMSLE of ",
#             signif(RMSLE.novel,2),
#            ".\n"))
# cat("\n")
# cat(paste0("As shown in panel A of Figure 4, this model does well (RMSLE ",
#            RMSLE(cl.table,
#                  "MachineLearning.Cltot.Lphpkgbw",
#                  "ClLphpkgbw"),
#            ") at reproducing the data on which it was essentially trained.\n"))
# 
# cat("\nResults: 3.4\n")
# cat(paste0("The overall RMSLE is ",
#            RMSLE(subset(cl.table, Class=="PFAS"),
#                  "HTTKPFAS.Cltot.Lphpkgbw",
#                  "ClLphpkgbw"),
#            ".\n"))
# cat("\n")
# cat(paste0("HTTK-PFAS clearance ratios are improved, in contrast to Figure 2, with an RMSLE of ",
#            RMSLE(subset(cl.table, Species!="Human" & Class=="PFAS"),
#                  "TK.Ratio.HTTKPFAS","TK.Ratio.InVivo"),
#            ".\n"))

## ----firstpass, eval = execute.vignette---------------------------------------
# firstpass.table <- NULL
# for (this.chem in get_2023pfasinfo(info="dtxsid", suppress.messages=TRUE,
#                                    model=PFAS.MODEL.USED))
# {
#       params <- parameterize_sumclearances(dtxsid=this.chem,
#                                          suppress.messages=TRUE,
#                                          class.exclude=FALSE)
#       params2 <- parameterize_schmitt(dtxsid=this.chem,
#                                       suppress.messages=TRUE,
#                                       class.exclude=FALSE)
#       ion <- calc_ionization(
#         pH=7.4,
#         pKa_Donor=params2[["pKa_Donor"]],
#         pKa_Accept=params2[["pKa_Accept"]])
#       fraction_neutral <- ion$fraction_neutral
#       if (fraction_neutral < 0.5) params[['Rblood2plasma']] <- 0.5
#       else params[['Rblood2plasma']] <- 20
#   params[["Clmetabolismc"]] <- calc_hep_clearance(parameters=params,
#           hepatic.model='unscaled',
#           suppress.messages=TRUE)#L/h/kg body weight
#   this.row <- data.frame(DTXSID=this.chem,
#                          Fsystemic=calc_hep_bioavailability(
#                            parameters=params)
#   )
#   firstpass.table <- rbind(firstpass.table, this.row)
# }

## ----firstpass_fig, eval = execute.vignette-----------------------------------
# fig.firstpass  <- ggplot(data=firstpass.table) +
#   geom_histogram(aes(
#     x=Fsystemic),bins=10) +
#   ylab("Number of Chemicals") +
#   xlab("Fraction Systemically Available") +
#   theme_bw()  +
#   theme( text  = element_text(size=FONT.SIZE))
# 
# print(fig.firstpass)

## ----fig_clspecies_invivo, eval = execute.vignette----------------------------
# fig.clspecies.invivo  <- ggplot(data=cl.table) +
#   geom_point(aes(
#     x=Species,
#     y=ClLphpkgbw,
#     color=Species,
#     shape=Sex),
#     size=POINT.SIZE) +
#   geom_line(aes(x=Species,
#               y=ClLphpkgbw,
#               group = DTXSID)) +
# #  geom_line(aes(x=logP,y=logPfit1),color="black") +
# #  geom_line(aes(x=logP,y=logPfit2),color="blue",linetype = "dashed") +
#   ylab("In Vivo Clearance (L/h/kg bw)") +
#   xlab("Species") +
#   scale_y_log10(label=scientific_10)+
#   theme_bw()  +
#   theme( text  = element_text(size=FONT.SIZE))
# 
# print(fig.clspecies.invivo)

## ----fig_clspecies_httk, eval = execute.vignette------------------------------
# fig.clspecies.httk  <- ggplot(data=cl.table) +
#   geom_point(aes(
#     x=Species,
#     y=HTTK.Cltot.Lphpkgbw,
#     color=Species,
#     shape=Sex),
#     size=POINT.SIZE) +
# #  geom_segment(color="#69b3a2",
# #                  aes(
# #                    x=Species,
# #                    y=HTTK.Cltot.Lphpkgbw,
# #                    xend = c(tail(Species, n = -1), NA),
# #                    yend = c(tail(HTTK.Cltot.Lphpkgbw, n = -1), NA)
# #                  ))+
# geom_line(aes(x=Species,
#               y=HTTK.Cltot.Lphpkgbw,
#               group = DTXSID)) +
# ylab("HTTK Clearance (L/h/kg bw)") +
#   xlab("Species") +
#   scale_y_log10(label=scientific_10)+
#   theme_bw()  +
#   theme( text  = element_text(size=FONT.SIZE))
# 
# print(fig.clspecies.httk)

## ----clearance_fraction, eval = execute.vignette------------------------------
# FigClearanceFrac1<- ggplot(data=pfas.httk.table)+
#    geom_histogram(binwidth = 0.1,alpha=1,fill="Red",aes(Frac.Metabolism))+
#   xlab(" ") +
#   ylab(" ")+
#   ggtitle("Hepatic Metabolism")+
#   scale_x_continuous(limits=c(-.05,1.05))+
#   scale_y_log10(limits=c(0.1,100))
# FigClearanceFrac2<- ggplot(data=pfas.httk.table)+
#    geom_histogram(binwidth = 0.1,alpha=1,fill="Blue",aes(Frac.Glomerular))+
#   xlab(" ") +
#   ylab("Number of chemicals")+
#   ggtitle("Glomerular Filtration")+
#   scale_x_continuous(limits=c(-.05,1.05))+
#   scale_y_log10(limits=c(0.1,100))
# FigClearanceFrac3<- ggplot(data=pfas.httk.table)+
#    geom_histogram(binwidth = 0.1,alpha=1,fill="Green",aes(Frac.Exhalation))+
#   xlab("Fraction of Total Clearance") +
#   ylab(" ")+
#   ggtitle("Exhalation")+
#   scale_x_continuous(limits=c(-.05,1.05))+
#   scale_y_log10(limits=c(0.1,100))
# ggarrange(FigClearanceFrac1, FigClearanceFrac2, FigClearanceFrac3,
#           ncol = 1, nrow = 3,
#           common.legend = TRUE, legend = "right")
# tiff(file = paste0(LOC.WD,"Fig_FracClearance.tiff"),
#          height = 3*100, width = 5*100, compression = "lzw")
# ggarrange(FigClearanceFrac1, FigClearanceFrac2, FigClearanceFrac3,
#           ncol = 1, nrow = 3,
#           common.legend = TRUE, legend = "right")
# dev.off()

## ----two_panel_Css_histogram, eval = execute.vignette-------------------------
# Fig.Cssa <- ggplot(data=pfas.httk.table)+
#   geom_histogram(binwidth = 0.01,aes(Css.Exhale.FoldChange))+
#   xlab("Css Fold Change Due to Exhalation")
# Fig.Cssb <- ggplot(data=pfas.httk.table)+
#   geom_histogram(binwidth = 10,aes(Css.PFAS.FoldChange))+
#   xlab("Css Fold Change Due to PFAS Resorption")
# ggarrange(Fig.Cssa, Fig.Cssb,
#           labels = c("A", "B"),
#           ncol = 2, nrow = 1,
#           common.legend = TRUE, legend = "bottom")
# tiff(file = paste0(LOC.WD,"Fig_CssFoldChange.tiff"),
#          height = 3*100, width = 5*100, compression = "lzw")
# ggarrange(Fig.Cssa, Fig.Cssb,
#           labels = c("A", "B"),
#           ncol = 2, nrow = 1,
#           common.legend = TRUE, legend = "top")
# dev.off()