## ----include = FALSE----------------------------------------------------------
# Global knitr chunk options used when the vignette is rendered.
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## ----setup--------------------------------------------------------------------
library(ORscraper)

## -----------------------------------------------------------------------------
# readxl is only needed to load the gene list below; stop early with an
# actionable message when it is not installed.
if (!requireNamespace("readxl", quietly = TRUE)) {
  stop("The readxl package is required for this vignette, install it with install.packages('readxl').")
}

# Locate the "extdata" directory shipped with the package and list the PDF
# reports found there.
InputPath <- system.file("extdata", package = "ORscraper")
files <- ORscraper::read_pdf_files(InputPath)

# The gene names of interest are the unique entries of the GEN column of the
# bundled Genes.xlsx workbook.
genes_file <- system.file("extdata/Genes.xlsx", package = "ORscraper")
genes <- readxl::read_excel(genes_file)
mutations <- unique(genes[["GEN"]])

## -----------------------------------------------------------------------------
lines <- ORscraper::read_pdf_content(files[1]) # Example with the first file
head(lines)

## -----------------------------------------------------------------------------
# Each extractor receives an accumulator (empty here), the report lines and a
# regular expression, and its result replaces the accumulator.
# NOTE(review): presumably the text following the matched label is what gets
# appended -- confirm against the extract_values_start_end() documentation.
diagnostic <- NULL
gender <- NULL
tumor_cell_percentage <- NULL
quality <- NULL

diagnostic <- extract_values_start_end(
  diagnostic, lines, ".*Diagnóstico:\\s"
)
gender <- extract_values_start_end(
  gender, lines, ".*Sexo:\\s*"
)
tumor_cell_percentage <- extract_values_start_end(
  tumor_cell_percentage, lines, ".*% células tumorales:\\s"
)
quality <- extract_values_start_end(
  quality, lines, ".*CALIDAD DE LA MUESTRA /LIMITACIONES PARA SU ANÁLISIS:\\s"
)

## -----------------------------------------------------------------------------
# Same accumulator convention as above, this time keyed by a label string
# instead of a start pattern.
NHC_Data <- NULL
NB_values <- NULL
dates <- NULL
textDiag <- NULL

NHC_Data <- extract_intermediate_values(NHC_Data, lines, "NHC:")
NB_values <- extract_intermediate_values(NB_values, lines, "biopsia:")
dates <- extract_intermediate_values(dates, lines, "Fecha:")
textDiag <- extract_intermediate_values(textDiag, lines, "de la muestra:")

## -----------------------------------------------------------------------------
# extract_values_from_tables() returns a list; its first five elements are
# unpacked by position into separately named objects.
TableValues <- extract_values_from_tables(lines, mutations)

mutateGenes   <- TableValues[[1]]
pathogenity   <- TableValues[[2]]
frequencies   <- TableValues[[3]]
codifications <- TableValues[[4]]
changes       <- TableValues[[5]]

## -----------------------------------------------------------------------------
fusions <- extract_fusions(lines, mutations)

## ----eval=FALSE---------------------------------------------------------------
# Not evaluated when the vignette is built (chunk option eval=FALSE), so the
# call is kept commented out.
# search_pathogenity <- search_ncbi_clinvar(pathogenity, mutateGenes, codifications)

## -----------------------------------------------------------------------------
# Apply the same pathogenicity-based filter to the genes, the changes and the
# frequencies taken from the tables.
pathogenic_mutations <- filter_pathogenic_only(pathogenity, mutateGenes)
pathogenic_changes <- filter_pathogenic_only(pathogenity, changes)
pathogenic_frequencies <- filter_pathogenic_only(pathogenity, frequencies)

## -----------------------------------------------------------------------------
biopsies_identifiers <- classify_biopsy(NB_values)

## -----------------------------------------------------------------------------
chips <- extract_chip_id(files)