## ----include = FALSE----------------------------------------------------------
# Script extracted from the "Input-data-format" vignette. Each
# `## ----name----` header marks one vignette chunk; the chunks run top to
# bottom and reuse `demo_file`, `header` and `output_dir` between steps.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 10
)

## ----include = FALSE, eval=FALSE----------------------------------------------
# library(devtools)
# use_vignette("your-vignette-name")
# build_vignettes()
# getwd()
# rmarkdown::render("Input-data-format.Rmd", output_dir = "../doc/")

## ----setup--------------------------------------------------------------------
library(Aerith)
library(dplyr)

## ----ft1-reading--------------------------------------------------------------
# The demo FT1 file ships as an RDS of text lines; write it back out to a
# temporary .FT1 file so the readers below can open it by path.
rds <- system.file("extdata", "demo.FT1.rds", package = "Aerith")
demo_file <- tempfile(fileext = ".FT1")
writeLines(readRDS(rds), demo_file)

# Read all MS1 scans into memory
# This approach is memory-intensive but provides fastest access for
# subsequent operations
all_scans <- readAllScanMS1(demo_file)

# Read a specific range of scans (scan numbers 1527-1550)
# Recommended when you know the specific scans of interest
scan_range <- readScansMS1(demo_file, 1527, 1550)

# Read a single scan by scan number
# Most memory-efficient for analyzing individual scans
single_scan <- readOneScanMS1(demo_file, 1555)

# Extract real scan data from the list structure
# Converts internal format to user-friendly data structure
processed_scan <- getRealScanFromList(all_scans[[88]])
plot(processed_scan)

## ----ft2-reading--------------------------------------------------------------
demo_file <- system.file("extdata", "demo.FT2", package = "Aerith")

# Read all MS2 scans
all_ms2_scans <- readAllScanMS2(demo_file)

# Read specific scan range for MS2 data
ms2_range <- readScansMS2(demo_file, 1399, 1500)

# Read individual MS2 scan
single_ms2_scan <- readOneScanMS2(demo_file, 1371)

# Process and visualize MS2 spectrum
processed_ms2_scan <- getRealScanFromList(all_ms2_scans[[128]])
plot(processed_ms2_scan)

## ----ft1-writing--------------------------------------------------------------
rds <- system.file("extdata", "demo.FT1.rds", package = "Aerith")
demo_file <- tempfile(fileext = ".FT1")
writeLines(readRDS(rds), demo_file)

# Read file header information (essential for maintaining file integrity)
header <- readFTheader(demo_file)

# Read all scans from the original file
ft1_data <- readAllScanMS1(demo_file)

# Create output directory
output_dir <- tempdir()

# Write subset containing first 10 scans
# This preserves file format while reducing file size significantly
writeAllScanMS1(header, ft1_data[1:10], file.path(output_dir, "demo10.FT1"))

# Verify file creation
# `pattern` is a regular expression: anchor it and escape the dot so only the
# file written above is reported, not any name that merely contains a match.
subset_files <- list.files(
  output_dir,
  pattern = "^demo10\\.FT1$",
  full.names = TRUE
)
print(paste("Created subset file:", subset_files))

## ----ft2-writing--------------------------------------------------------------
demo_file <- system.file("extdata", "demo.FT2", package = "Aerith")

# Read header and scan data
header <- readFTheader(demo_file)
ft2_data <- readAllScanMS2(demo_file)

# Create subset with first 10 MS2 scans
output_dir <- tempdir()
writeAllScanMS2(header, ft2_data[1:10], file.path(output_dir, "demo10.FT2"))

# Confirm successful file creation
# Anchored, escaped pattern for the same reason as in the FT1 chunk.
subset_files <- list.files(
  output_dir,
  pattern = "^demo10\\.FT2$",
  full.names = TRUE
)
print(paste("Created MS2 subset file:", subset_files))

## ----mzml-reading-------------------------------------------------------------
# mzML support requires the mzR package
demo_file <- system.file("extdata", "demo.mzML", package = "Aerith")

# Read MS1 data from mzML file
# mzML files can contain both MS1 and MS2 data in a single file
mzml_ms1_data <- readMzmlMS1(demo_file)

# Extract and visualize a specific MS1 scan
ms1_spectrum <- getRealScan(16, mzml_ms1_data)
plot(ms1_spectrum)

# Read MS2 data from the same mzML file
mzml_ms2_data <- readMzmlMS2(demo_file)

# Extract and visualize a specific MS2 scan
ms2_spectrum <- getRealScan(18, mzml_ms2_data)
plot(ms2_spectrum)

## ----mgf-reading--------------------------------------------------------------
# MGF support requires the MSnbase package
demo_file <- system.file("extdata", "demo.mgf", package = "Aerith")

# Read MGF file containing MS2 spectra
# MGF files typically contain only MS2 spectra with associated metadata
mgf_data <- readMgf(demo_file)

# Extract and visualize a specific spectrum
selected_spectrum <- getRealScan(2688, mgf_data)
plot(selected_spectrum)

## ----pepxml-reading-----------------------------------------------------------
# pepXML parsing requires the mzR package
demo_file <- system.file("extdata", "demo.pepXML", package = "Aerith")

# Parse pepXML file to extract peptide identification results
# This creates a structured data frame with all identification information
pepxml_results <- readPepXMLtable(demo_file)

# Display structure of the results
str(pepxml_results)

## ----psm-reading--------------------------------------------------------------
demo_file <- system.file("extdata", "demo.psm.txt", package = "Aerith")

# Read peptide-spectrum match results
# Contains identification scores, modifications, and SIP-specific metrics
psm_data <- readPSMtsv(demo_file)

# Display key columns and data structure
head(psm_data)

## ----protein-cluster-reading--------------------------------------------------
demo_file <- system.file("extdata", "demo.pro.cluster.txt", package = "Aerith")

# Read protein clustering and grouping information
# Essential for protein-level quantification and SIP analysis
# NOTE(review): this file is read with readPSMtsv(); confirm that is the
# intended reader for the .pro.cluster.txt format.
protein_clusters <- readPSMtsv(demo_file)

# Examine protein grouping structure
head(protein_clusters)

## ----sip-reading--------------------------------------------------------------
demo_file <- system.file("extdata", "demo.sip", package = "Aerith")

# Read SIP analysis results containing isotope incorporation metrics
# This file type is unique to SIP proteomics workflows
# NOTE(review): this file is read with readPSMtsv(); confirm that is the
# intended reader for the .sip format.
sip_results <- readPSMtsv(demo_file)

# Display SIP-specific columns
head(sip_results)

## ----spe2pep-reading----------------------------------------------------------
target_file <- system.file(
  "extdata", "demo_target.Spe2Pep.txt",
  package = "Aerith"
)

# Read spectrum-to-peptide mapping files
# These files link MS2 spectra to peptide identifications
spe2pep_data <- readSpe2Pep(target_file)

# Extract PSM information from the parsed data
psm_from_spe2pep <- spe2pep_data$PSM

# Display mapping structure
head(psm_from_spe2pep)

## ----tic-analysis-------------------------------------------------------------
# Analyze TIC from MS1 data
rds <- system.file("extdata", "demo.FT1.rds", package = "Aerith")
demo_file <- tempfile(fileext = ".FT1")
writeLines(readRDS(rds), demo_file)
ms1_scans <- readAllScanMS1(demo_file)
ms1_tic <- getTIC(ms1_scans)

# Create TIC plot with specified retention time breaks
# The breaks parameter allows customization of the x-axis for better
# visualization
plotTIC(ms1_tic, seq(9, 10, by = 0.2))

# Analyze TIC from MS2 data
demo_file <- system.file("extdata", "demo.FT2", package = "Aerith")
ms2_scans <- readAllScanMS2(demo_file)
ms2_tic <- getTIC(ms2_scans)

# Plot MS2 TIC with the same retention time range for comparison
plotTIC(ms2_tic, seq(9, 10, by = 0.2))

## ----scan-frequency-analysis--------------------------------------------------
# Process MS2 data to extract retention time and precursor information
demo_file <- system.file("extdata", "demo.FT2", package = "Aerith")
ms2_scan_data <- readAllScanMS2(demo_file)
ms2_retention_info <- getRetentionTimeAndPrecursorInfo(ms2_scan_data)

# Process MS1 data for comparison
rds <- system.file("extdata", "demo.FT1.rds", package = "Aerith")
demo_file <- tempfile(fileext = ".FT1")
writeLines(readRDS(rds), demo_file)
ms1_scan_data <- readAllScanMS1(demo_file)
ms1_retention_info <- getRetentionTimeAndPrecursorInfo(ms1_scan_data)

# Create combined scan frequency visualization
# This plot shows the temporal distribution of MS1 and MS2 scans
# NOTE(review): plotScanFrequency() receives the MS2-derived table while
# plotScanFrequencyMS2() receives the MS1-derived one; confirm against the
# function documentation that this pairing is intended.
combined_plot <- plotScanFrequency(ms2_retention_info,
  binwidth = 0.1,
  breaks = seq(9, 10, by = 0.2)
) +
  plotScanFrequencyMS2(ms1_retention_info, binwidth = 0.1)
print(combined_plot)

## ----precursor-analysis-------------------------------------------------------
demo_file <- system.file("extdata", "demo.FT2", package = "Aerith")
ms2_data <- readAllScanMS2(demo_file)
precursor_info <- getRetentionTimeAndPrecursorInfo(ms2_data)

# Create precursor m/z frequency plot
# This visualization shows how precursors are distributed across m/z and
# retention time
plotPrecursorMzFrequency(precursor_info,
  timeBinWidth = 0.1,
  x_breaks = seq(8, 11, by = 0.2)
)

## ----session-info-------------------------------------------------------------
sessionInfo()