## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 8, fig.height = 6 ) ## ----setup-------------------------------------------------------------------- library(manureshed) ## ----custom_wwtp_basic, eval=FALSE-------------------------------------------- # # Use your WWTP files for 2020 # results_2020 <- run_builtin_analysis( # scale = "huc8", # year = 2020, # Agricultural data available 1987-2016 # nutrients = "nitrogen", # include_wwtp = TRUE, # custom_wwtp_nitrogen = "nitrogen_2021.csv", # wwtp_load_units = "kg" # ) # # # 3. Check results # summarize_results(results_custom) # quick_check(results_custom) # # # 4. Create maps # nitrogen_map <- map_agricultural_classification( # results_custom$integrated$nitrogen, # "nitrogen", "combined_N_class", # "2021 Nitrogen with Custom WWTP Data" # ) # # save_plot(nitrogen_map, "custom_analysis_2021.png") # # # 5. Save results # save_spatial_data( # results_custom$integrated$nitrogen, # "nitrogen_2021_results.rds" # ) ## ----other_sources, eval=FALSE------------------------------------------------ # # Example: Adding industrial sources # industrial_data <- data.frame( # Facility_Name = c("Steel Plant A", "Chemical Plant B", "Food Processor C"), # Lat = c(41.5, 40.7, 42.1), # Long = c(-81.7, -82.1, -83.5), # N_Load_tons = c(50, 75, 30), # P_Load_tons = c(10, 15, 8), # Source_Type = "Industrial" # ) # # # Convert to spatial format # industrial_sf <- st_as_sf(industrial_data, # coords = c("Long", "Lat"), # crs = 4326) %>% # st_transform(5070) # Transform to analysis CRS # # # Aggregate by spatial units (example for HUC8) # boundaries <- load_builtin_boundaries("huc8") # industrial_aggregated <- wwtp_aggregate_by_boundaries( # industrial_sf, boundaries, "nitrogen", "huc8" # ) # # # Add to existing results # # (You would modify the integration functions to include industrial sources) ## ----time_periods, eval=FALSE------------------------------------------------- # # Create a time series dataset # years_to_analyze <- 2018:2022 # time_series_results <- list() # # for (year in years_to_analyze) { # # Check if you have WWTP data for this year # wwtp_file <- paste0("wwtp_nitrogen_", year, ".csv") # # if (file.exists(wwtp_file)) { # time_series_results[[as.character(year)]] <- run_builtin_analysis( # scale = "huc8", # year = year, # nutrients = "nitrogen", # include_wwtp = TRUE, # custom_wwtp_nitrogen = wwtp_file, # save_outputs = FALSE, # verbose = FALSE # ) # } else { # # Agricultural only if no WWTP data # time_series_results[[as.character(year)]] <- run_builtin_analysis( # scale = "huc8", # year = year, # nutrients = "nitrogen", # include_wwtp = FALSE, # save_outputs = FALSE, # verbose = FALSE # ) # } # } # # # Compare results across years # yearly_summary <- do.call(rbind, lapply(names(time_series_results), function(year) { # result <- time_series_results[[year]] # classes <- table(result$agricultural$N_class) # # data.frame( # Year = as.numeric(year), # Total_Units = sum(classes), # Source_Units = classes["Source"] %||% 0, # Sink_Units = sum(classes[c("Sink_Deficit", "Sink_Fertilizer")], na.rm = TRUE), # Excluded_Units = classes["Excluded"] %||% 0 # ) # })) # # print(yearly_summary) ## ----custom_agricultural, eval=FALSE------------------------------------------ # # Example: Custom agricultural data format # custom_farm_data <- data.frame( # County_FIPS = c("39001", "39003", "39005"), # Manure_N_kg = c(100000, 150000, 200000), # Manure_P_kg = c(20000, 30000, 40000), # Fertilizer_N_kg = c(50000, 75000, 100000), # Fertilizer_P_kg = c(10000, 15000, 20000), # N_Fixation_kg = c(25000, 40000, 35000), # Crop_N_Removal_kg = c(80000, 120000, 140000), # Crop_P_Removal_kg = c(15000, 25000, 30000), # Cropland_acres = c(45000, 67000, 52000) # ) # # # Convert to package format # standardized_farm_data <- data.frame( # ID = custom_farm_data$County_FIPS, # NAME = paste("County", substr(custom_farm_data$County_FIPS, 3, 5)), # manure_N = custom_farm_data$Manure_N_kg, # manure_P = custom_farm_data$Manure_P_kg, # fertilizer_N = custom_farm_data$Fertilizer_N_kg, # fertilizer_P = custom_farm_data$Fertilizer_P_kg, # N_fixation = custom_farm_data$N_Fixation_kg, # crop_removal_N = custom_farm_data$Crop_N_Removal_kg, # crop_removal_P = custom_farm_data$Crop_P_Removal_kg, # cropland = custom_farm_data$Cropland_acres # ) # # # Apply classifications # custom_classified <- standardized_farm_data %>% # agri_classify_nitrogen(cropland_threshold = 1234, scale = "county") %>% # agri_classify_phosphorus(cropland_threshold = 1234, scale = "county") # # print(custom_classified) ## ----data_validation, eval=FALSE---------------------------------------------- # # Function to validate your custom data # validate_custom_data <- function(data, data_type = "wwtp") { # # issues <- list() # # if (data_type == "wwtp") { # # Check required columns # required_cols <- c("Facility_Name", "Lat", "Long") # missing_cols <- setdiff(required_cols, names(data)) # if (length(missing_cols) > 0) { # issues$missing_columns <- missing_cols # } # # # Check coordinate ranges (for US) # if ("Lat" %in% names(data) && "Long" %in% names(data)) { # invalid_coords <- sum(data$Lat < 24 | data$Lat > 50 | # data$Long < -125 | data$Long > -66, na.rm = TRUE) # if (invalid_coords > 0) { # issues$invalid_coordinates <- invalid_coords # } # } # # # Check for negative loads # load_cols <- names(data)[grepl("Load|load", names(data))] # for (col in load_cols) { # if (col %in% names(data)) { # negative_loads <- sum(data[[col]] < 0, na.rm = TRUE) # if (negative_loads > 0) { # issues[[paste0("negative_", col)]] <- negative_loads # } # } # } # } # # if (data_type == "agricultural") { # # Check for negative values in nutrient data # nutrient_cols <- c("manure_N", "manure_P", "fertilizer_N", "fertilizer_P", # "crop_removal_N", "crop_removal_P", "cropland") # # for (col in nutrient_cols) { # if (col %in% names(data)) { # negative_values <- sum(data[[col]] < 0, na.rm = TRUE) # if (negative_values > 0) { # issues[[paste0("negative_", col)]] <- negative_values # } # } # } # } # # if (length(issues) == 0) { # message("✓ Data validation passed") # } else { # message("⚠ Data validation issues found:") # for (issue in names(issues)) { # message(" ", issue, ": ", issues[[issue]]) # } # } # # return(issues) # } # # # Use the validation function # # validate_custom_data(your_wwtp_data, "wwtp") # # validate_custom_data(your_farm_data, "agricultural") ## ----export_results, eval=FALSE----------------------------------------------- # # Export results in different formats # export_analysis_results <- function(results, output_dir = "exports") { # # dir.create(output_dir, showWarnings = FALSE) # # # Export agricultural data as CSV # agri_csv <- st_drop_geometry(results$agricultural) # write.csv(agri_csv, file.path(output_dir, "agricultural_results.csv"), row.names = FALSE) # # # Export as shapefile (if sf package available) # if (requireNamespace("sf", quietly = TRUE)) { # st_write(results$agricultural, file.path(output_dir, "agricultural_results.shp"), # delete_dsn = TRUE, quiet = TRUE) # } # # # Export WWTP facilities if available # if ("wwtp" %in% names(results)) { # for (nutrient in names(results$wwtp)) { # facility_data <- results$wwtp[[nutrient]]$facility_data # write.csv(facility_data, # file.path(output_dir, paste0("wwtp_", nutrient, "_facilities.csv")), # row.names = FALSE) # } # } # # # Export integrated results if available # if ("integrated" %in% names(results)) { # for (nutrient in names(results$integrated)) { # integrated_csv <- st_drop_geometry(results$integrated[[nutrient]]) # write.csv(integrated_csv, # file.path(output_dir, paste0("integrated_", nutrient, "_results.csv")), # row.names = FALSE) # } # } # # message("Results exported to: ", output_dir) # } # # # Use the export function # # export_analysis_results(results_custom, "my_exports") ## ----troubleshooting_wwtp, eval=FALSE----------------------------------------- # # Common WWTP data problems and solutions # # # Problem 1: "No valid facilities remaining after cleaning" # # Solution: Check coordinates and load values # wwtp_data <- read.csv("your_wwtp_file.csv") # summary(wwtp_data) # Look for obvious issues # # # Check coordinate ranges # summary(wwtp_data$Latitude) # Should be ~24-50 for US # summary(wwtp_data$Longitude) # Should be ~-125 to -66 for US # # # Check load values # summary(wwtp_data$Load) # Should be positive numbers # # # Problem 2: Wrong coordinate system # # Solution: Make sure coordinates are in decimal degrees (not UTM) # # Example conversion if needed: # # library(sf) # # points_utm <- st_as_sf(data, coords = c("X_UTM", "Y_UTM"), crs = 32617) # # points_dd <- st_transform(points_utm, 4326) # # coords_dd <- st_coordinates(points_dd) # # # Problem 3: Mixed units in same file # # Solution: Standardize units before analysis # standardize_mixed_units <- function(data, load_col, unit_col) { # data$standardized_load <- ifelse( # data[[unit_col]] == "kg", data[[load_col]], # ifelse(data[[unit_col]] == "lbs", data[[load_col]] / 2.20462, # data[[load_col]] * 907.185) # assuming tons # ) # return(data) # } ## ----troubleshooting_agricultural, eval=FALSE--------------------------------- # # Common agricultural data problems # # # Problem: Impossible nutrient balances # # Solution: Check your units and conversion factors # check_nutrient_balance <- function(data) { # # Calculate simple checks # data$total_inputs_N <- data$manure_N + data$fertilizer_N + data$N_fixation # data$efficiency_N <- data$crop_removal_N / data$total_inputs_N # # # Flag suspicious values # suspicious <- data$efficiency_N > 2.0 | data$efficiency_N < 0.1 # # if (sum(suspicious, na.rm = TRUE) > 0) { # message("Warning: ", sum(suspicious, na.rm = TRUE), " units have suspicious N efficiency") # print(data[suspicious, c("ID", "total_inputs_N", "crop_removal_N", "efficiency_N")]) # } # # return(data) # } # # # Problem: Missing spatial boundaries # # Solution: Use built-in boundaries or provide your own # # county_boundaries <- load_builtin_boundaries("county") # # your_data_with_boundaries <- merge(your_data, county_boundaries, by.x = "FIPS", by.y = "FIPS") ## ----help, eval=FALSE--------------------------------------------------------- # # Get help on specific functions # ?load_user_wwtp # ?run_builtin_analysis # ?wwtp_clean_data # # # Check what data is available # check_builtin_data() # list_available_years() # # # Package health check # health_check() # # # Test data download connection # test_osf_connection() ## ----epa_standard, eval=FALSE------------------------------------------------- # # Standard EPA format (auto-detected) # results <- run_builtin_analysis( # scale = "county", # year = 2018, # nutrients = c("nitrogen", "phosphorus"), # include_wwtp = TRUE, # custom_wwtp_nitrogen = "nitrogen_2018.csv", # custom_wwtp_phosphorus = "phosphorus_2018.csv" # ) ## ----different_units, eval=FALSE---------------------------------------------- # # If your data uses pounds instead of kg # results <- run_builtin_analysis( # scale = "huc8", # year = 2019, # nutrients = "nitrogen", # include_wwtp = TRUE, # custom_wwtp_nitrogen = "nitrogen_pounds_2019.csv", # wwtp_load_units = "lbs" # Converts automatically # ) # # # Other units: "kg", "lbs", "pounds", "tons" ## ----different_format, eval=FALSE--------------------------------------------- # # If headers are in different rows # results <- run_builtin_analysis( # scale = "county", # year = 2021, # nutrients = "nitrogen", # include_wwtp = TRUE, # custom_wwtp_nitrogen = "nitrogen_2021.csv", # wwtp_skip_rows = 3, # Skip first 3 rows # wwtp_header_row = 1 # Headers in row 1 after skipping # ) ## ----custom_columns, eval=FALSE----------------------------------------------- # # If your columns have different names # custom_mapping <- list( # facility_name = "Plant_Name", # latitude = "Lat_DD", # longitude = "Long_DD", # pollutant_load = "Annual_Load_kg", # state = "State_Code" # ) # # results <- run_builtin_analysis( # scale = "huc8", # year = 2022, # nutrients = "nitrogen", # include_wwtp = TRUE, # custom_wwtp_nitrogen = "custom_format.csv", # wwtp_column_mapping = custom_mapping # ) ## ----manual_wwtp, eval=FALSE-------------------------------------------------- # # Step 1: Load your WWTP file # wwtp_raw <- load_user_wwtp( # file_path = "nitrogen_2020.csv", # nutrient = "nitrogen", # load_units = "lbs" # ) # # # Step 2: Clean the data # wwtp_clean <- wwtp_clean_data(wwtp_raw, "nitrogen") # # # Step 3: Classify facilities by size # wwtp_classified <- wwtp_classify_sources(wwtp_clean, "nitrogen") # # # Step 4: Convert to spatial format # wwtp_spatial <- wwtp_to_spatial(wwtp_classified) # # # Step 5: Load boundaries and aggregate by spatial units # boundaries <- load_builtin_boundaries("huc8") # wwtp_aggregated <- wwtp_aggregate_by_boundaries( # wwtp_spatial, boundaries, "nitrogen", "huc8" # ) # # # Step 6: Integrate with agricultural data # agri_data <- load_builtin_nugis("huc8", 2020) # agri_processed <- agri_classify_complete(agri_data, "huc8") # # integrated <- integrate_wwtp_agricultural( # agri_processed, wwtp_aggregated, "nitrogen", # cropland_threshold = 1234, scale = "huc8" # ) ## ----unit_conversions, eval=FALSE--------------------------------------------- # # Convert between units # kg_loads <- c(1000, 2000, 5000) # tons_loads <- convert_load_units(kg_loads, "kg") # # lbs_loads <- c(2000, 4000, 10000) # tons_loads <- convert_load_units(lbs_loads, "lbs") # # print(data.frame( # Original_kg = kg_loads, # Converted_tons = convert_load_units(kg_loads, "kg") # )) ## ----p2o5_conversion, eval=FALSE---------------------------------------------- # # If you have P2O5 data, convert to P # p2o5_values <- c(100, 200, 300) # kg P2O5 # p_values <- p2o5_values * P2O5_TO_P # Convert to P # # print(paste("P2O5 to P conversion factor:", P2O5_TO_P)) ## ----county_data, eval=FALSE-------------------------------------------------- # # County analysis - make sure you have 5-digit FIPS codes # county_results <- run_builtin_analysis( # scale = "county", # year = 2016, # nutrients = "nitrogen" # ) # # # Your county WWTP data should have state and county info ## ----huc8_data, eval=FALSE---------------------------------------------------- # # HUC8 analysis - 8-digit watershed codes # huc8_results <- run_builtin_analysis( # scale = "huc8", # year = 2016, # nutrients = "nitrogen" # ) # # # Make sure HUC8 codes are 8 digits (add leading zero if needed) # huc8_codes <- c("4110001", "4110002") # 7 digits # formatted_codes <- format_huc8(huc8_codes) # Adds leading zero # print(formatted_codes) # "04110001", "04110002" ## ----huc2_data, eval=FALSE---------------------------------------------------- # # HUC2 analysis - 2-digit regional codes # huc2_results <- run_builtin_analysis( # scale = "huc2", # year = 2016, # nutrients = "nitrogen" # ) ## ----state_analysis, eval=FALSE----------------------------------------------- # # Analyze just one state # iowa_results <- run_state_analysis( # state = "IA", # scale = "county", # year = 2016, # nutrients = "nitrogen", # include_wwtp = TRUE # ) # # # With custom WWTP data # texas_results <- run_state_analysis( # state = "TX", # scale = "huc8", # year = 2020, # nutrients = "nitrogen", # include_wwtp = TRUE, # custom_wwtp_nitrogen = "texas_wwtp_2020.csv" # ) ## ----multi_state, eval=FALSE-------------------------------------------------- # # Analyze several states # midwest_states <- c("IA", "IL", "IN", "OH") # state_results <- list() # # for (state in midwest_states) { # state_results[[state]] <- run_state_analysis( # state = state, # scale = "county", # year = 2016, # nutrients = "nitrogen", # include_wwtp = TRUE # ) # } ## ----data_validation2, eval=FALSE--------------------------------------------- # # Check your results make sense # quick_check(results) # # # Look at the classifications # table(results$agricultural$N_class) # # # Check WWTP facility counts # if ("wwtp" %in% names(results)) { # print(paste("WWTP facilities:", nrow(results$wwtp$nitrogen$facility_data))) # } ## ----data_issues, eval=FALSE-------------------------------------------------- # # Problem: Negative nutrient values # # Solution: Check your data source and units # # # Problem: All facilities excluded # # Solution: Check coordinate system (should be decimal degrees) # # # Problem: No WWTP facilities found # # Solution: Check facility coordinates are in correct format # # # Problem: Classification doesn't make sense # # Solution: Check cropland threshold and nutrient balance calculations ## ----time_series, eval=FALSE-------------------------------------------------- # # Analyze multiple years # years_to_analyze <- 2014:2016 # # batch_results <- batch_analysis_years( # years = years_to_analyze, # scale = "huc8", # nutrients = "nitrogen", # include_wwtp = TRUE # ) # # # With custom WWTP data for some years # custom_wwtp_files <- list( # "2018" = "nitrogen_2018.csv", # "2019" = "nitrogen_2019.csv", # "2020" = "nitrogen_2020.csv" # ) # # # Process each year with custom data # custom_results <- list() # for (year in names(custom_wwtp_files)) { # custom_results[[year]] <- run_builtin_analysis( # scale = "county", # year = as.numeric(year), # nutrients = "nitrogen", # include_wwtp = TRUE, # custom_wwtp_nitrogen = custom_wwtp_files[[year]] # ) # } ## ----eval=FALSE--------------------------------------------------------------- # # Organize your data files # # project_folder/ # # ├── wwtp_data/ # # │ ├── nitrogen_2018.csv # # │ ├── nitrogen_2019.csv # # │ └── phosphorus_2018.csv # # ├── results/ # # └── maps/ # # # Set working directory # setwd("project_folder") # # # Run analysis with organized files # results <- run_builtin_analysis( # scale = "huc8", # year = 2018, # nutrients = "nitrogen", # include_wwtp = TRUE, # custom_wwtp_nitrogen = "wwtp_data/nitrogen_2018.csv", # output_dir = "results" # ) ## ----memory_management, eval=FALSE-------------------------------------------- # # For large datasets, clear cache periodically # clear_data_cache() # # # Check package health # health_check() # # # Free up memory between analyses # rm(large_results) # gc() ## ----complete_example, eval=FALSE--------------------------------------------- # # 1. Prepare your WWTP file (nitrogen_2021.csv) # # Make sure it has columns: Facility Name, Latitude, Longitude, Load (kg/yr), State # # # 2. Run analysis # results_custom <- run_builtin_analysis( # scale = "huc8", # year = 2021, # Agricultural data extrapolated to 2021 # nutrients = "nitrogen", # include_wwtp = TRUE, # custom_wwtp_nitrogen = "nitrogen_2021.csv", # wwtp_load_units = "kg" # ) # # # 3. Check results # summarize_results(results_custom) # quick_check(results_custom) # # # 4. Create maps # nitrogen_map <- map_agricultural_classification( # results_custom$integrated$nitrogen, # "nitrogen", "combined_N_class", # "2021 Nitrogen with Custom WWTP Data" # ) # # save_plot(nitrogen_map, "custom_analysis_2021.png") # # # 5. Save results # save_spatial_data( # results_custom$integrated$nitrogen, # "nitrogen_2021_results.rds" # ) ## ----troubleshooting_integration, eval=FALSE---------------------------------- # # Common integration problems # # # Problem: WWTP facilities not matching spatial units # # Solution: Check coordinate systems and boundary files # check_wwtp_spatial_match <- function(wwtp_data, boundaries) { # # # Convert WWTP to spatial # wwtp_sf <- st_as_sf(wwtp_data, coords = c("Long", "Lat"), crs = 4326) # wwtp_projected <- st_transform(wwtp_sf, st_crs(boundaries)) # # # Check spatial intersection # intersections <- st_intersects(wwtp_projected, boundaries) # # # Count facilities per spatial unit # matches <- lengths(intersections) # # message("WWTP spatial matching summary:") # message(" Total facilities: ", nrow(wwtp_data)) # message(" Facilities matched to boundaries: ", sum(matches > 0)) # message(" Facilities not matched: ", sum(matches == 0)) # # if (sum(matches == 0) > 0) { # unmatched <- wwtp_data[matches == 0, ] # message(" Check coordinates for unmatched facilities") # print(head(unmatched)) # } # # return(matches) # } # # # Problem: Scale mismatch between data sources # # Solution: Ensure consistent spatial scales # verify_scale_consistency <- function(agricultural_data, wwtp_data, scale) { # # message("Scale consistency check for: ", scale) # # # Check ID formats # if (scale == "county") { # # FIPS codes should be 5 digits # invalid_fips <- sum(nchar(agricultural_data$ID) != 5) # message(" Invalid FIPS codes: ", invalid_fips) # } else if (scale == "huc8") { # # HUC8 codes should be 8 digits # invalid_huc8 <- sum(nchar(agricultural_data$ID) != 8) # message(" Invalid HUC8 codes: ", invalid_huc8) # } # # # Check coordinate ranges # coord_summary <- list( # lat_range = range(wwtp_data$Lat, na.rm = TRUE), # long_range = range(wwtp_data$Long, na.rm = TRUE) # ) # # message(" WWTP coordinate ranges:") # message(" Latitude: ", paste(round(coord_summary$lat_range, 2), collapse = " to ")) # message(" Longitude: ", paste(round(coord_summary$long_range, 2), collapse = " to ")) # # return(coord_summary) # } ## ----file_organization, eval=FALSE-------------------------------------------- # # Recommended project structure # create_project_structure <- function(project_name) { # # # Create main directories # dir.create(project_name, showWarnings = FALSE) # dir.create(file.path(project_name, "data"), showWarnings = FALSE) # dir.create(file.path(project_name, "data", "wwtp"), showWarnings = FALSE) # dir.create(file.path(project_name, "data", "agricultural"), showWarnings = FALSE) # dir.create(file.path(project_name, "results"), showWarnings = FALSE) # dir.create(file.path(project_name, "maps"), showWarnings = FALSE) # dir.create(file.path(project_name, "exports"), showWarnings = FALSE) # # # Create README # readme_content <- paste( # "# Manureshed Analysis Project", # "", # "## Data Files", # "- data/wwtp/ - WWTP facility data files", # "- data/agricultural/ - Custom agricultural data", # "", # "## Outputs", # "- results/ - Analysis results (.rds files)", # "- maps/ - Generated maps (.png files)", # "- exports/ - Exported data (.csv, .shp files)", # "", # "## Analysis Scripts", # "- analysis.R - Main analysis script", # "", # sep = "\n" # ) # # writeLines(readme_content, file.path(project_name, "README.md")) # # message("Project structure created: ", project_name) # message("Add your data files to the appropriate folders") # } # # # Create organized project # # create_project_structure("my_nutrient_analysis") ## ----data_documentation, eval=FALSE------------------------------------------- # # Document your custom data sources # document_data_sources <- function(wwtp_files = NULL, agricultural_files = NULL, # output_file = "data_documentation.txt") { # # doc_content <- c( # "DATA SOURCES DOCUMENTATION", # "============================", # "", # "Analysis Date: ", as.character(Sys.Date()), # "Package Version: ", as.character(packageVersion("manureshed")), # "" # ) # # if (!is.null(wwtp_files)) { # doc_content <- c(doc_content, # "WWTP DATA FILES:", # "----------------" # ) # # for (file in wwtp_files) { # if (file.exists(file)) { # file_info <- file.info(file) # doc_content <- c(doc_content, # paste("File:", file), # paste(" Size:", round(file_info$size / 1024, 2), "KB"), # paste(" Modified:", file_info$mtime), # paste(" Rows:", nrow(read.csv(file))), # "" # ) # } # } # } # # if (!is.null(agricultural_files)) { # doc_content <- c(doc_content, # "AGRICULTURAL DATA FILES:", # "------------------------" # ) # # for (file in agricultural_files) { # if (file.exists(file)) { # file_info <- file.info(file) # doc_content <- c(doc_content, # paste("File:", file), # paste(" Size:", round(file_info$size / 1024, 2), "KB"), # paste(" Modified:", file_info$mtime), # paste(" Rows:", nrow(read.csv(file))), # "" # ) # } # } # } # # doc_content <- c(doc_content, # "ANALYSIS PARAMETERS:", # "--------------------", # "Built-in data years: 1987-2016 (Agricultural), 2007-2016 (WWTP)", # "Spatial scales: county, huc8, huc2", # "Default cropland threshold: 500 ha (1,234 acres)", # "" # ) # # writeLines(doc_content, output_file) # message("Data documentation saved to: ", output_file) # } # # # Use documentation function # # document_data_sources( # # wwtp_files = c("data/wwtp/nitrogen_2020.csv", "data/wwtp/phosphorus_2020.csv"), # # agricultural_files = c("data/agricultural/custom_farm_data.csv") # # ) ## ----qa_workflow, eval=FALSE-------------------------------------------------- # # Complete quality assurance workflow # quality_assurance_workflow <- function(results, data_sources = NULL) { # # qa_report <- list() # qa_report$timestamp <- Sys.time() # qa_report$data_sources <- data_sources # # # 1. Basic data checks # qa_report$basic_checks <- list( # total_spatial_units = nrow(results$agricultural), # missing_classifications = sum(is.na(results$agricultural$N_class)) # ) # # # 2. Classification distribution # if ("N_class" %in% names(results$agricultural)) { # n_dist <- table(results$agricultural$N_class) # qa_report$classification_distribution <- list( # nitrogen = n_dist, # excluded_percentage = round(n_dist["Excluded"] / sum(n_dist) * 100, 1) # ) # } # # # 3. WWTP integration checks # if ("wwtp" %in% names(results)) { # qa_report$wwtp_checks <- list() # # for (nutrient in names(results$wwtp)) { # facilities <- results$wwtp[[nutrient]]$facility_data # qa_report$wwtp_checks[[nutrient]] <- list( # total_facilities = nrow(facilities), # facilities_with_loads = sum(!is.na(facilities[[paste0(toupper(substr(nutrient, 1, 1)), "_Load_tons")]])) # ) # } # } # # # 4. Spatial validation # if (inherits(results$agricultural, "sf")) { # qa_report$spatial_checks <- list( # invalid_geometries = sum(!st_is_valid(results$agricultural)), # crs = st_crs(results$agricultural)$input # ) # } # # # 5. Range checks # if ("integrated" %in% names(results)) { # for (nutrient in names(results$integrated)) { # data <- results$integrated[[nutrient]] # surplus_col <- paste0(substr(nutrient, 1, 1), "_surplus") # # if (surplus_col %in% names(data)) { # qa_report$range_checks[[nutrient]] <- list( # min_surplus = min(data[[surplus_col]], na.rm = TRUE), # max_surplus = max(data[[surplus_col]], na.rm = TRUE), # extreme_values = sum(abs(data[[surplus_col]]) > 1000, na.rm = TRUE) # ) # } # } # } # # # Generate QA summary # qa_summary <- list( # passed = TRUE, # warnings = character(), # errors = character() # ) # # # Check for issues # if (qa_report$basic_checks$missing_classifications > 0) { # qa_summary$warnings <- c(qa_summary$warnings, # paste("Missing classifications:", qa_report$basic_checks$missing_classifications)) # } # # if (qa_report$classification_distribution$excluded_percentage > 50) { # qa_summary$warnings <- c(qa_summary$warnings, # paste("High exclusion rate:", qa_report$classification_distribution$excluded_percentage, "%")) # } # # if ("spatial_checks" %in% names(qa_report) && qa_report$spatial_checks$invalid_geometries > 0) { # qa_summary$errors <- c(qa_summary$errors, # paste("Invalid geometries:", qa_report$spatial_checks$invalid_geometries)) # qa_summary$passed <- FALSE # } # # # Print summary # message("Quality Assurance Summary:") # message(" Status: ", ifelse(qa_summary$passed, "PASSED", "FAILED")) # # if (length(qa_summary$warnings) > 0) { # message(" Warnings:") # for (warning in qa_summary$warnings) { # message(" - ", warning) # } # } # # if (length(qa_summary$errors) > 0) { # message(" Errors:") # for (error in qa_summary$errors) { # message(" - ", error) # } # } # # qa_report$summary <- qa_summary # return(qa_report) # } # # # Run QA workflow # # qa_results <- quality_assurance_workflow(results_custom, "Custom 2021 WWTP data") ## ----eval=FALSE--------------------------------------------------------------- # # Get help on specific functions # ?load_user_wwtp # ?run_builtin_analysis # ?wwtp_clean_data # ?validate_columns # # # Check what data is available # check_builtin_data() # list_available_years() # # # Package health check # health_check() # # # Test data download connection # test_osf_connection()