## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 8,
  fig.height = 6
)

## ----setup--------------------------------------------------------------------
library(manureshed)

## ----custom_wwtp_basic, eval=FALSE--------------------------------------------
# # Use your WWTP files for 2020
# results_2020 <- run_builtin_analysis(
#   scale = "huc8",
#   year = 2020,  # Agricultural data available 1987-2016
#   nutrients = "nitrogen",
#   include_wwtp = TRUE,
#   custom_wwtp_nitrogen = "nitrogen_2021.csv",
#   wwtp_load_units = "kg"
# )
# 
# # 3. Check results
# summarize_results(results_custom)
# quick_check(results_custom)
# 
# # 4. Create maps
# nitrogen_map <- map_agricultural_classification(
#   results_custom$integrated$nitrogen,
#   "nitrogen", "combined_N_class",
#   "2021 Nitrogen with Custom WWTP Data"
# )
# 
# save_plot(nitrogen_map, "custom_analysis_2021.png")
# 
# # 5. Save results
# save_spatial_data(
#   results_custom$integrated$nitrogen,
#   "nitrogen_2021_results.rds"
# )

## ----other_sources, eval=FALSE------------------------------------------------
# # Example: Adding industrial sources
# industrial_data <- data.frame(
#   Facility_Name = c("Steel Plant A", "Chemical Plant B", "Food Processor C"),
#   Lat = c(41.5, 40.7, 42.1),
#   Long = c(-81.7, -82.1, -83.5),
#   N_Load_tons = c(50, 75, 30),
#   P_Load_tons = c(10, 15, 8),
#   Source_Type = "Industrial"
# )
# 
# # Convert to spatial format
# industrial_sf <- st_as_sf(industrial_data,
#                          coords = c("Long", "Lat"),
#                          crs = 4326) %>%
#   st_transform(5070)  # Transform to analysis CRS
# 
# # Aggregate by spatial units (example for HUC8)
# boundaries <- load_builtin_boundaries("huc8")
# industrial_aggregated <- wwtp_aggregate_by_boundaries(
#   industrial_sf, boundaries, "nitrogen", "huc8"
# )
# 
# # Add to existing results
# # (You would modify the integration functions to include industrial sources)

## ----time_periods, eval=FALSE-------------------------------------------------
# # Create a time series dataset
# years_to_analyze <- 2018:2022
# time_series_results <- list()
# 
# for (year in years_to_analyze) {
#   # Check if you have WWTP data for this year
#   wwtp_file <- paste0("wwtp_nitrogen_", year, ".csv")
# 
#   if (file.exists(wwtp_file)) {
#     time_series_results[[as.character(year)]] <- run_builtin_analysis(
#       scale = "huc8",
#       year = year,
#       nutrients = "nitrogen",
#       include_wwtp = TRUE,
#       custom_wwtp_nitrogen = wwtp_file,
#       save_outputs = FALSE,
#       verbose = FALSE
#     )
#   } else {
#     # Agricultural only if no WWTP data
#     time_series_results[[as.character(year)]] <- run_builtin_analysis(
#       scale = "huc8",
#       year = year,
#       nutrients = "nitrogen",
#       include_wwtp = FALSE,
#       save_outputs = FALSE,
#       verbose = FALSE
#     )
#   }
# }
# 
# # Compare results across years
# yearly_summary <- do.call(rbind, lapply(names(time_series_results), function(year) {
#   result <- time_series_results[[year]]
#   classes <- table(result$agricultural$N_class)
# 
#   data.frame(
#     Year = as.numeric(year),
#     Total_Units = sum(classes),
#     Source_Units = classes["Source"] %||% 0,
#     Sink_Units = sum(classes[c("Sink_Deficit", "Sink_Fertilizer")], na.rm = TRUE),
#     Excluded_Units = classes["Excluded"] %||% 0
#   )
# }))
# 
# print(yearly_summary)

## ----custom_agricultural, eval=FALSE------------------------------------------
# # Example: Custom agricultural data format
# custom_farm_data <- data.frame(
#   County_FIPS = c("39001", "39003", "39005"),
#   Manure_N_kg = c(100000, 150000, 200000),
#   Manure_P_kg = c(20000, 30000, 40000),
#   Fertilizer_N_kg = c(50000, 75000, 100000),
#   Fertilizer_P_kg = c(10000, 15000, 20000),
#   N_Fixation_kg = c(25000, 40000, 35000),
#   Crop_N_Removal_kg = c(80000, 120000, 140000),
#   Crop_P_Removal_kg = c(15000, 25000, 30000),
#   Cropland_acres = c(45000, 67000, 52000)
# )
# 
# # Convert to package format
# standardized_farm_data <- data.frame(
#   ID = custom_farm_data$County_FIPS,
#   NAME = paste("County", substr(custom_farm_data$County_FIPS, 3, 5)),
#   manure_N = custom_farm_data$Manure_N_kg,
#   manure_P = custom_farm_data$Manure_P_kg,
#   fertilizer_N = custom_farm_data$Fertilizer_N_kg,
#   fertilizer_P = custom_farm_data$Fertilizer_P_kg,
#   N_fixation = custom_farm_data$N_Fixation_kg,
#   crop_removal_N = custom_farm_data$Crop_N_Removal_kg,
#   crop_removal_P = custom_farm_data$Crop_P_Removal_kg,
#   cropland = custom_farm_data$Cropland_acres
# )
# 
# # Apply classifications
# custom_classified <- standardized_farm_data %>%
#   agri_classify_nitrogen(cropland_threshold = 1234, scale = "county") %>%
#   agri_classify_phosphorus(cropland_threshold = 1234, scale = "county")
# 
# print(custom_classified)

## ----data_validation, eval=FALSE----------------------------------------------
# # Function to validate your custom data
# validate_custom_data <- function(data, data_type = "wwtp") {
# 
#   issues <- list()
# 
#   if (data_type == "wwtp") {
#     # Check required columns
#     required_cols <- c("Facility_Name", "Lat", "Long")
#     missing_cols <- setdiff(required_cols, names(data))
#     if (length(missing_cols) > 0) {
#       issues$missing_columns <- missing_cols
#     }
# 
#     # Check coordinate ranges (for US)
#     if ("Lat" %in% names(data) && "Long" %in% names(data)) {
#       invalid_coords <- sum(data$Lat < 24 | data$Lat > 50 |
#                            data$Long < -125 | data$Long > -66, na.rm = TRUE)
#       if (invalid_coords > 0) {
#         issues$invalid_coordinates <- invalid_coords
#       }
#     }
# 
#     # Check for negative loads
#     load_cols <- names(data)[grepl("Load|load", names(data))]
#     for (col in load_cols) {
#       if (col %in% names(data)) {
#         negative_loads <- sum(data[[col]] < 0, na.rm = TRUE)
#         if (negative_loads > 0) {
#           issues[[paste0("negative_", col)]] <- negative_loads
#         }
#       }
#     }
#   }
# 
#   if (data_type == "agricultural") {
#     # Check for negative values in nutrient data
#     nutrient_cols <- c("manure_N", "manure_P", "fertilizer_N", "fertilizer_P",
#                        "crop_removal_N", "crop_removal_P", "cropland")
# 
#     for (col in nutrient_cols) {
#       if (col %in% names(data)) {
#         negative_values <- sum(data[[col]] < 0, na.rm = TRUE)
#         if (negative_values > 0) {
#           issues[[paste0("negative_", col)]] <- negative_values
#         }
#       }
#     }
#   }
# 
#   if (length(issues) == 0) {
#     message("âœ“ Data validation passed")
#   } else {
#     message("âš  Data validation issues found:")
#     for (issue in names(issues)) {
#       message("  ", issue, ": ", issues[[issue]])
#     }
#   }
# 
#   return(issues)
# }
# 
# # Use the validation function
# # validate_custom_data(your_wwtp_data, "wwtp")
# # validate_custom_data(your_farm_data, "agricultural")

## ----export_results, eval=FALSE-----------------------------------------------
# # Export results in different formats
# export_analysis_results <- function(results, output_dir = "exports") {
# 
#   dir.create(output_dir, showWarnings = FALSE)
# 
#   # Export agricultural data as CSV
#   agri_csv <- st_drop_geometry(results$agricultural)
#   write.csv(agri_csv, file.path(output_dir, "agricultural_results.csv"), row.names = FALSE)
# 
#   # Export as shapefile (if sf package available)
#   if (requireNamespace("sf", quietly = TRUE)) {
#     st_write(results$agricultural, file.path(output_dir, "agricultural_results.shp"),
#              delete_dsn = TRUE, quiet = TRUE)
#   }
# 
#   # Export WWTP facilities if available
#   if ("wwtp" %in% names(results)) {
#     for (nutrient in names(results$wwtp)) {
#       facility_data <- results$wwtp[[nutrient]]$facility_data
#       write.csv(facility_data,
#                 file.path(output_dir, paste0("wwtp_", nutrient, "_facilities.csv")),
#                 row.names = FALSE)
#     }
#   }
# 
#   # Export integrated results if available
#   if ("integrated" %in% names(results)) {
#     for (nutrient in names(results$integrated)) {
#       integrated_csv <- st_drop_geometry(results$integrated[[nutrient]])
#       write.csv(integrated_csv,
#                 file.path(output_dir, paste0("integrated_", nutrient, "_results.csv")),
#                 row.names = FALSE)
#     }
#   }
# 
#   message("Results exported to: ", output_dir)
# }
# 
# # Use the export function
# # export_analysis_results(results_custom, "my_exports")

## ----troubleshooting_wwtp, eval=FALSE-----------------------------------------
# # Common WWTP data problems and solutions
# 
# # Problem 1: "No valid facilities remaining after cleaning"
# # Solution: Check coordinates and load values
# wwtp_data <- read.csv("your_wwtp_file.csv")
# summary(wwtp_data)  # Look for obvious issues
# 
# # Check coordinate ranges
# summary(wwtp_data$Latitude)   # Should be ~24-50 for US
# summary(wwtp_data$Longitude)  # Should be ~-125 to -66 for US
# 
# # Check load values
# summary(wwtp_data$Load)  # Should be positive numbers
# 
# # Problem 2: Wrong coordinate system
# # Solution: Make sure coordinates are in decimal degrees (not UTM)
# # Example conversion if needed:
# # library(sf)
# # points_utm <- st_as_sf(data, coords = c("X_UTM", "Y_UTM"), crs = 32617)
# # points_dd <- st_transform(points_utm, 4326)
# # coords_dd <- st_coordinates(points_dd)
# 
# # Problem 3: Mixed units in same file
# # Solution: Standardize units before analysis
# standardize_mixed_units <- function(data, load_col, unit_col) {
#   data$standardized_load <- ifelse(
#     data[[unit_col]] == "kg", data[[load_col]],
#     ifelse(data[[unit_col]] == "lbs", data[[load_col]] / 2.20462,
#            data[[load_col]] * 907.185)  # assuming tons
#   )
#   return(data)
# }

## ----troubleshooting_agricultural, eval=FALSE---------------------------------
# # Common agricultural data problems
# 
# # Problem: Impossible nutrient balances
# # Solution: Check your units and conversion factors
# check_nutrient_balance <- function(data) {
#   # Calculate simple checks
#   data$total_inputs_N <- data$manure_N + data$fertilizer_N + data$N_fixation
#   data$efficiency_N <- data$crop_removal_N / data$total_inputs_N
# 
#   # Flag suspicious values
#   suspicious <- data$efficiency_N > 2.0 | data$efficiency_N < 0.1
# 
#   if (sum(suspicious, na.rm = TRUE) > 0) {
#     message("Warning: ", sum(suspicious, na.rm = TRUE), " units have suspicious N efficiency")
#     print(data[suspicious, c("ID", "total_inputs_N", "crop_removal_N", "efficiency_N")])
#   }
# 
#   return(data)
# }
# 
# # Problem: Missing spatial boundaries
# # Solution: Use built-in boundaries or provide your own
# # county_boundaries <- load_builtin_boundaries("county")
# # your_data_with_boundaries <- merge(your_data, county_boundaries, by.x = "FIPS", by.y = "FIPS")

## ----help, eval=FALSE---------------------------------------------------------
# # Get help on specific functions
# ?load_user_wwtp
# ?run_builtin_analysis
# ?wwtp_clean_data
# 
# # Check what data is available
# check_builtin_data()
# list_available_years()
# 
# # Package health check
# health_check()
# 
# # Test data download connection
# test_osf_connection()

## ----epa_standard, eval=FALSE-------------------------------------------------
# # Standard EPA format (auto-detected)
# results <- run_builtin_analysis(
#   scale = "county",
#   year = 2018,
#   nutrients = c("nitrogen", "phosphorus"),
#   include_wwtp = TRUE,
#   custom_wwtp_nitrogen = "nitrogen_2018.csv",
#   custom_wwtp_phosphorus = "phosphorus_2018.csv"
# )

## ----different_units, eval=FALSE----------------------------------------------
# # If your data uses pounds instead of kg
# results <- run_builtin_analysis(
#   scale = "huc8",
#   year = 2019,
#   nutrients = "nitrogen",
#   include_wwtp = TRUE,
#   custom_wwtp_nitrogen = "nitrogen_pounds_2019.csv",
#   wwtp_load_units = "lbs"  # Converts automatically
# )
# 
# # Other units: "kg", "lbs", "pounds", "tons"

## ----different_format, eval=FALSE---------------------------------------------
# # If headers are in different rows
# results <- run_builtin_analysis(
#   scale = "county",
#   year = 2021,
#   nutrients = "nitrogen",
#   include_wwtp = TRUE,
#   custom_wwtp_nitrogen = "nitrogen_2021.csv",
#   wwtp_skip_rows = 3,      # Skip first 3 rows
#   wwtp_header_row = 1      # Headers in row 1 after skipping
# )

## ----custom_columns, eval=FALSE-----------------------------------------------
# # If your columns have different names
# custom_mapping <- list(
#   facility_name = "Plant_Name",
#   latitude = "Lat_DD",
#   longitude = "Long_DD",
#   pollutant_load = "Annual_Load_kg",
#   state = "State_Code"
# )
# 
# results <- run_builtin_analysis(
#   scale = "huc8",
#   year = 2022,
#   nutrients = "nitrogen",
#   include_wwtp = TRUE,
#   custom_wwtp_nitrogen = "custom_format.csv",
#   wwtp_column_mapping = custom_mapping
# )

## ----manual_wwtp, eval=FALSE--------------------------------------------------
# # Step 1: Load your WWTP file
# wwtp_raw <- load_user_wwtp(
#   file_path = "nitrogen_2020.csv",
#   nutrient = "nitrogen",
#   load_units = "lbs"
# )
# 
# # Step 2: Clean the data
# wwtp_clean <- wwtp_clean_data(wwtp_raw, "nitrogen")
# 
# # Step 3: Classify facilities by size
# wwtp_classified <- wwtp_classify_sources(wwtp_clean, "nitrogen")
# 
# # Step 4: Convert to spatial format
# wwtp_spatial <- wwtp_to_spatial(wwtp_classified)
# 
# # Step 5: Load boundaries and aggregate by spatial units
# boundaries <- load_builtin_boundaries("huc8")
# wwtp_aggregated <- wwtp_aggregate_by_boundaries(
#   wwtp_spatial, boundaries, "nitrogen", "huc8"
# )
# 
# # Step 6: Integrate with agricultural data
# agri_data <- load_builtin_nugis("huc8", 2020)
# agri_processed <- agri_classify_complete(agri_data, "huc8")
# 
# integrated <- integrate_wwtp_agricultural(
#   agri_processed, wwtp_aggregated, "nitrogen",
#   cropland_threshold = 1234, scale = "huc8"
# )

## ----unit_conversions, eval=FALSE---------------------------------------------
# # Convert between units
# kg_loads <- c(1000, 2000, 5000)
# tons_loads <- convert_load_units(kg_loads, "kg")
# 
# lbs_loads <- c(2000, 4000, 10000)
# tons_loads <- convert_load_units(lbs_loads, "lbs")
# 
# print(data.frame(
#   Original_kg = kg_loads,
#   Converted_tons = convert_load_units(kg_loads, "kg")
# ))

## ----p2o5_conversion, eval=FALSE----------------------------------------------
# # If you have P2O5 data, convert to P
# p2o5_values <- c(100, 200, 300)  # kg P2O5
# p_values <- p2o5_values * P2O5_TO_P  # Convert to P
# 
# print(paste("P2O5 to P conversion factor:", P2O5_TO_P))

## ----county_data, eval=FALSE--------------------------------------------------
# # County analysis - make sure you have 5-digit FIPS codes
# county_results <- run_builtin_analysis(
#   scale = "county",
#   year = 2016,
#   nutrients = "nitrogen"
# )
# 
# # Your county WWTP data should have state and county info

## ----huc8_data, eval=FALSE----------------------------------------------------
# # HUC8 analysis - 8-digit watershed codes
# huc8_results <- run_builtin_analysis(
#   scale = "huc8",
#   year = 2016,
#   nutrients = "nitrogen"
# )
# 
# # Make sure HUC8 codes are 8 digits (add leading zero if needed)
# huc8_codes <- c("4110001", "4110002")  # 7 digits
# formatted_codes <- format_huc8(huc8_codes)  # Adds leading zero
# print(formatted_codes)  # "04110001", "04110002"

## ----huc2_data, eval=FALSE----------------------------------------------------
# # HUC2 analysis - 2-digit regional codes
# huc2_results <- run_builtin_analysis(
#   scale = "huc2",
#   year = 2016,
#   nutrients = "nitrogen"
# )

## ----state_analysis, eval=FALSE-----------------------------------------------
# # Analyze just one state
# iowa_results <- run_state_analysis(
#   state = "IA",
#   scale = "county",
#   year = 2016,
#   nutrients = "nitrogen",
#   include_wwtp = TRUE
# )
# 
# # With custom WWTP data
# texas_results <- run_state_analysis(
#   state = "TX",
#   scale = "huc8",
#   year = 2020,
#   nutrients = "nitrogen",
#   include_wwtp = TRUE,
#   custom_wwtp_nitrogen = "texas_wwtp_2020.csv"
# )

## ----multi_state, eval=FALSE--------------------------------------------------
# # Analyze several states
# midwest_states <- c("IA", "IL", "IN", "OH")
# state_results <- list()
# 
# for (state in midwest_states) {
#   state_results[[state]] <- run_state_analysis(
#     state = state,
#     scale = "county",
#     year = 2016,
#     nutrients = "nitrogen",
#     include_wwtp = TRUE
#   )
# }

## ----data_validation2, eval=FALSE---------------------------------------------
# # Check your results make sense
# quick_check(results)
# 
# # Look at the classifications
# table(results$agricultural$N_class)
# 
# # Check WWTP facility counts
# if ("wwtp" %in% names(results)) {
#   print(paste("WWTP facilities:", nrow(results$wwtp$nitrogen$facility_data)))
# }

## ----data_issues, eval=FALSE--------------------------------------------------
# # Problem: Negative nutrient values
# # Solution: Check your data source and units
# 
# # Problem: All facilities excluded
# # Solution: Check coordinate system (should be decimal degrees)
# 
# # Problem: No WWTP facilities found
# # Solution: Check facility coordinates are in correct format
# 
# # Problem: Classification doesn't make sense
# # Solution: Check cropland threshold and nutrient balance calculations

## ----time_series, eval=FALSE--------------------------------------------------
# # Analyze multiple years
# years_to_analyze <- 2014:2016
# 
# batch_results <- batch_analysis_years(
#   years = years_to_analyze,
#   scale = "huc8",
#   nutrients = "nitrogen",
#   include_wwtp = TRUE
# )
# 
# # With custom WWTP data for some years
# custom_wwtp_files <- list(
#   "2018" = "nitrogen_2018.csv",
#   "2019" = "nitrogen_2019.csv",
#   "2020" = "nitrogen_2020.csv"
# )
# 
# # Process each year with custom data
# custom_results <- list()
# for (year in names(custom_wwtp_files)) {
#   custom_results[[year]] <- run_builtin_analysis(
#     scale = "county",
#     year = as.numeric(year),
#     nutrients = "nitrogen",
#     include_wwtp = TRUE,
#     custom_wwtp_nitrogen = custom_wwtp_files[[year]]
#   )
# }

## ----eval=FALSE---------------------------------------------------------------
# # Organize your data files
# # project_folder/
# #   â”œâ”€â”€ wwtp_data/
# #   â”‚   â”œâ”€â”€ nitrogen_2018.csv
# #   â”‚   â”œâ”€â”€ nitrogen_2019.csv
# #   â”‚   â””â”€â”€ phosphorus_2018.csv
# #   â”œâ”€â”€ results/
# #   â””â”€â”€ maps/
# 
# # Set working directory
# setwd("project_folder")
# 
# # Run analysis with organized files
# results <- run_builtin_analysis(
#   scale = "huc8",
#   year = 2018,
#   nutrients = "nitrogen",
#   include_wwtp = TRUE,
#   custom_wwtp_nitrogen = "wwtp_data/nitrogen_2018.csv",
#   output_dir = "results"
# )

## ----memory_management, eval=FALSE--------------------------------------------
# # For large datasets, clear cache periodically
# clear_data_cache()
# 
# # Check package health
# health_check()
# 
# # Free up memory between analyses
# rm(large_results)
# gc()

## ----complete_example, eval=FALSE---------------------------------------------
# # 1. Prepare your WWTP file (nitrogen_2021.csv)
# # Make sure it has columns: Facility Name, Latitude, Longitude, Load (kg/yr), State
# 
# # 2. Run analysis
# results_custom <- run_builtin_analysis(
#   scale = "huc8",
#   year = 2021,  # Agricultural data extrapolated to 2021
#   nutrients = "nitrogen",
#   include_wwtp = TRUE,
#   custom_wwtp_nitrogen = "nitrogen_2021.csv",
#   wwtp_load_units = "kg"
# )
# 
# # 3. Check results
# summarize_results(results_custom)
# quick_check(results_custom)
# 
# # 4. Create maps
# nitrogen_map <- map_agricultural_classification(
#   results_custom$integrated$nitrogen,
#   "nitrogen", "combined_N_class",
#   "2021 Nitrogen with Custom WWTP Data"
# )
# 
# save_plot(nitrogen_map, "custom_analysis_2021.png")
# 
# # 5. Save results
# save_spatial_data(
#   results_custom$integrated$nitrogen,
#   "nitrogen_2021_results.rds"
# )

## ----troubleshooting_integration, eval=FALSE----------------------------------
# # Common integration problems
# 
# # Problem: WWTP facilities not matching spatial units
# # Solution: Check coordinate systems and boundary files
# check_wwtp_spatial_match <- function(wwtp_data, boundaries) {
# 
#   # Convert WWTP to spatial
#   wwtp_sf <- st_as_sf(wwtp_data, coords = c("Long", "Lat"), crs = 4326)
#   wwtp_projected <- st_transform(wwtp_sf, st_crs(boundaries))
# 
#   # Check spatial intersection
#   intersections <- st_intersects(wwtp_projected, boundaries)
# 
#   # Count facilities per spatial unit
#   matches <- lengths(intersections)
# 
#   message("WWTP spatial matching summary:")
#   message("  Total facilities: ", nrow(wwtp_data))
#   message("  Facilities matched to boundaries: ", sum(matches > 0))
#   message("  Facilities not matched: ", sum(matches == 0))
# 
#   if (sum(matches == 0) > 0) {
#     unmatched <- wwtp_data[matches == 0, ]
#     message("  Check coordinates for unmatched facilities")
#     print(head(unmatched))
#   }
# 
#   return(matches)
# }
# 
# # Problem: Scale mismatch between data sources
# # Solution: Ensure consistent spatial scales
# verify_scale_consistency <- function(agricultural_data, wwtp_data, scale) {
# 
#   message("Scale consistency check for: ", scale)
# 
#   # Check ID formats
#   if (scale == "county") {
#     # FIPS codes should be 5 digits
#     invalid_fips <- sum(nchar(agricultural_data$ID) != 5)
#     message("  Invalid FIPS codes: ", invalid_fips)
#   } else if (scale == "huc8") {
#     # HUC8 codes should be 8 digits
#     invalid_huc8 <- sum(nchar(agricultural_data$ID) != 8)
#     message("  Invalid HUC8 codes: ", invalid_huc8)
#   }
# 
#   # Check coordinate ranges
#   coord_summary <- list(
#     lat_range = range(wwtp_data$Lat, na.rm = TRUE),
#     long_range = range(wwtp_data$Long, na.rm = TRUE)
#   )
# 
#   message("  WWTP coordinate ranges:")
#   message("    Latitude: ", paste(round(coord_summary$lat_range, 2), collapse = " to "))
#   message("    Longitude: ", paste(round(coord_summary$long_range, 2), collapse = " to "))
# 
#   return(coord_summary)
# }

## ----file_organization, eval=FALSE--------------------------------------------
# # Recommended project structure
# create_project_structure <- function(project_name) {
# 
#   # Create main directories
#   dir.create(project_name, showWarnings = FALSE)
#   dir.create(file.path(project_name, "data"), showWarnings = FALSE)
#   dir.create(file.path(project_name, "data", "wwtp"), showWarnings = FALSE)
#   dir.create(file.path(project_name, "data", "agricultural"), showWarnings = FALSE)
#   dir.create(file.path(project_name, "results"), showWarnings = FALSE)
#   dir.create(file.path(project_name, "maps"), showWarnings = FALSE)
#   dir.create(file.path(project_name, "exports"), showWarnings = FALSE)
# 
#   # Create README
#   readme_content <- paste(
#     "# Manureshed Analysis Project",
#     "",
#     "## Data Files",
#     "- data/wwtp/ - WWTP facility data files",
#     "- data/agricultural/ - Custom agricultural data",
#     "",
#     "## Outputs",
#     "- results/ - Analysis results (.rds files)",
#     "- maps/ - Generated maps (.png files)",
#     "- exports/ - Exported data (.csv, .shp files)",
#     "",
#     "## Analysis Scripts",
#     "- analysis.R - Main analysis script",
#     "",
#     sep = "\n"
#   )
# 
#   writeLines(readme_content, file.path(project_name, "README.md"))
# 
#   message("Project structure created: ", project_name)
#   message("Add your data files to the appropriate folders")
# }
# 
# # Create organized project
# # create_project_structure("my_nutrient_analysis")

## ----data_documentation, eval=FALSE-------------------------------------------
# # Document your custom data sources
# document_data_sources <- function(wwtp_files = NULL, agricultural_files = NULL,
#                                  output_file = "data_documentation.txt") {
# 
#   doc_content <- c(
#     "DATA SOURCES DOCUMENTATION",
#     "============================",
#     "",
#     "Analysis Date: ", as.character(Sys.Date()),
#     "Package Version: ", as.character(packageVersion("manureshed")),
#     ""
#   )
# 
#   if (!is.null(wwtp_files)) {
#     doc_content <- c(doc_content,
#       "WWTP DATA FILES:",
#       "----------------"
#     )
# 
#     for (file in wwtp_files) {
#       if (file.exists(file)) {
#         file_info <- file.info(file)
#         doc_content <- c(doc_content,
#           paste("File:", file),
#           paste("  Size:", round(file_info$size / 1024, 2), "KB"),
#           paste("  Modified:", file_info$mtime),
#           paste("  Rows:", nrow(read.csv(file))),
#           ""
#         )
#       }
#     }
#   }
# 
#   if (!is.null(agricultural_files)) {
#     doc_content <- c(doc_content,
#       "AGRICULTURAL DATA FILES:",
#       "------------------------"
#     )
# 
#     for (file in agricultural_files) {
#       if (file.exists(file)) {
#         file_info <- file.info(file)
#         doc_content <- c(doc_content,
#           paste("File:", file),
#           paste("  Size:", round(file_info$size / 1024, 2), "KB"),
#           paste("  Modified:", file_info$mtime),
#           paste("  Rows:", nrow(read.csv(file))),
#           ""
#         )
#       }
#     }
#   }
# 
#   doc_content <- c(doc_content,
#     "ANALYSIS PARAMETERS:",
#     "--------------------",
#     "Built-in data years: 1987-2016 (Agricultural), 2007-2016 (WWTP)",
#     "Spatial scales: county, huc8, huc2",
#     "Default cropland threshold: 500 ha (1,234 acres)",
#     ""
#   )
# 
#   writeLines(doc_content, output_file)
#   message("Data documentation saved to: ", output_file)
# }
# 
# # Use documentation function
# # document_data_sources(
# #   wwtp_files = c("data/wwtp/nitrogen_2020.csv", "data/wwtp/phosphorus_2020.csv"),
# #   agricultural_files = c("data/agricultural/custom_farm_data.csv")
# # )

## ----qa_workflow, eval=FALSE--------------------------------------------------
# # Complete quality assurance workflow
# quality_assurance_workflow <- function(results, data_sources = NULL) {
# 
#   qa_report <- list()
#   qa_report$timestamp <- Sys.time()
#   qa_report$data_sources <- data_sources
# 
#   # 1. Basic data checks
#   qa_report$basic_checks <- list(
#     total_spatial_units = nrow(results$agricultural),
#     missing_classifications = sum(is.na(results$agricultural$N_class))
#   )
# 
#   # 2. Classification distribution
#   if ("N_class" %in% names(results$agricultural)) {
#     n_dist <- table(results$agricultural$N_class)
#     qa_report$classification_distribution <- list(
#       nitrogen = n_dist,
#       excluded_percentage = round(n_dist["Excluded"] / sum(n_dist) * 100, 1)
#     )
#   }
# 
#   # 3. WWTP integration checks
#   if ("wwtp" %in% names(results)) {
#     qa_report$wwtp_checks <- list()
# 
#     for (nutrient in names(results$wwtp)) {
#       facilities <- results$wwtp[[nutrient]]$facility_data
#       qa_report$wwtp_checks[[nutrient]] <- list(
#         total_facilities = nrow(facilities),
#         facilities_with_loads = sum(!is.na(facilities[[paste0(toupper(substr(nutrient, 1, 1)), "_Load_tons")]]))
#       )
#     }
#   }
# 
#   # 4. Spatial validation
#   if (inherits(results$agricultural, "sf")) {
#     qa_report$spatial_checks <- list(
#       invalid_geometries = sum(!st_is_valid(results$agricultural)),
#       crs = st_crs(results$agricultural)$input
#     )
#   }
# 
#   # 5. Range checks
#   if ("integrated" %in% names(results)) {
#     for (nutrient in names(results$integrated)) {
#       data <- results$integrated[[nutrient]]
#       surplus_col <- paste0(substr(nutrient, 1, 1), "_surplus")
# 
#       if (surplus_col %in% names(data)) {
#         qa_report$range_checks[[nutrient]] <- list(
#           min_surplus = min(data[[surplus_col]], na.rm = TRUE),
#           max_surplus = max(data[[surplus_col]], na.rm = TRUE),
#           extreme_values = sum(abs(data[[surplus_col]]) > 1000, na.rm = TRUE)
#         )
#       }
#     }
#   }
# 
#   # Generate QA summary
#   qa_summary <- list(
#     passed = TRUE,
#     warnings = character(),
#     errors = character()
#   )
# 
#   # Check for issues
#   if (qa_report$basic_checks$missing_classifications > 0) {
#     qa_summary$warnings <- c(qa_summary$warnings,
#                              paste("Missing classifications:", qa_report$basic_checks$missing_classifications))
#   }
# 
#   if (qa_report$classification_distribution$excluded_percentage > 50) {
#     qa_summary$warnings <- c(qa_summary$warnings,
#                              paste("High exclusion rate:", qa_report$classification_distribution$excluded_percentage, "%"))
#   }
# 
#   if ("spatial_checks" %in% names(qa_report) && qa_report$spatial_checks$invalid_geometries > 0) {
#     qa_summary$errors <- c(qa_summary$errors,
#                            paste("Invalid geometries:", qa_report$spatial_checks$invalid_geometries))
#     qa_summary$passed <- FALSE
#   }
# 
#   # Print summary
#   message("Quality Assurance Summary:")
#   message("  Status: ", ifelse(qa_summary$passed, "PASSED", "FAILED"))
# 
#   if (length(qa_summary$warnings) > 0) {
#     message("  Warnings:")
#     for (warning in qa_summary$warnings) {
#       message("    - ", warning)
#     }
#   }
# 
#   if (length(qa_summary$errors) > 0) {
#     message("  Errors:")
#     for (error in qa_summary$errors) {
#       message("    - ", error)
#     }
#   }
# 
#   qa_report$summary <- qa_summary
#   return(qa_report)
# }
# 
# # Run QA workflow
# # qa_results <- quality_assurance_workflow(results_custom, "Custom 2021 WWTP data")

## ----eval=FALSE---------------------------------------------------------------
# # Get help on specific functions
# ?load_user_wwtp
# ?run_builtin_analysis
# ?wwtp_clean_data
# ?validate_columns
# 
# # Check what data is available
# check_builtin_data()
# list_available_years()
# 
# # Package health check
# health_check()
# 
# # Test data download connection
# test_osf_connection()