--- title: "Reproducing Reported road casualties in Great Britain: pedestrian factsheet" author: "Blaise Kelly" date: "`r Sys.Date()`" output: rmarkdown::html_document: toc: true toc_float: true vignette: > %\VignetteIndexEntry{Reproducing Reported road casualties in Great Britain: pedestrian factsheet} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( eval = FALSE, collapse = TRUE, comment = "#>" ) ``` > **Note:** This vignette is not evaluated during package checks to reduce build time and dependencies. > > **See rendered results online:** > > - [2024 Pedestrian Factsheet](https://rpubs.com/Blaise/stats19_pfs_2024) > - [2023 Pedestrian Factsheet](https://rpubs.com/Blaise/stats19_pfs_2023) > > To reproduce the results yourself, download the source code of this vignette and set `eval=TRUE` in the knitr options, or view the discussion in [GitHub issue #240](https://github.com/ropensci/stats19/issues/240). ```{r setup, echo=FALSE, warning=FALSE, error=FALSE, message=FALSE} # https://www.gov.uk/government/statistics/reported-road-casualties-great-britain-pedestrian-factsheet-2023/reported-road-casualties-in-great-britain-pedestrian-factsheet-2023 #library(stats19) library(stats19) library(sf) library(dplyr) library(lubridate) library(reshape2) library(ggplot2) library(knitr) library(readODS) library(gt) library(clock) library(stringr) library(tidyr) # define plot dimensions knitr::opts_chunk$set( out.width = "100%" # scale relative to text width ) # what casualty is the report for? options Pedestrian, Cyclist, escooters report_casualty <- "Pedestrian" # stats19 usually updated in September, so if it is October last years data should be there yr2calc <- 2024 # request collision data (entering 2004 results in a table with all years) crashes = get_stats19(year = "2004", type = "collision", ask = FALSE, format = TRUE, output_format = "data.frame") |> filter(collision_year >= 2004) # import the adjusted casualty data ready to join to the original #adj <- get_stats19_adjustments() ## request casualty casualties = get_stats19(year = "2004", type = "casualty", ask = FALSE, format = TRUE, output_format = "data.frame") |> filter(collision_year >= 2004) |> mutate(fatal_count = if_else(casualty_severity == "Fatal", 1, 0)) # add a column for fatal tally to enable same method to be used for serious and slight ## request vehicle vehicles = get_stats19(year = "2004", type = "vehicle", ask = FALSE, format = TRUE, output_format = "data.frame") |> filter(collision_year >= 2004) # get population data from https://www.ons.gov.uk/peoplepopulationandcommunity/populationandmigration/populationestimates uk_pop <- read.csv("https://www.ons.gov.uk/file?uri=/peoplepopulationandcommunity/populationandmigration/populationestimates/datasets/populationestimatestimeseriesdataset/current/pop.csv", skip = 7) uk_pop <- uk_pop[,c(1,6)] names(uk_pop) <- c("Year", "Population") # get trip data NTS0303 https://www.gov.uk/government/statistical-data-sets/nts03-modal-comparisons download.file("https://assets.publishing.service.gov.uk/media/66ce0f118e33f28aae7e1f75/nts0303.ods", destfile = "nts0303.ods", mode = "wb") # dataset links to this sheet, but it is total distance and not dissagregated by travel mode #download.file("https://assets.publishing.service.gov.uk/media/66ce0e818e33f28aae7e1f71/nts0101.ods", destfile = "nts0101.ods", mode = "wb") # read in trip data trips <- read_ods("nts0303.ods", sheet = "NTS0303c_miles", skip = 5) |> left_join(uk_pop, by = "Year") |> mutate(tot_dist_billion_miles = (`Walk [notes 2, 3]`*Population)/10^9) # speed up debugging by saving these key dfs locally # save(casualties,crashes,vehicles, trips, file = "all_years.RData") #load("all_years.RData") # # most of the data is based on the last 5 years, speed up calcs by creating df for this #crashes$number_of_casualties <- as.numeric(crashes$number_of_casualties) cra_L5Y <- filter(crashes, collision_year <= yr2calc & collision_year >= yr2calc-4) cas_L5Y <- filter(casualties, collision_year <= yr2calc & collision_year >= yr2calc-4) veh_L5Y <- filter(vehicles, collision_year <= yr2calc & collision_year >= yr2calc-4) # # ``` ```{r, echo = FALSE, warning=FALSE, message=FALSE} # pick out data only for 2004 fat_cas_2004 <- casualties |> filter(collision_year == "2004" & casualty_severity == "Fatal" & casualty_class == report_casualty) #casualties this year (TY) fat_cas_TY <- casualties |> filter(collision_year == yr2calc & casualty_severity == "Fatal" & casualty_class == report_casualty) if(NROW(fat_cas_2004)>NROW(fat_cas_TY)){ ud <- "down" fat_cas_diff <- round((1-NROW(fat_cas_TY)/NROW(fat_cas_2004))*100) } else { ud <- "up" fat_cas_diff <- round((1-NROW(fat_cas_2004)/NROW(fat_cas_TY))*100) } ser_cas_2004 <- casualties |> filter(collision_year == "2004" & casualty_class == report_casualty) #casualties this year (TY) ser_cas_TY <- casualties |> filter(collision_year == yr2calc & casualty_class == report_casualty) if(NROW(ser_cas_2004)>NROW(ser_cas_TY)){ id <- "decreased" ser_cas_diff <-(((sum(ser_cas_2004$casualty_adjusted_severity_serious, na.rm = TRUE)-sum(ser_cas_TY$casualty_adjusted_severity_serious,na.rm = TRUE))/sum(ser_cas_2004$casualty_adjusted_severity_serious, na.rm = TRUE))*100) } else { id <- "increased" ser_cas_diff <- round((1-sum(ser_cas_2004$casualties,na.rm = TRUE)/sum(ser_cas_TY$casualties))*100) } dist_walked_2004 <- filter(trips, Year == "2004") dist_walked_TY <- filter(trips, Year == yr2calc) if(dist_walked_TY$tot_dist_billion_miles - fatalities were down 39% from 677 to 409 - serious injuries (adjusted) decreased by 48% - pedestrian traffic (distance walked) decreased by 13% Averaged over the period 2020 to 2024: ```{r, echo = FALSE, warning=FALSE, message=FALSE} crash_cas <- inner_join(cas_L5Y,cra_L5Y) |> filter(casualty_type == report_casualty) # create column for weeks dths_per_wk_fat <- crash_cas |> filter(casualty_severity == "Fatal" & casualty_class == report_casualty) |> ## pick out casualty the stats will focus on mutate(wk = isoweek(date),## determine the week number of each date yr = year(date)) |> ## add year so we can include all weeks over the 5 years group_by(wk,yr) |> summarise(casualties = sum(fatal_count)) # create column for weeks dths_per_wk_ser <- crash_cas %>% mutate(wk = isoweek(date),# determine the week number of each date yr = year(date)) %>% ## add year so all weeks over the 5 years are included filter(casualty_class == report_casualty) |> ## pick out casualty the stats will focus on group_by(wk,yr) %>% summarise(casualties = sum(casualty_adjusted_severity_serious,na.rm = TRUE)) ``` - an average of 8 pedestrians died and 115 were seriously injured (adjusted) per week in reported road collisions ```{r, echo = FALSE, warning=FALSE, message=FALSE} cas_summary <- cas_L5Y |> select(collision_index, casualty_class, fatal_count, casualty_adjusted_severity_serious, casualty_adjusted_severity_slight) |> filter(casualty_class == report_casualty) |> group_by(collision_index, casualty_class) |> summarise(Fatal = sum(fatal_count), Serious = sum(casualty_adjusted_severity_serious,na.rm = TRUE), Slight = sum(casualty_adjusted_severity_slight,na.rm = TRUE)) |> tidyr::pivot_wider(names_from = "casualty_class", values_from = c("Fatal","Serious","Slight")) if(yr2calc > 2023){ junction_pc <- cas_summary |> left_join(cra_L5Y) |> group_by(junction_detail) |> summarise(fatal_casualties = sum(Fatal_Pedestrian), serious_casualties = sum(Serious_Pedestrian,na.rm = TRUE), slight_casualties = sum(Slight_Pedestrian,na.rm = TRUE)) |> rowwise() |> mutate(All = sum(fatal_casualties, serious_casualties, slight_casualties)) |> ungroup() |> transmute(Junction = junction_detail, Fatalities = fatal_casualties/sum(fatal_casualties)*100, Serious = serious_casualties/sum(serious_casualties)*100, Slight = slight_casualties/sum(slight_casualties)*100, All = All/sum(All)*100) |> mutate_if(is.numeric, round,1) |> arrange(desc(All)) } else { junction_pc <- cas_summary |> left_join(cra_L5Y) |> group_by(junction_detail_historic) |> summarise(fatal_casualties = sum(Fatal_Pedestrian), serious_casualties = sum(Serious_Pedestrian,na.rm = TRUE), slight_casualties = sum(Slight_Pedestrian,na.rm = TRUE)) |> rowwise() |> mutate(All = sum(fatal_casualties, serious_casualties, slight_casualties)) |> ungroup() |> transmute(Junction = junction_detail_historic, Fatalities = fatal_casualties/sum(fatal_casualties)*100, Serious = serious_casualties/sum(serious_casualties)*100, Slight = slight_casualties/sum(slight_casualties)*100, All = All/sum(All)*100) |> mutate_if(is.numeric, round,1) |> arrange(desc(All)) } ## stats for within 20m of junctions not_within_20 <- junction_pc %>% filter(Junction == "Not at junction or within 20 metres") ``` - a majority of pedestrian fatalities (58%) do not occur at or within 20m of a junction compared to 42% of all seriously injured (adjusted) casualties ```{r, echo = FALSE, warning=FALSE, message=FALSE} ## create some approximate groups vehicle_groups <- data.frame(summary_group = c("pedal cycle", "motorcycle", "motorcycle", "motorcycle", "motorcycle", "motorcycle", "motorcycle", "car", "other vehicle", "other vehicle", "car", "bus or coach", "car", "light goods vehicle", "heavy goods vehicle", "heavy goods vehicle", "other vehicle", "other vehicle", "other vehicle", "other vehicle", "other vehicle", "other vehicle"), vehicle_type = c("Pedal cycle","Motorcycle - unknown cc", "Electric motorcycle" , "Motorcycle 125cc and under", "Motorcycle 50cc and under","Motorcycle over 125cc and up to 500cc", "Motorcycle over 500cc", "Car", "Agricultural vehicle", "Tram","Taxi/Private hire car", "Bus or coach (17 or more pass seats)", "Minibus (8 - 16 passenger seats)", "Van / Goods 3.5 tonnes mgw or under", "Goods over 3.5t. and under 7.5t", "Goods 7.5 tonnes mgw and over", "Other vehicle", "Unknown vehicle type (self rep only)", "Goods vehicle - unknown weight", "Mobility scooter", "Ridden horse", "Data missing or out of range")) # join the casualty summary table with crashes and vehicles veh_cas_sum <- cas_summary |> left_join(veh_L5Y, by = "collision_index") |> # join the vehicles to get data on type of vehicle left_join(cra_L5Y, by = "collision_index") |> # join crashes as number of vehicles is included, quicker than calculating from veh table select(collision_index, vehicle_type, number_of_vehicles, Fatal_Pedestrian, Serious_Pedestrian, Slight_Pedestrian) |> left_join(vehicle_groups, by = "vehicle_type") |> distinct(collision_index, .keep_all = TRUE) # there is a row for each vehicle in a collision, get rid of duplicates all_vehicles <- veh_cas_sum |> group_by(number_of_vehicles, summary_group) |> summarise(Fatal = round(sum(Fatal_Pedestrian)), Serious = round(sum(Serious_Pedestrian,na.rm = TRUE)), Slight = round(sum(Slight_Pedestrian,na.rm = TRUE))) |> rowwise() |> mutate(All = sum(Fatal,Serious,Slight)) |> ungroup() |> mutate(pc_fat = round(Fatal/sum(Fatal)*100,1)) |> arrange(desc(Fatal)) single_car <- filter(all_vehicles, number_of_vehicles == 1 & summary_group == "car") ``` - 66% of pedestrian fatalities were in collisions involving a single car ```{r, echo = FALSE, warning=FALSE, message=FALSE} road_type <- cas_summary |> left_join(cra_L5Y, by = "collision_index") |> #select(collision_severity, casualty_type, datetime, first_road_class, urban_or_rural_area, number_of_casualties,collision_reference) %>% mutate(first_road_class = case_when(first_road_class == "A" ~ "Other",first_road_class == "B" ~ "Other",first_road_class == "C" ~ "Other", first_road_class == "Unclassified" ~ "Other",first_road_class == "A(M)" ~ "Other",first_road_class == "Motorway" ~ "Motorway")) |> group_by(first_road_class, urban_or_rural_area) %>% summarise(Fatal = round(sum(Fatal_Pedestrian)), Serious = round(sum(Serious_Pedestrian,na.rm = TRUE)), Slight = round(sum(Slight_Pedestrian,na.rm = TRUE))) |> filter(!urban_or_rural_area == "Unallocated") m_way <- road_type %>% filter(first_road_class == "Motorway") |> select(road_class = first_road_class, Fatal, Serious, Slight) rural_urban <- road_type %>% filter(first_road_class == "Other") |> ungroup() |> select(road_class = urban_or_rural_area, Fatal, Serious, Slight) road_types <- rbind(m_way, rural_urban)|> group_by(road_class) |> summarise(Fatal = round(sum(Fatal)), Serious = round(sum(Serious,na.rm = TRUE)), Slight = round(sum(Slight,na.rm = TRUE))) |> tidyr::pivot_longer(cols = c("Fatal", "Serious", "Slight")) |> group_by(road_class, name) %>% summarise(value = sum(value)) |> group_by(name) %>% mutate(pc = value/sum(value)*100) road_type_all <- rbind(m_way, rural_urban)|> group_by(road_class) |> summarise(Fatal = round(sum(Fatal)), Serious = round(sum(Serious,na.rm = TRUE)), Slight = round(sum(Slight,na.rm = TRUE))) |> tidyr::pivot_longer(cols = c("Fatal", "Serious", "Slight")) |> group_by(road_class) %>% summarise(value = sum(value)) %>% mutate(pc = value/sum(value)*100) |> mutate(name = "All casualties") rural_all <- filter(road_type_all, road_class == "Rural") fatal_rural <- filter(road_types,road_class == "Rural" & name == "Fatal") ``` - 33% of pedestrian fatalities occurred on rural roads compared to 14% of all pedestrian casualties ```{r, echo = FALSE, warning=FALSE, message=FALSE} ## male female sex_casualty <- cas_L5Y %>% filter(casualty_type == "Pedestrian") %>% group_by(sex_of_casualty) %>% summarise(fatal_casualties = sum(fatal_count), serious_casualties = sum(casualty_adjusted_severity_serious,na.rm = TRUE), slight_casualties = sum(casualty_adjusted_severity_slight, na.rm = TRUE)) serious_fatal_male <- sex_casualty %>% rowwise() |> #filter(sex_of_casualty == "Male") |> mutate(KSI = sum(fatal_casualties, serious_casualties)) |> ungroup() |> mutate(pc_ksi = KSI/sum(KSI)) pc_fatal_serious_male <- filter(serious_fatal_male, sex_of_casualty == "Male") ``` - 65% of pedestrian killed or seriously injured (KSI) (adjusted) casualties were male The most common contributory factor allocated to pedestrians *no contributory factors included in the public data* # 2. Pedestrian traffic and reported casualties ```{r, echo = FALSE, warning=FALSE, message=FALSE} ## only latest year (TY) in report fatal_TY <- filter(casualties, collision_year == yr2calc, casualty_type == report_casualty, casualty_severity == "Fatal") serious_TY <- filter(casualties, collision_year == yr2calc, casualty_type == report_casualty) slight_TY <- filter(casualties, collision_year == yr2calc, casualty_type == report_casualty) ``` In 2024, 409 pedestrians were killed in Great Britain, whilst 5965 were reported to be seriously injured and 13381 slightly injured. ```{r, echo = FALSE, warning=FALSE, message=FALSE} trips <- select(trips, Year, Traffic = tot_dist_billion_miles) table_1 <- casualties |> filter(casualty_type == report_casualty & collision_year >= 2004 & collision_year <= yr2calc) |> # add in filter for calculating past years when later data is available group_by(collision_year) |> summarise(Fatal = sum(fatal_count), Serious = sum(casualty_adjusted_severity_serious,na.rm = TRUE), Slight = sum(casualty_adjusted_severity_slight,na.rm = TRUE)) |> select(collision_year,Fatal, Serious, Slight) |> #tidyr::pivot_wider(names_from = "casualty_severity", values_from = "casualties") |> rowwise() |> mutate(All = sum(c(Fatal, Serious, Slight))) |> left_join(trips, by = c("collision_year" = "Year")) dist_walked_2004 <- filter(trips, Year == 2004) dist_walked_TY <- filter(trips, Year == yr2calc) if(dist_walked_2004$Traffic > dist_walked_TY$Traffic){ dw <- "decreased" } else { dw <- "increased" } ``` Table 1 and Chart 1 show that pedestrian traffic (measured by distance walked) has decreased between 2004 and 2024 whilst fatalities, serious and slight injuries have fallen. ```{r, echo = FALSE, warning=FALSE, message=FALSE} ## change between this year (TY) and last year (LY) diff_fatal <- (table_1$Fatal[NROW(table_1)]-table_1$Fatal[NROW(table_1)-1])/table_1$Fatal[NROW(table_1)-1] diff_trips <- abs(table_1$Traffic[NROW(table_1)]-table_1$Traffic[NROW(table_1)-1])/table_1$Traffic[NROW(table_1)-1] # pedestrian fatalities increased or decreased if(table_1$Fatal[NROW(table_1)]>table_1$Fatal[NROW(table_1)-1]){ pf <- "increased" } else { pf <- "decreased" } # pedestrian casualties all severities increased or decreased if(table_1$All[NROW(table_1)]>table_1$All[NROW(table_1)-1]){ pcr <- "increased" } else { pcr <- "fallen" } ``` Between 2023 and 2024, pedestrian fatalities decreased by 3% while pedestrian traffic (distance walked) increased by 1%. ```{r, echo = FALSE, warning=FALSE, message=FALSE} bm_vals <- table_1 %>% filter(collision_year == 2004) %>% select(collision_year,Fatal, Serious, Slight, Traffic) rates <- table_1 %>% select(collision_year,Fatal, Serious,Slight,Traffic) |> mutate(Fatal = Fatal/bm_vals$Fatal*100, Serious = Serious/bm_vals$Serious*100, Slight = Slight/bm_vals$Slight*100, Traffic = Traffic/bm_vals$Traffic*100) chart_1 <- rates |> melt("collision_year") cols <- rev(c("#001a70", "#ff7733", "#1de9b6","#006853")) cust_theme <- theme(panel.grid.major = element_line(size = 2)) # put the elements in a list dft_theme <- list(cust_theme, scale_color_manual(values = cols)) chart_1 %>% ggplot(aes(collision_year, value, color = variable)) + geom_line(size = 2, alpha = .8) + dft_theme+ theme(panel.background = element_blank(), legend.position = "top", legend.title = element_blank()) + scale_x_continuous(expand = c(0, 0)) + geom_hline(yintercept=100, linetype='dotted', col = 'black')+ ggtitle(paste0("Chart 1: Index of casualties by severity, GB: 2004 to ", yr2calc," (Index 2004=100)")) + scale_x_continuous(name = NULL, breaks = seq(2004, 2023, by = 2) # Add more tick marks ) + labs(caption = "Source: Stats19")+ theme(panel.border = element_blank()) # round all the values for the table print table_1_out <- table_1 |> mutate(Fatal = round(Fatal), Serious = round(Serious), Slight = round(Slight), All = round(All), Traffic = round(Traffic,2)) gt(table_1_out,auto_align = TRUE) |> cols_width(collision_year ~px(60)) |> cols_label(collision_year = md("**Year**"), Fatal = md("**Killed**"), Serious = md("**Serious**"), Slight = md("**Slight**"), All = md("**All**"), Traffic = md("**Traffic**")) |> tab_footnote(md("**Source: DfT STATS19, National Travel Survey and Office for National\nStatistics population data**")) |> tab_header( title = md(paste0("**Table 1: Number of reported pedestrian casualties by severity and traffic\n(pedestrian billion miles walked), GB: 2004 to ", yr2calc,"**"))) |> tab_options(heading.align = "left", column_labels.border.top.style = "none", table.border.top.style = "none", column_labels.border.bottom.style = "none", column_labels.border.bottom.width = 1, column_labels.border.bottom.color = "black", table_body.border.top.style = "none", table_body.border.bottom.color = "white", heading.border.bottom.style = "none", table.border.bottom.style = "none",) |> tab_style( style = cell_text(weight = "bold"), locations = list( cells_column_labels(columns = c(collision_year)), cells_body(columns = c(collision_year)) )) |> tab_style( style = cell_fill(color = "white"), locations = cells_body(columns = everything()) ) table_2 <- table_1 |> transmute(collision_year, Fatal = round(Fatal/Traffic), Serious = round(Serious/Traffic), Slight = round(Slight/Traffic), All = round(All/Traffic)) bm_vals_2 <- table_2 %>% filter(collision_year == 2004) %>% select(collision_year,Fatal, Serious, Slight,All) rates_2 <- table_2 %>% mutate(Fatal = Fatal/bm_vals_2$Fatal*100, Serious = Serious/bm_vals_2$Serious*100, Slight = Slight/bm_vals_2$Slight*100, All = All/bm_vals_2$All*100) ``` # 3. How far do pedestrians travel? The National Travel Survey (NTS) which provides the [number of trips and average distance travelled](https://www.gov.uk/government/statistical-data-sets/nts03-modal-comparisons) (NTS0303) by person per year for English residents. This is used to derive casualty rates per mile travelled for pedestrians, which also use the Great Britain [population figure](https://www.ons.gov.uk/file?uri=/peoplepopulationandcommunity/populationandmigration/populationestimates/datasets/populationestimatestimeseriesdataset/current/pop.csv) to estimate total distance walked each year. # 4. Casualty rates per mile travelled The pedestrian casualty rate has fallen for all severities in 2024 compared to 2004. ```{r, echo = FALSE, warning=FALSE, message=FALSE} # compare the last year to 2004 for all stats diff_all_2004 <- (1-(rates_2$All[NROW(rates_2)]/rates_2$All[1]))*100 diff_fat_2004 <- (1-(rates_2$Fatal[NROW(rates_2)]/rates_2$Fatal[1]))*100 diff_sev_2004 <- (1-(rates_2$Serious[NROW(rates_2)]/rates_2$Serious[1]))*100 diff_sli_2004 <- (1-(rates_2$Slight[NROW(rates_2)]/rates_2$Slight[1]))*100 ``` The overall casualty rate decreased by 53%. The fatality rate decreased by 30% compared to a 41% reduction for serious injuries and a 57% reduction for slight injuries. ```{r, echo = FALSE, warning=FALSE, message=FALSE} # pick out traffic chart_2 <- rates_2 |> melt("collision_year") |> filter(!variable == "All") # define the colour palette cols <- rev(c("#ff7733", "#1de9b6","#006853")) cust_theme <- theme(panel.grid.major = element_line(size = 2)) # put the elements in a list dft_theme <- list(cust_theme, scale_color_manual(values = cols)) chart_2 %>% ggplot(aes(collision_year, value, color = variable)) + geom_line(size = 2, alpha = .8) + dft_theme+ theme(panel.background = element_blank(), legend.position = "top", legend.title = element_blank()) + scale_x_continuous(expand = c(0, 0)) + geom_hline(yintercept=100, linetype='dotted', col = 'black')+ ggtitle(paste0("Chart 2: Index of casualties by severity, GB: 2004 to ", yr2calc," (Index 2004=100)")) + scale_x_continuous(name = NULL, breaks = seq(2004, 2023, by = 2) # Add more tick marks ) + labs(caption = "Source: Stats19") gt(table_2,auto_align = FALSE) |> cols_label(collision_year = md("**Year**"), Fatal = md("**Killed**"), Serious = md("**Serious**"), Slight = md("**Slight**"), All = md("**All**")) |> tab_header( title = md(paste0("**Table 2: Casualty rates of pedestrian casualties by severity per billion miles walked, GB: 2004 to ",yr2calc,"**"))) |> tab_options(heading.align = "left", column_labels.border.top.style = "none", table.border.top.style = "none", column_labels.border.bottom.style = "none", column_labels.border.bottom.width = 1, column_labels.border.bottom.color = "#334422", table_body.border.top.style = "none", table_body.border.bottom.color = "white", heading.border.bottom.style = "none", table.border.bottom.style = "none") |> tab_style( style = cell_text(weight = "bold"), locations = list( cells_column_labels(columns = c(collision_year)), cells_body(columns = c(collision_year)) )) ``` # 5. Sex and age comparisons ```{r, echo = FALSE, warning=FALSE, message=FALSE} # by sex and age # use raw casualty df as there are multiple rows per collision. Bin the ages into custom bins that match the document sac_all <- cas_L5Y %>% filter(casualty_type == report_casualty) |> mutate(age_band = cut(as.numeric(age_of_casualty), breaks=c(0,11,15,19,24,29,39,49,59,69,100),labels=c("0-11","12-15","16-19","20-24","25-29","30-39","40-49","50-59","60-69","70+"))) |> group_by(sex_of_casualty, age_band) %>% summarise(Fatal = sum(fatal_count), Serious = sum(casualty_adjusted_severity_serious,na.rm = TRUE), Slight = sum(casualty_adjusted_severity_slight,na.rm = TRUE)) |> filter(!is.na(age_band)) |> rowwise() |> mutate(All = sum(Fatal,Serious)) mf <- sac_all |> group_by(sex_of_casualty) |> summarise(all = sum(All)) |> ungroup() |> mutate(pc = all/sum(all)) male_tot <- filter(mf, sex_of_casualty == "Male") female_tot <- filter(mf, sex_of_casualty == "Female") male_times <- male_tot$all/female_tot$all # age band 1 ab1 <- "30-39" # male female casualties for this age band sac_ab1 <- sac_all |> filter(age_band == ab1) ab2 <- "0-11" # male female casualties for this age band sac_ab2 <- sac_all |> filter(age_band == ab2) ab3 <- "70+" # male female casualties for this age band sac_ab3 <- sac_all |> filter(age_band == ab3) # add pc_ksi for only Male and Female sac_all <- sac_all |> ungroup() |> mutate(pc_ksi = (All/sum(All))*100) |> filter(sex_of_casualty %in% c("Male", "Female")) ``` Between 2020 and 2024, 65% of pedestrian casualties were male and 35% female. There are 1.9 times more male than female pedestrian casualties overall. This compares to 2.3 more for 30-39, 1.5 more for 0-11 and 0.9 more for 70+ - the only age group in which female casualties outnumber males. ```{r, echo = FALSE, warning=FALSE, message=FALSE} # Define colours and theme cols <- rev(c("#1de9b6", "#006853")) cust_theme <- theme(panel.grid.major = element_line(size = 2)) dft_theme <- list(cust_theme, scale_fill_manual(values = cols)) # use fill, not color ggplot(sac_all, aes(x = age_band, y = pc_ksi, fill = sex_of_casualty)) + geom_bar(stat = "identity", position = position_dodge(width = 0.7), width = 0.7) + geom_text( aes(label = paste0(round(pc_ksi),"%")), # Round values to 1 decimal place position = position_dodge(width = 0.7), vjust = -0.5, size = 3 ) + ggtitle(paste0("Chart 3: Percentage of ", tolower(report_casualty), " KSI casualties, by sex and age, GB: ", yr2calc-4, " to ", yr2calc)) + dft_theme + theme( panel.background = element_blank(), legend.position = "top", legend.title = element_blank() ) + ylab(NULL)+ xlab(NULL)+ labs(caption = "Source: Stats19") ``` # 6. Which vehicles are involved in collisions with pedestrians? ```{r, echo = FALSE, warning=FALSE, message=FALSE} # split up different vehicle numbers into individual dfs and join later for the table single_vehicles <- veh_cas_sum |> filter(number_of_vehicles == 1) |> group_by(summary_group) |> summarise(Fatal = round(sum(Fatal_Pedestrian)), Serious = round(sum(Serious_Pedestrian,na.rm = TRUE)), Slight = round(sum(Slight_Pedestrian,na.rm = TRUE))) |> rowwise() |> mutate(All = sum(Fatal,Serious,Slight)) |> ungroup() |> mutate(pc_fat = round(Fatal/All*100,1)) |> mutate(summary_group = factor(summary_group, levels = c("pedal cycle","motorcycle", "car","bus or coach","light goods vehicle", "heavy goods vehicle", "other vehicle"))) |> arrange(summary_group) |> mutate(summary_group = paste(1,summary_group)) two_vehicles <- veh_cas_sum |> filter(number_of_vehicles == 2) |> mutate(summary_group = "2 vehicles involved") |> group_by(summary_group) |> summarise(Fatal = round(sum(Fatal_Pedestrian)), Serious = round(sum(Serious_Pedestrian,na.rm = TRUE)), Slight = round(sum(Slight_Pedestrian,na.rm = TRUE))) |> rowwise() |> mutate(All = sum(Fatal,Serious,Slight)) |> ungroup() |> mutate(pc_fat = round(Fatal/All*100,1)) GT_two_vehicles <- veh_cas_sum |> filter(number_of_vehicles > 2) |> mutate(summary_group = "3 or more other vehicles involved") |> group_by(summary_group) |> summarise(Fatal = round(sum(Fatal_Pedestrian)), Serious = round(sum(Serious_Pedestrian,na.rm = TRUE)), Slight = round(sum(Slight_Pedestrian,na.rm = TRUE))) |> rowwise() |> mutate(All = round(sum(Fatal,Serious,Slight))) |> ungroup() |> mutate(pc_fat = round(Fatal/All*100,1)) # sort by percentage fatal for the text most_fat <- rbind(single_vehicles,two_vehicles,GT_two_vehicles) |> arrange(desc(pc_fat)) # create a totals row totals <- rbind(single_vehicles,two_vehicles,GT_two_vehicles) |> summarise(across(where(is.numeric), sum), group = "Total") |> mutate(pc_fat = round(Fatal/All*100,1), summary_group = group) |> select(-group) ``` Between 2020 and 2024, most pedestrian fatalities occurred in 1 vehicle collisions involving a car (275). However, the highest proportion of casualties from single vehicle collisions involve 1 heavy goods vehicle (4.8%). The second highest proportion (3.3%) occurred in collisions when 3 or more other vehicles involved. ```{r, echo = FALSE, warning=FALSE, message=FALSE} # bind them all together for table 3 table_3 <- rbind(single_vehicles,two_vehicles,GT_two_vehicles, totals) # create table 3 gt(table_3,auto_align = FALSE) |> cols_label(summary_group = md("**Vehicles**"), Fatal = md("**Fatalities**"), Serious = md("**Serious injuries**"), Slight = md("**Slight injuries**"), All = md("**All casualties**"), pc_fat = md("**% Fatalities**")) |> tab_header( title = md(paste0("**Table 3: Pedestrian casualties in reported road collisions by severity showing other vehicles involved GB: ", yr2calc-4, " to ",yr2calc,"**"))) |> tab_options(heading.align = "left", column_labels.border.top.style = "none", table.border.top.style = "none", column_labels.border.bottom.style = "none", column_labels.border.bottom.width = 1, column_labels.border.bottom.color = "#334422", table_body.border.top.style = "none", table_body.border.bottom.color = "white", heading.border.bottom.style = "none", table.border.bottom.style = "none") |> tab_style( style = cell_text(weight = "bold"), locations = list( cells_column_labels(columns = c(summary_group)), cells_body(columns = c(summary_group)) )) ``` # 7. Time of day of collisions ```{r, echo = FALSE, warning=FALSE, message=FALSE} ## create a table of severity by year # used clock as much faster than lubridate crash_time <- cas_summary |> left_join(cra_L5Y, by = "collision_index") |> # join crashes as number of vehicles is included, quicker than calculating from veh table select(datetime, Fatal_Pedestrian, Serious_Pedestrian) |> mutate(#collision_hr = lubridate::hour(datetime), dow = clock::date_weekday_factor(datetime, abbreviate = FALSE), collision_hr = get_hour(datetime), KSI = sum(Fatal_Pedestrian, Serious_Pedestrian)) |> mutate(dow = case_when(dow == "Monday" ~ "Monday to Friday", dow == "Tuesday" ~ "Monday to Friday", dow == "Wednesday" ~ "Monday to Friday", dow == "Thursday" ~ "Monday to Friday", dow == "Friday" ~ "Monday to Friday", dow == "Saturday" ~ "Saturday", dow == "Sunday" ~ "Sunday")) |> #mutate(dow = case_when(dow > 1 & dow < 7 ~ "Monday to Friday", dow == 7 ~ "Saturday", dow == 1 ~ "Sunday")) |> group_by(collision_hr, dow) |> summarise(KSI = sum(KSI)) |> mutate(KSI = if_else(dow == "Monday to Friday", KSI/5, KSI)) MF_peak <- crash_time |> filter(dow == "Monday to Friday") |> arrange(desc(KSI)) |> mutate(hr = str_sub(gsub(" ","", tolower(format(strptime(collision_hr, format = "%H"), "%I %p"))),2)) SS_peak <- crash_time |> filter(dow %in% c("Saturday", "Sunday")) |> group_by(collision_hr) |> summarise(KSI = sum(KSI)) |> arrange(desc(KSI)) |> mutate(hr = str_sub(gsub(" ","", tolower(format(strptime(collision_hr, format = "%H"), "%I %p"))),2)) # define the colour palette cols <- rev(c("#ff7733", "#1de9b6","#006853")) cust_theme <- theme(panel.grid.major = element_line(size = 2)) # put the elements in a list dft_theme <- list(cust_theme, scale_color_manual(values = cols)) crash_time %>% ggplot(aes(collision_hr, KSI, color = dow)) + geom_line(size = 2, alpha = .8) + dft_theme+ theme(panel.background = element_blank(), legend.position = "top", legend.title = element_blank()) + scale_x_continuous(expand = c(0, 0)) + ggtitle(paste0("Chart 4: Reported ", tolower(report_casualty), " KSIs by hour of day and day of week, GB: ", yr2calc-4, " to ", yr2calc)) + ylab(NULL)+ labs(x = "Hour starting", caption = "Source: Stats19") ``` The weekday peak time for pedestrian KSIs is from 3pm to 6pm. By contrast, the peak is from 1am to 9pm at weekends. # 8. What type of road? ```{r, echo = FALSE, warning=FALSE, message=FALSE} # see section 1 for original table road_type_bar <- rbind(road_types, road_type_all) |> filter(!road_class == "Data missing or out of range") fatal_urban <- filter(road_type_bar, name == "Fatal" & road_class == "Urban") all_cas <- filter(road_type_bar, name == "All casualties" & road_class == "Urban") fatal_mway <- filter(road_type_bar, name == "Fatal" & road_class == "Motorway") ``` Chart 5 shows that between 2020 and 2024, 63% of pedestrian fatalities occurred on urban roads compared to 85% of all pedestrian casualties. 4% of pedestrian fatalities occurred on motorways. This would be people outside their vehicles whether they are moving at the time or not. ```{r, echo = FALSE, warning=FALSE, message=FALSE} # define the colour palette cols <- rev(c("#ff7733", "#1de9b6","#006853")) cust_theme <- theme(panel.grid.major = element_line(size = 2)) dft_theme <- list(cust_theme, scale_fill_manual(values = cols)) # use fill, not co # Grouped bar ggplot(road_type_bar, aes(fill=road_class, y=pc, x=name, label = paste0(round(pc),"%"))) + geom_bar(position="dodge", stat="identity") + dft_theme + theme( panel.background = element_blank(), legend.position = "top", legend.title = element_blank() ) + theme(panel.background = element_blank()) + geom_text(position = position_dodge2(width = 0.9, preserve = "single"), angle = 0, vjust=-0.5, hjust=0.5) + xlab(NULL)+ ylab(NULL)+ ggtitle(paste0("Chart 5: Percentage of pedestrian casualties, by urban or rural classification and severity, GB: ", yr2calc-4, " to ", yr2calc)) + labs(caption = "Source: Stats19") ``` In this report, urban roads are defined as those within an area of population of 10,000 or more in England and Wales or more than 3,000 in Scotland - roads outside of these areas are classified as rural [https://www.gov.uk/government/publications/road-length-statistics-information/road-lengths-in-great-britain-statistics-notes-and-definitions](https://www.gov.uk/government/publications/road-length-statistics-information/road-lengths-in-great-britain-statistics-notes-and-definitions). # 9. Vehicle movement on the road ```{r, echo = FALSE, warning=FALSE, message=FALSE} # see section one highest_fat <- junction_pc |> arrange(desc(Fatalities)) highest_sev <- junction_pc |> arrange(desc(Serious)) if(yr2calc > 2023){ junc <- junction_pc |> filter(Junction %in% c("T or staggered junction", "Crossroads", "More than 4 arms (not roundabout)")) } else { junc <- junction_pc |> filter(Junction %in% c("T or staggered junction", "Other junction", "Crossroads", "More than 4 arms (not roundabout)")) roundabouts <- junction_pc |> filter(Junction %in% c("roundabout", "Mini-roundabout")) } ``` A majority of pedestrian fatalities( 58 %) occur not at junction or within 20 metres compared to 42 % of serious injuries (adjusted). However, 29 % of fatalities occur at a junction compared to 43 % of serious injuries (adjusted). ```{r, echo = FALSE, warning=FALSE, message=FALSE} table_4 <- junction_pc |> arrange(desc(All)) |> mutate_if(is.numeric, round,1) |> # round all values to 1 dp mutate_if(is.numeric, ~ paste0(.x, "%")) # add a % sign after each value # create table 3 gt(table_4,auto_align = FALSE) |> cols_label(Junction = md("**Junction**"), Fatalities = md("**Fatalities**"), Serious = md("**Serious**"), Slight = md("**Slight**"), All = md("**All casualties**")) |> tab_header( title = md(paste0("**Table 4: Percentage of pedestrian KSI casualties by severity and junction detail where the collision occurred, GB: ", yr2calc-4, " to ",yr2calc,"**"))) |> tab_options(heading.align = "left", column_labels.border.top.style = "none", table.border.top.style = "none", column_labels.border.bottom.style = "none", column_labels.border.bottom.width = 1, column_labels.border.bottom.color = "#334422", table_body.border.top.style = "none", table_body.border.bottom.color = "white", heading.border.bottom.style = "none", table.border.bottom.style = "none") |> tab_style( style = cell_text(weight = "bold"), locations = list( cells_column_labels(columns = c(Junction)), cells_body(columns = c(Junction)) )) ``` Sections 10 not reproducible as data is not public and sections 11 and 14 are simply explanatory text. This document is also useful for understanding the methodology behind the categories https://assets.publishing.service.gov.uk/media/68373a464115cfe5bfaa2cd4/STATS20_2024_specification.pdf