## ----------------------------------------------------------------------------- #| label: setup library(phutil) library(ggplot2) ## ----define small PDs--------------------------------------------------------- X <- rbind( c(1, 3), c(3, 5) ) Y <- rbind( c(3, 4) ) ## ----------------------------------------------------------------------------- #| label: fig-plot-small #| fig-width: 4 #| fig-height: 4 #| fig-align: center #| fig-cap: "Overlaid persistence diagrams $X$ (circles) and $Y$ (diamond) with dashed segments connecting optimally matched pairs." oldpar <- par(mar = c(4, 4, 1, 1) + .1) plot( NA_real_, xlim = c(0, 6), ylim = c(0, 6), asp = 1, xlab = "birth", ylab = "death" ) abline(a = 0, b = 1) points(X, pch = 1) points(Y, pch = 5) segments(X[, 1], X[, 2], c(2, Y[, 1]), c(2, Y[, 2]), lty = 2) par(oldpar) ## ----validate small PDs with Hera--------------------------------------------- wasserstein_distance(X, Y, p = 1) wasserstein_distance(X, Y, p = 2) bottleneck_distance(X, Y) ## ----validate small PDs with Dionysus----------------------------------------- TDA::wasserstein(cbind(0, X), cbind(0, Y), p = 1, dimension = 0) sqrt(TDA::wasserstein(cbind(0, X), cbind(0, Y), p = 2, dimension = 0)) TDA::bottleneck(cbind(0, X), cbind(0, Y), dimension = 0) ## ----validate small PD vs empty----------------------------------------------- # empty PD E <- matrix(NA_real_, nrow = 0, ncol = 2) # with dimension column E_ <- cbind(matrix(NA_real_, nrow = 0, ncol = 1), E) # distance from empty using phutil/Hera wasserstein_distance(E, X, p = 1) wasserstein_distance(E, X, p = 2) bottleneck_distance(E, X) # distance from empty using TDA/Dionysus TDA::wasserstein(E_, cbind(0, X), p = 1, dimension = 0) sqrt(TDA::wasserstein(E_, cbind(0, X), p = 2, dimension = 0)) TDA::bottleneck(E_, cbind(0, X), dimension = 0) ## ----compute large PDs fake, eval=FALSE--------------------------------------- # set.seed(28415) # n <- 24 # PDs1 <- lapply(seq(n), function(i) { # S1 <- tdaunif::sample_trefoil(n = 120, sd = .05) # as_persistence(TDA::ripsDiag(S1, maxdimension = 2, maxscale = 6)) # }) # PDs2 <- lapply(seq(n), function(i) { # S2 <- cbind(tdaunif::sample_arch_spiral(n = 120, arms = 2), 0) # S2 <- tdaunif::add_noise(S2, sd = .05) # as_persistence(TDA::ripsDiag(S2, maxdimension = 2, maxscale = 6)) # }) ## ----compute large PDs true, echo=FALSE--------------------------------------- n <- 24 PDs1 <- trefoils PDs2 <- arch_spirals ## ----------------------------------------------------------------------------- #| label: benchmark phutil and TDA #| warning: false PDs1_ <- lapply(lapply(PDs1, as.data.frame), as.matrix) PDs2_ <- lapply(lapply(PDs2, as.data.frame), as.matrix) # iterate over homological degrees and Wasserstein powers bm_all <- list() PDs_i <- seq_along(PDs1) for (dimension in seq(0, 2)) { # compute bm_1 <- do.call(rbind, lapply(seq_along(PDs1), function(i) { as.data.frame(microbenchmark::microbenchmark( TDA = TDA::wasserstein( PDs1_[[i]], PDs2_[[i]], dimension = dimension, p = 1 ), phutil = wasserstein_distance( PDs1[[i]], PDs2[[i]], dimension = dimension, p = 1 ), times = 1, unit = "ns" )) })) bm_2 <- do.call(rbind, lapply(seq_along(PDs1), function(i) { as.data.frame(microbenchmark::microbenchmark( TDA = sqrt(TDA::wasserstein( PDs1_[[i]], PDs2_[[i]], dimension = dimension, p = 2 )), phutil = wasserstein_distance( PDs1[[i]], PDs2[[i]], dimension = dimension, p = 2 ), times = 1, unit = "ns" )) })) bm_inf <- do.call(rbind, lapply(seq_along(PDs1), function(i) { as.data.frame(microbenchmark::microbenchmark( TDA = TDA::bottleneck( PDs1_[[i]], PDs2_[[i]], dimension = dimension ), phutil = bottleneck_distance( PDs1[[i]], PDs2[[i]], dimension = dimension ), times = 1, unit = "ns" )) })) # annotate and combine bm_1$power <- 1; bm_2$power <- 2; bm_inf$power <- Inf bm_res <- rbind(bm_1, bm_2, bm_inf) bm_res$degree <- dimension bm_all <- c(bm_all, list(bm_res)) } bm_all <- do.call(rbind, bm_all) ## ----------------------------------------------------------------------------- #| label: fig-benchmark-large #| fig-width: 8 #| fig-height: 3 #| fig-align: 'center' #| fig-retina: 2 #| fig-cap: "Benchmark comparison of Dionysus via {TDA} and Hera via {phutil} on #| large persistence diagrams: Violin plots of runtime distributions on a common #| scale." bm_all <- transform(bm_all, expr = as.character(expr), time = unlist(time)) bm_all <- subset(bm_all, select = c(expr, degree, power, time)) ggplot(bm_all, aes(x = time * 10e-9, y = expr)) + facet_grid( rows = vars(power), cols = vars(degree), labeller = label_both ) + geom_violin() + scale_x_continuous( transform = "log10", labels = scales::label_timespan(units = "secs") ) + labs(x = NULL, y = NULL)