From f850802793b864d6ada0b8703f07a46b9df490fb Mon Sep 17 00:00:00 2001 From: Ethan Plunkett Date: Fri, 29 Mar 2024 16:29:12 -0400 Subject: [PATCH] Update calc_flux() - fix memory problem - add tests - add radius based scaling #123 --- DESCRIPTION | 2 +- NEWS.md | 16 +++ R/calc_flux.R | 44 ++++++-- R/get_marginal.R | 2 +- R/is_between.R | 150 ++++++++++++++++++++------- R/plot_flux.R | 25 ++--- man/animate_flux.Rd | 9 +- man/calc_flux.Rd | 58 +++++++++-- man/is_between.Rd | 55 +++++++--- man/plot_flux.Rd | 15 +-- tests/testthat/_snaps/calc_flux.md | 13 +++ tests/testthat/_snaps/is_between.md | 7 ++ tests/testthat/test-calc_flux.R | 151 ++++++++++++++++++++++++++++ tests/testthat/test-is_between.R | 13 +++ 14 files changed, 471 insertions(+), 89 deletions(-) create mode 100644 tests/testthat/_snaps/calc_flux.md create mode 100644 tests/testthat/_snaps/is_between.md create mode 100644 tests/testthat/test-calc_flux.R create mode 100644 tests/testthat/test-is_between.R diff --git a/DESCRIPTION b/DESCRIPTION index 381af099..be127d9d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: BirdFlowR Title: Predict and Visualize Bird Movement -Version: 0.1.0.9054 +Version: 0.1.0.9055 Authors@R: c(person("Ethan", "Plunkett", email = "plunkett@umass.edu", role = c("aut", "cre"), comment = c(ORCID = "0000-0003-4405-2251")), diff --git a/NEWS.md b/NEWS.md index 64e687de..d8edfd9b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,19 @@ +# BirdFlowR 0.1.0.9055 +2023-03-29 +## Flux II +* Flux values are now divided by the radius to standardize units at + at P/km/week/ where P is the proportion of the population. +* Add tests for `calc_flux()` and `is_between()` +* Add analysis of flux radius parameter's affect on flux values. It's in + `"test-calc_flux.R"`. +* Add limit to radius argument in `calc_flux()` and ability to bypass it with + `check_radius` argument. +* Modified `is_between()` to only calculate the result for transitions that +are non-zero (at any timestep) in `bf`. This provides a slight performance +benefit for standard models and a large one for sparse models. +* `is_betweeen()` now processes connection lines in batches, controlled by + `batch_size` argument to it and `calc_flux()`. This is for memory management. + # BirdFlowR 0.1.0.9054 ## Flux I diff --git a/R/calc_flux.R b/R/calc_flux.R index dfaf04b8..89199887 100644 --- a/R/calc_flux.R +++ b/R/calc_flux.R @@ -1,8 +1,30 @@ -#' Estimate net bird movement +#' Estimate bird flux #' -#' `calc_flux()` estimates the proportion of the species that passes through +#' `calc_flux()` estimates the proportion of the species that passes near #' a set of points during each transition in a BirdFlow model. #' +#' @section Units: +#' +#' The total relative abundance passing through the circle centered on the point +#' is divided by the diameter of the circle in kilometers. The units of the +#' returned value is therefore roughly the proportion +#' (\eqn{P}) of the +#' species's population that is expected to pass through each km +#' of a line oriented perpendicular to the movement at each point: +#' \eqn{\frac{P}{km \cdot week}} +#' +#' Multiplying the result by the total population would yield: +#' \eqn{\frac{birds}{km \cdot week}} +#' +#' @section Limitations: +#' +#' `calc_flux()` makes the incorrect simplifying assumption +#' that birds follow the shortest (great circle) path +#' between the center of the the source and destination raster cells. Caution +#' should be used when interpreting the results especially around +#' major geographic features such as coasts, large lakes, mountain ranges, and +#' ecological system boundaries that might result in non-linear migration paths. +#' #' @param bf A BirdFlow model #' @param points A set of points to calculate movement through. If `points` is #' `NULL` they will default to the BirdFlow model cells that are either active @@ -15,13 +37,13 @@ #' or an even number. #' @param format The format to return the results in one of: #' \describe{ -#' \item{`"points"`}{Returns a list with `net_movement` a matrix or array of -#' movement, and `points` a data frame of either the input `points` or the -#' point locations } +#' \item{`"points"`}{Returns a list with `flux` a matrix or array of +#' flux values, and `points` a data frame of either the input `points` or the +#' default cell center derived points.} #' \item{`"dataframe"`}{Returns a "long" data frame with columns: #' * `x` and `y` coordinates of the points. #' * `transition` Transition code. -#' * `movement` The total abundance moving through the point in the transition. +#' * `flux` The flux at the point. See "Units" below . #' * `date` The date associated with the transition, will be at the midpoint #' between timesteps. #' @@ -29,6 +51,7 @@ #' \item{`"SpatRaster"`}{Returns a `terra::SpatRaster` with layers for each #' transition.} #'} +#' @inheritParams is_between #' #' @return See `format` argument. #' @export @@ -45,7 +68,7 @@ #' } #' calc_flux <- function(bf, points = NULL, radius = NULL, n_directions = 1, - format = NULL) { + format = NULL, batch_size = 1e6, check_radius = TRUE) { if (n_directions != 1) stop("Only one directional flux is supported at the moment.") @@ -65,6 +88,7 @@ calc_flux <- function(bf, points = NULL, radius = NULL, n_directions = 1, result <- is_between(bf, points, radius, n_directions) between <- result$between points <- result$points + radius_km <- result$radius / 1000 timesteps <- lookup_timestep_sequence(bf) transitions <- lookup_transitions(bf) @@ -96,9 +120,11 @@ calc_flux <- function(bf, points = NULL, radius = NULL, n_directions = 1, } } + # Standardize to P of population to pass through KM of transect in a week + net_movement <- net_movement / (radius_km * 2) if (format == "points") { - return(list(net_movement, points)) + return(list(flux = net_movement, point = points)) } if (format == "spatraster") { @@ -124,7 +150,7 @@ calc_flux <- function(bf, points = NULL, radius = NULL, n_directions = 1, wide <- cbind(as.data.frame(points)[, c("x", "y")], net_movement) long <- tidyr::pivot_longer(wide, cols = setdiff(names(wide), c("x", "y")), - names_to = "transition", values_to = "movement") + names_to = "transition", values_to = "flux") long$date <- as.character(lookup_date(long$transition, bf)) diff --git a/R/get_marginal.R b/R/get_marginal.R index e8937650..7090e171 100644 --- a/R/get_marginal.R +++ b/R/get_marginal.R @@ -31,7 +31,7 @@ get_marginal <- function(x, marginal = NULL, from = NULL) { length(from) == 1, from %in% seq_len(n_timesteps(x))) if (from == n_timesteps(x)) { - if(!is_cyclical(x)) + if (!is_cyclical(x)) stop("x isn't cyclical so there's no marginal with from = ", from) to <- 1 } else { diff --git a/R/is_between.R b/R/is_between.R index 1331c42c..d7d5903b 100644 --- a/R/is_between.R +++ b/R/is_between.R @@ -10,16 +10,20 @@ if (FALSE) { #' #' This internal function is used to create a betweenness array with dimensions #' [n_active(bf)](n_active()), [n_active(bf)](n_active()), and `length(points)`. -#' The first two dimensions represent pairs of locations within the -#' BirdFlow model (active cells). -#' The third dimension represents the `points`. Each value in the array is -#' `TRUE` if the point is between the associated model cells. Specifically, -#' if the point is within `radius` meters (along a great circle) +#' The first two dimensions from and to cells of possible connections +#' between pairs of locations within the BirdFlow model and both +#' `n_active()` elements. +#' The third dimension represents `points` that might be between each +#' connection. +#' Cell values are `TRUE` if the point is between the associated model cells. +#' Specifically, if the point is within `radius` meters (along a great circle) #' of the great circle line connecting the cell centers. #' #' If `points` and `radius` are `NULL` they default to the cell radius and the #' center of all cells within the BirdFlow extent that fall between -#' any active cells. This is usually a superset of the active cells. +#' any active cells. This includes all cell centers within a convex hull +#' (in spherical coordinates) around the active cells in `bf`. +#' #' #' @param bf A BirdFlow model #' @param points The points to evaluate betweenness on. If NULL the cell @@ -31,19 +35,35 @@ if (FALSE) { #' locations. `radius` defaults to half the cell size (`mean(res(bf))/2`). #' @param n_direction The number of (equally spaced) directional bins to #' classify bearings into. Currently only `1` is supported. -#' +#' @param skip_unconnected If `TRUE` then only connections that exist in `bf` +#' will be evaluated, and between matrix will erroneously indicate that +#' no points are between locations that are not connected. +#' The resulting array can still be used with the model it was built for because +#' those missing connections would always have zero probability. +#' @param batch_size controls the number of movement lines that are processed +#' at a time. A smaller +#' `batch_size` will conserve memory at a slight performance cost. The number +#' of batches will be less than or equal to `n_active(bf)^2 / batch_size`. +#' @param check_radius If `TRUE` an error will be thrown if the radius +#' is not between the resolution and 1/4 the resolution of `bf`. Outside of +#' that range the algorithm is likely to yield distorted results. +#' `0.5 * mean(res(bf))` is the default, and recommended radius. #' @return A list with: #' \item{between}{An array with dimensions representing the #' "from" location, the "to" location, and the `points`. Cells are `TRUE` -#' if the associated point is between the associated from and to locations.} -#' \item{points}{A dataframe of points that define the third dimension -#' in between. It is identical to the input `points` if they are not `NULL`. -#' Otherwise it will be a data frame with columns x, y, and i corresponding to -#' the third dimension in `between`. Note `i` will be `NA` for some points that -#' aren't within the mask but do fall between active cells.} +#' if the point is between the associated from and to locations.} +#' \item{points}{A data,frame of points that define the third dimension +#' in `between`. It is identical to the input `points` if they are not `NULL`. +#' Otherwise it will be a data frame with columns `x`, `y`, and `i` +#' corresponding to the third dimension in `between`. +#' `i` will be `NA` for points that are not within the mask but +#' fall between active cells.} +#' \item{radius}{The radius of the circle in meters.} #' @keywords internal -is_between <- function(bf, points = NULL, radius = NULL, n_directions = 1) { - +is_between <- function(bf, points = NULL, radius = NULL, n_directions = 1, + skip_unconnected = TRUE, batch_size = 1e6, + check_radius = TRUE) { + bf_msg("Generating between array.\n") # Force use of s2 o_use_s2 <- sf::sf_use_s2() on.exit(sf::sf_use_s2(o_use_s2)) @@ -60,8 +80,17 @@ is_between <- function(bf, points = NULL, radius = NULL, n_directions = 1) { if (is.null(radius)) { radius <- mean(res(bf)) / 2 - } + } else if (check_radius) { + # Analysis of the effect of changing radius is in test-calc_flux.R + radius_cells <- radius / mean(res(bf)) # radius converted to cells + if (radius_cells <= 0.25 || radius_cells >= 1) { + stop("radius should be less than the resolution and more than 1/4 the ", + "resolution or the flux is likely to be biased. ", + "Set check_radius to FALSE to ignore this advice.") + } + } + bf_msg("Creating points\n") if (is.null(points)) { # Create points at all cell centers in the rectangular raster @@ -99,6 +128,13 @@ is_between <- function(bf, points = NULL, radius = NULL, n_directions = 1) { points <- points[sv, , drop = FALSE] } + + + + + + bf_msg("Preparing points\n") + # Initialize array (All FALSE) to hold betweenness between <- array(data = FALSE, dim = c(n_active(bf), n_active(bf), nrow(points)), @@ -139,24 +175,70 @@ is_between <- function(bf, points = NULL, radius = NULL, n_directions = 1) { # They represent stop overs or seasonal residence, not migratory movement pairs <- pairs[pairs$from != pairs$to, , drop = FALSE] - # Lines connecting pairs of points - lines <- vector(mode = "list", length = nrow(pairs)) - for (i in seq_along(lines)) { - lines[[i]] <- sf::st_linestring(a_coords[c(pairs$from[i], pairs$to[i]), ]) - } - # Convert to simple features column and add CRS - lines_sfc <- sf::st_sfc(lines, crs = "EPSG:4326") - - # Determine if each location is between each possible pair of model locations - # based on whether it's within the `radius` distance (along a great circle) - # to each of the great circle lines connecting the pairs. - within_dist <- sf::st_is_within_distance(lines_sfc, points_sph, - dist = units::set_units(radius, "m")) - for (i in seq_len(nrow(pairs))) { - between[pairs$from[i], pairs$to[i], within_dist[[i]]] <- TRUE - between[pairs$to[i], pairs$from[i], within_dist[[i]]] <- TRUE + + + # Drop pairs that aren't ever connected + if (skip_unconnected) { + # Create matrix indicating which active cells are connected to + # each other via a non-zero marginal at any timestep + # With sparse models this will eliminate a lot of connections + # With non-sparse models it will still eliminate some + # due to dynamic masking. + ever_connected <- matrix(FALSE, n_active(bf), n_active(bf)) + dm <- get_dynamic_mask(bf) + mi <- bf$marginals$index + mi <- mi[mi$direction == "forward", ] + + for (i in seq_len(nrow(mi))) { + from_dm <- dm[, mi$from[i]] + to_dm <- dm[, mi$to[i]] + marg <- get_marginal(bf, mi$marginal[i]) + ever_connected[from_dm, to_dm] <- as.matrix(marg != 0) + } + ever_connected <- ever_connected | t(ever_connected) # backwards counts + + connected_pairs <- data.frame(from = row(ever_connected)[ever_connected], + to = col(ever_connected)[ever_connected]) + connected_pairs$id <- with(connected_pairs, paste(from, "-", to)) + + + pairs <- pairs[pairs$id %in% connected_pairs$id, , drop = FALSE] + } + # To avoid + + # Add batch number + pairs$batch <- rep(seq_len(nrow(pairs)), + each = batch_size, + length.out = nrow(pairs)) + n_batch <- max(pairs$batch) + + bf_msg("Processing points in ", n_batch, " batches \n") + for (batch in seq_len(n_batch)) { + bf_msg(round(batch / n_batch * 100, 2), "%\n") + b_pairs <- pairs[pairs$batch == batch, , drop = FALSE] + # Lines connecting pairs of points + lines <- vector(mode = "list", length = nrow(b_pairs)) + for (i in seq_along(lines)) { + lines[[i]] <- + sf::st_linestring(a_coords[c(b_pairs$from[i], b_pairs$to[i]), ]) + } + + # Convert to simple features column and add CRS + lines_sfc <- sf::st_sfc(lines, crs = "EPSG:4326") + + # Determine if each location is between each possible pair of model + # locations based on whether it's within the `radius` distance (along a + # great circle) to each of the great circle lines connecting the pairs. + within_dist <- + sf::st_is_within_distance(lines_sfc, points_sph, + dist = units::set_units(radius, "m")) + for (i in seq_len(nrow(b_pairs))) { + between[b_pairs$from[i], b_pairs$to[i], within_dist[[i]]] <- TRUE + between[b_pairs$to[i], b_pairs$from[i], within_dist[[i]]] <- TRUE + } + } # End batch loop if (FALSE) { # visualize - for coding or debugging @@ -172,7 +254,5 @@ is_between <- function(bf, points = NULL, radius = NULL, n_directions = 1) { col = "green", add = TRUE) } - - - return(list(between = between, points = points)) + return(list(between = between, points = points, radius = radius)) } diff --git a/R/plot_flux.R b/R/plot_flux.R index 4c66fc59..b9a4d356 100644 --- a/R/plot_flux.R +++ b/R/plot_flux.R @@ -1,12 +1,13 @@ -#' Plot net bird movement +#' Plot bird flux #' -#' @param flux A flux object as created by [calc_flux(format = "dataframe")][calc_flux] +#' @param flux A flux object as created by +#' [calc_flux(format = "dataframe")][calc_flux] #' @param bf A BirdFlow object #' @param subset A subset of the transitions in `flux` to plot, can be #' a logical vector of the same length as the number of transitions in `flux`; #' a numeric index of transitions in `flux`, or a subset of the transition names #' in `flux`. -#' @param limits Two numbers representing the range in movement values to +#' @param limits Two numbers representing the range in flux values to #' display. Values outside of this range will be truncated to the range. With #' the default of `NULL` the entire range is plotted. #' @param dynamic_scale If `TRUE` then the range of the data in each @@ -17,10 +18,10 @@ #' to skip plotting the coastline. #' @param coast_color The color used to plot the coastline, or `NULL` to skip #' plotting the coastline. -#' @param gradient_colors The colors palette used to represent the movement +#' @param gradient_colors The colors palette used to represent the flux #' intensity. #' @param title The plot title -#' @param value_label The label for the net movement values. +#' @param value_label The label for the flux values. #' @return `plot_flux` returns a **ggplot2** object. It can be displayed with #' `print()`. #' @export @@ -34,7 +35,7 @@ plot_flux <- function(flux, coast_color = gray(0.5), gradient_colors = NULL, title = species(bf), - value_label = "Movement") { + value_label = "Flux") { if (!is.null(limits) && dynamic_scale) { stop("Do not set dynamic_scale to TRUE while also setting limits.") @@ -46,14 +47,14 @@ plot_flux <- function(flux, if (is.null(limits)) { - limits <- range(flux$movement, na.rm = TRUE) + limits <- range(flux$flux, na.rm = TRUE) } else { stopifnot(is.numeric(limits), length(limits) == 2, all(!is.na(limits)), limits[1] < limits[2]) # Truncate to limits - flux$movement[flux$movement < limits[1]] <- limits[1] - flux$movement[flux$movement > limits[2]] <- limits[2] + flux$flux[flux$flux < limits[1]] <- limits[1] + flux$flux[flux$flux > limits[2]] <- limits[2] } @@ -101,7 +102,7 @@ plot_flux <- function(flux, p <- flux |> #dplyr::filter(transition %in% transitions[seq(4, 50, 4)]) |> ggplot2::ggplot(ggplot2::aes(x = .data$x, y = .data$y, - fill = .data$movement)) + + fill = .data$flux)) + ggplot2::geom_raster() + ggplot2::scale_fill_gradientn(colors = gradient_colors) @@ -111,11 +112,11 @@ plot_flux <- function(flux, # Multiple transitions, facet wrap on date and add species title p <- p + ggplot2::facet_wrap(ggplot2::vars(.data$date)) + - ggplot2::ggtitle(species(bf)) + ggplot2::ggtitle(title) } else { # Single transition add species title AND date subtitle p <- p + - ggplot2::ggtitle(species(bf), subtitle = flux$date[1]) + ggplot2::ggtitle(title, subtitle = flux$date[1]) } # Add coastline diff --git a/man/animate_flux.Rd b/man/animate_flux.Rd index 3613d7b1..68ee7fdf 100644 --- a/man/animate_flux.Rd +++ b/man/animate_flux.Rd @@ -7,7 +7,8 @@ animate_flux(flux, bf, title = species(bf), ...) } \arguments{ -\item{flux}{A flux object as created by \link[=calc_flux]{calc_flux(format = "dataframe")}} +\item{flux}{A flux object as created by +\link[=calc_flux]{calc_flux(format = "dataframe")}} \item{bf}{A BirdFlow object} @@ -20,7 +21,7 @@ animate_flux(flux, bf, title = species(bf), ...) a logical vector of the same length as the number of transitions in \code{flux}; a numeric index of transitions in \code{flux}, or a subset of the transition names in \code{flux}.} - \item{\code{limits}}{Two numbers representing the range in movement values to + \item{\code{limits}}{Two numbers representing the range in flux values to display. Values outside of this range will be truncated to the range. With the default of \code{NULL} the entire range is plotted.} \item{\code{dynamic_scale}}{If \code{TRUE} then the range of the data in each @@ -31,9 +32,9 @@ scale among transitions.} to skip plotting the coastline.} \item{\code{coast_color}}{The color used to plot the coastline, or \code{NULL} to skip plotting the coastline.} - \item{\code{gradient_colors}}{The colors palette used to represent the movement + \item{\code{gradient_colors}}{The colors palette used to represent the flux intensity.} - \item{\code{value_label}}{The label for the net movement values.} + \item{\code{value_label}}{The label for the flux values.} }} } \value{ diff --git a/man/calc_flux.Rd b/man/calc_flux.Rd index 6ca88848..7ab1f4cc 100644 --- a/man/calc_flux.Rd +++ b/man/calc_flux.Rd @@ -2,9 +2,17 @@ % Please edit documentation in R/calc_flux.R \name{calc_flux} \alias{calc_flux} -\title{Estimate net bird movement} +\title{Estimate bird flux} \usage{ -calc_flux(bf, points = NULL, radius = NULL, n_directions = 1, format = NULL) +calc_flux( + bf, + points = NULL, + radius = NULL, + n_directions = 1, + format = NULL, + batch_size = 1e+06, + check_radius = TRUE +) } \arguments{ \item{bf}{A BirdFlow model} @@ -23,14 +31,14 @@ or an even number.} \item{format}{The format to return the results in one of: \describe{ -\item{\code{"points"}}{Returns a list with \code{net_movement} a matrix or array of -movement, and \code{points} a data frame of either the input \code{points} or the -point locations } +\item{\code{"points"}}{Returns a list with \code{flux} a matrix or array of +flux values, and \code{points} a data frame of either the input \code{points} or the +default cell center derived points.} \item{\code{"dataframe"}}{Returns a "long" data frame with columns: \itemize{ \item \code{x} and \code{y} coordinates of the points. \item \code{transition} Transition code. -\item \code{movement} The total abundance moving through the point in the transition. +\item \code{flux} The flux at the point. See "Units" below . \item \code{date} The date associated with the transition, will be at the midpoint between timesteps. } @@ -39,14 +47,50 @@ between timesteps. \item{\code{"SpatRaster"}}{Returns a \code{terra::SpatRaster} with layers for each transition.} }} + +\item{batch_size}{controls the number of movement lines that are processed +at a time. A smaller +\code{batch_size} will conserve memory at a slight performance cost. The number +of batches will be less than or equal to \code{n_active(bf)^2 / batch_size}.} + +\item{check_radius}{If \code{TRUE} an error will be thrown if the radius +is not between the resolution and 1/4 the resolution of \code{bf}. Outside of +that range the algorithm is likely to yield distorted results. +\code{0.5 * mean(res(bf))} is the default, and recommended radius.} } \value{ See \code{format} argument. } \description{ -\code{calc_flux()} estimates the proportion of the species that passes through +\code{calc_flux()} estimates the proportion of the species that passes near a set of points during each transition in a BirdFlow model. } +\section{Units}{ + + +The total relative abundance passing through the circle centered on the point +is divided by the diameter of the circle in kilometers. The units of the +returned value is therefore roughly the proportion +(\eqn{P}) of the +species's population that is expected to pass through each km +of a line oriented perpendicular to the movement at each point: +\eqn{\frac{P}{km \cdot week}} + +Multiplying the result by the total population would yield: +\eqn{\frac{birds}{km \cdot week}} +} + +\section{Limitations}{ + + +\code{calc_flux()} makes the incorrect simplifying assumption +that birds follow the shortest (great circle) path +between the center of the the source and destination raster cells. Caution +should be used when interpreting the results especially around +major geographic features such as coasts, large lakes, mountain ranges, and +ecological system boundaries that might result in non-linear migration paths. +} + \examples{ \dontrun{ diff --git a/man/is_between.Rd b/man/is_between.Rd index ab1e3824..37e5e310 100644 --- a/man/is_between.Rd +++ b/man/is_between.Rd @@ -4,7 +4,15 @@ \alias{is_between} \title{Determine if points are between BirdFlow cells} \usage{ -is_between(bf, points = NULL, radius = NULL, n_directions = 1) +is_between( + bf, + points = NULL, + radius = NULL, + n_directions = 1, + skip_unconnected = TRUE, + batch_size = 1e+06, + check_radius = TRUE +) } \arguments{ \item{bf}{A BirdFlow model} @@ -18,6 +26,22 @@ cell centers to a buffered convex hull around the active cell centers.} \code{radius} meters (along a great circle) of the great circle line between the locations. \code{radius} defaults to half the cell size (\code{mean(res(bf))/2}).} +\item{skip_unconnected}{If \code{TRUE} then only connections that exist in \code{bf} +will be evaluated, and between matrix will erroneously indicate that +no points are between locations that are not connected. +The resulting array can still be used with the model it was built for because +those missing connections would always have zero probability.} + +\item{batch_size}{controls the number of movement lines that are processed +at a time. A smaller +\code{batch_size} will conserve memory at a slight performance cost. The number +of batches will be less than or equal to \code{n_active(bf)^2 / batch_size}.} + +\item{check_radius}{If \code{TRUE} an error will be thrown if the radius +is not between the resolution and 1/4 the resolution of \code{bf}. Outside of +that range the algorithm is likely to yield distorted results. +\code{0.5 * mean(res(bf))} is the default, and recommended radius.} + \item{n_direction}{The number of (equally spaced) directional bins to classify bearings into. Currently only \code{1} is supported.} } @@ -25,26 +49,31 @@ classify bearings into. Currently only \code{1} is supported.} A list with: \item{between}{An array with dimensions representing the "from" location, the "to" location, and the \code{points}. Cells are \code{TRUE} -if the associated point is between the associated from and to locations.} -\item{points}{A dataframe of points that define the third dimension -in between. It is identical to the input \code{points} if they are not \code{NULL}. -Otherwise it will be a data frame with columns x, y, and i corresponding to -the third dimension in \code{between}. Note \code{i} will be \code{NA} for some points that -aren't within the mask but do fall between active cells.} +if the point is between the associated from and to locations.} +\item{points}{A data,frame of points that define the third dimension +in \code{between}. It is identical to the input \code{points} if they are not \code{NULL}. +Otherwise it will be a data frame with columns \code{x}, \code{y}, and \code{i} +corresponding to the third dimension in \code{between}. +\code{i} will be \code{NA} for points that are not within the mask but +fall between active cells.} +\item{radius}{The radius of the circle in meters.} } \description{ This internal function is used to create a betweenness array with dimensions \href{n_active()}{n_active(bf)}, \href{n_active()}{n_active(bf)}, and \code{length(points)}. -The first two dimensions represent pairs of locations within the -BirdFlow model (active cells). -The third dimension represents the \code{points}. Each value in the array is -\code{TRUE} if the point is between the associated model cells. Specifically, -if the point is within \code{radius} meters (along a great circle) +The first two dimensions from and to cells of possible connections +between pairs of locations within the BirdFlow model and both +\code{n_active()} elements. +The third dimension represents \code{points} that might be between each +connection. +Cell values are \code{TRUE} if the point is between the associated model cells. +Specifically, if the point is within \code{radius} meters (along a great circle) of the great circle line connecting the cell centers. } \details{ If \code{points} and \code{radius} are \code{NULL} they default to the cell radius and the center of all cells within the BirdFlow extent that fall between -any active cells. This is usually a superset of the active cells. +any active cells. This includes all cell centers within a convex hull +(in spherical coordinates) around the active cells in \code{bf}. } \keyword{internal} diff --git a/man/plot_flux.Rd b/man/plot_flux.Rd index dfe1e05e..633395fb 100644 --- a/man/plot_flux.Rd +++ b/man/plot_flux.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/plot_flux.R \name{plot_flux} \alias{plot_flux} -\title{Plot net bird movement} +\title{Plot bird flux} \usage{ plot_flux( flux, @@ -14,11 +14,12 @@ plot_flux( coast_color = gray(0.5), gradient_colors = NULL, title = species(bf), - value_label = "Movement" + value_label = "Flux" ) } \arguments{ -\item{flux}{A flux object as created by \link[=calc_flux]{calc_flux(format = "dataframe")}} +\item{flux}{A flux object as created by +\link[=calc_flux]{calc_flux(format = "dataframe")}} \item{bf}{A BirdFlow object} @@ -27,7 +28,7 @@ a logical vector of the same length as the number of transitions in \code{flux}; a numeric index of transitions in \code{flux}, or a subset of the transition names in \code{flux}.} -\item{limits}{Two numbers representing the range in movement values to +\item{limits}{Two numbers representing the range in flux values to display. Values outside of this range will be truncated to the range. With the default of \code{NULL} the entire range is plotted.} @@ -42,19 +43,19 @@ to skip plotting the coastline.} \item{coast_color}{The color used to plot the coastline, or \code{NULL} to skip plotting the coastline.} -\item{gradient_colors}{The colors palette used to represent the movement +\item{gradient_colors}{The colors palette used to represent the flux intensity.} \item{title}{The plot title} -\item{value_label}{The label for the net movement values.} +\item{value_label}{The label for the flux values.} } \value{ \code{plot_flux} returns a \strong{ggplot2} object. It can be displayed with \code{print()}. } \description{ -Plot net bird movement +Plot bird flux } \examples{ diff --git a/tests/testthat/_snaps/calc_flux.md b/tests/testthat/_snaps/calc_flux.md new file mode 100644 index 00000000..dfd3ba6e --- /dev/null +++ b/tests/testthat/_snaps/calc_flux.md @@ -0,0 +1,13 @@ +# calc_flux() works without directionality + + Code + top + Output + x y transition flux date + 65 -2025000 -225000 T_01-02 3.623e-09 2021-01-07 + 66 -2025000 -225000 T_02-03 1.897e-08 2021-01-14 + 109 -1875000 -225000 T_01-02 1.061e-08 2021-01-07 + 110 -1875000 -225000 T_02-03 8.883e-08 2021-01-14 + 114 -1875000 -375000 T_02-03 1.173e-10 2021-01-14 + 149 -1725000 375000 T_01-02 2.287e-10 2021-01-07 + diff --git a/tests/testthat/_snaps/is_between.md b/tests/testthat/_snaps/is_between.md new file mode 100644 index 00000000..8104fa25 --- /dev/null +++ b/tests/testthat/_snaps/is_between.md @@ -0,0 +1,7 @@ +# is_between() works + + Code + sum(between[[1]]) + Output + [1] 14278 + diff --git a/tests/testthat/test-calc_flux.R b/tests/testthat/test-calc_flux.R new file mode 100644 index 00000000..28556174 --- /dev/null +++ b/tests/testthat/test-calc_flux.R @@ -0,0 +1,151 @@ +test_that("calc_flux() works without directionality", { + + # Sparsify and truncate to speed things up + bf <- BirdFlowModels::amewoo + bf <- truncate_birdflow(bf, start = 1, end = 5) + bf <- sparsify(bf, "conditional", .9, p_protected = 0.05) + + expect_no_error(f <- calc_flux(bf)) + + # Snapshot of first 6 non-zero movements + top <- head(f[!f$flux == 0, ], 6) + top$flux <- signif(top$flux, 4) + expect_snapshot(top) + + # Visualizations + expect_no_error(plot_flux(f, bf)) + expect_no_error(animate_flux(f, bf)) + +}) + + +test_that("Test sensativity of flux to radius", { + + testthat::skip("In depth flux radius analysis - always skipped") + + # Sparsify and truncate to speed things up + bf <- BirdFlowModels::amewoo + bf <- truncate_birdflow(bf, start = 1, end = 5) + bf <- sparsify(bf, "conditional", .9, p_protected = 0.05) + + # Set radii to calculate fluxes for + rads <- mean(res(bf)) / 2 * c(0.1, 0.25, 0.5, 1, 4, 8, 16, 32, 64) + + # Calculate fluxes + fluxes <- vector(mode = "list", length(rads)) + for (i in seq_along(rads)) { + fluxes[[i]] <- calc_flux(bf, radius = rads[i], check_radius = FALSE) + } + + + # Plot fluxes + plot_dir <- tempdir() + files <- file.path(plot_dir, + paste0("flux_", rads * 2 / 1000, ".png")) + for (i in seq_along(rads)) { + ragg::agg_png(filename = files[i], width = 6, height = 6, + res = 150, units = "in") + plot_flux(fluxes[[i]], bf, + title = paste0("Radius: ", rads[i] / 1000, " km")) |> + print() + + dev.off() + } + + # Assess how total flux changes with radius + + + # Total flux + # With larger radius more response points are added + tf <- sapply(fluxes, function(x) sum(x$flux)) + + # Create uniform set of points by filtering to just the points that are + # in the smallest radius flux + add_id <- function(x) { + x$id <- paste0(x$x, "-", x$y) + x + } + fluxes <- lapply(fluxes, add_id) + ids <- unique(fluxes[[1]]$id) + fluxes2 <- lapply(fluxes, function(x) x[x$id %in% ids, , drop = FALSE]) + + # total flux by radius with fixed points + tf2 <- sapply(fluxes2, function(x) sum(x$flux)) + + # Prep for plotting + df <- data.frame(radius = rads, expanding = tf, fixed = tf2) + ldf <- tidyr::pivot_longer(df, cols = c(expanding, fixed), + values_to = "flux", + names_to = "points") + ldf$diameter <- 2 * ldf$radius / 1000 # diameter in km + dim <- mean(res(bf)) / 1000 + dim_col <- rgb(0, 0, 0, .5) + e <- ext(bf) / 1000 + width <- e[2] - e[1] + height <- e[4] - e[3] + + # Make plot of how the total flux changes with diameter + p <- ggplot2::ggplot(data = ldf, ggplot2::aes(x = .data$diameter, + y = .data$flux, + color = .data$points)) + + ggplot2::geom_line() + ggplot2::geom_point() + + ggplot2::geom_vline(xintercept = c(dim, width, height), color = dim_col) + + ggplot2::geom_text(ggplot2::aes(x = dim, + label = paste0("\nCell dimension (", + round(dim), + " km)"), + y = 0.035), + color = dim_col, angle = 90) + + ggplot2::geom_text(ggplot2::aes(x = height, + label = paste0("\nExtent height (", + round(height), + " km)"), + y = 0.043), + color = dim_col, angle = 90) + + ggplot2::geom_text(ggplot2::aes(x = width, + label = paste0("\nExtent width (", + round(width), + " km)"), + y = 0.044), + color = dim_col, angle = 90) + + ggplot2::ggtitle("Total Flux") + + ggplot2::scale_x_log10() + + ragg::agg_png(filename = file.path(plot_dir, "total_flux_diam.png"), + width = 6, height = 6, res = 150, units = "in") + + print(p) + dev.off() + + # I think there are a few things going on here: + # 1. As radius approaches zero the flux increases. I suspect this is because + # the response points are on the same grid as the starting and ending + # locations. As the radius approaches zero you multiply the flux by a number + # that approaches infinity but our points are positioned such that the + # flux value doesn't fall off as fast as we would expect given say, random + # points. + # 2. As radius increases substantially beyond the cell size than you can pick + # up points that are beyond the movement line. This means it begins + # to act more like a squared function than a linear function. + # At extremes if the radius is much larger than the movement distances + # than the lines we are intersecting begin to look a lot more like points + # and the relationship between radius and abundance that intersects the + # circle approaches the square of the radius. + # 3. (2) begins to break down as the diameter approaches the extent width, + # at that point you've already captured most of the movent at every point + # and a larger radius just means you divide that total by a bigger number. + # + # Two conclusions: + # 1. as implemented a diameter equal to the cell size is a sweet spot, and + # keeping the diameter within the range between > 0.5 and < 2 cell widths + # is a good idea. + # 2. If we want the algorithm to work with larger diameters it would make + # sense to not allow points beyond the end of the line to be counted + # (even if they are within the radius of the line). I suspect this + # would slow things down unless I implemented it in C++ as I don't think + # I can do it with any kind of vectorization in R. + + + + +}) diff --git a/tests/testthat/test-is_between.R b/tests/testthat/test-is_between.R new file mode 100644 index 00000000..f6b70feb --- /dev/null +++ b/tests/testthat/test-is_between.R @@ -0,0 +1,13 @@ +test_that("is_between() works", { + + # Sparsifying and truncating to speed things up + bf <- BirdFlowModels::amewoo + bf <- truncate_birdflow(bf, start = 1, end = 5) + bf <- sparsify(bf, "conditional", .9, p_protected = 0.05) + + between <- is_between(bf) + + expect_snapshot(sum(between[[1]])) + + +})