Skip to content

Commit

Permalink
Merge pull request #178 from birdflow-science/123-flux-memory
Browse files Browse the repository at this point in the history
Update calc_flux()
  • Loading branch information
ethanplunkett committed Mar 29, 2024
2 parents 8b4301a + f850802 commit 37dfdd3
Show file tree
Hide file tree
Showing 14 changed files with 471 additions and 89 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: BirdFlowR
Title: Predict and Visualize Bird Movement
Version: 0.1.0.9054
Version: 0.1.0.9055
Authors@R:
c(person("Ethan", "Plunkett", email = "plunkett@umass.edu", role = c("aut", "cre"),
comment = c(ORCID = "0000-0003-4405-2251")),
Expand Down
16 changes: 16 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,19 @@
# BirdFlowR 0.1.0.9055
2023-03-29
## Flux II
* Flux values are now divided by the radius to standardize units at
at P/km/week/ where P is the proportion of the population.
* Add tests for `calc_flux()` and `is_between()`
* Add analysis of flux radius parameter's affect on flux values. It's in
`"test-calc_flux.R"`.
* Add limit to radius argument in `calc_flux()` and ability to bypass it with
`check_radius` argument.
* Modified `is_between()` to only calculate the result for transitions that
are non-zero (at any timestep) in `bf`. This provides a slight performance
benefit for standard models and a large one for sparse models.
* `is_betweeen()` now processes connection lines in batches, controlled by
`batch_size` argument to it and `calc_flux()`. This is for memory management.


# BirdFlowR 0.1.0.9054
## Flux I
Expand Down
44 changes: 35 additions & 9 deletions R/calc_flux.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,30 @@
#' Estimate net bird movement
#' Estimate bird flux
#'
#' `calc_flux()` estimates the proportion of the species that passes through
#' `calc_flux()` estimates the proportion of the species that passes near
#' a set of points during each transition in a BirdFlow model.
#'
#' @section Units:
#'
#' The total relative abundance passing through the circle centered on the point
#' is divided by the diameter of the circle in kilometers. The units of the
#' returned value is therefore roughly the proportion
#' (\eqn{P}) of the
#' species's population that is expected to pass through each km
#' of a line oriented perpendicular to the movement at each point:
#' \eqn{\frac{P}{km \cdot week}}
#'
#' Multiplying the result by the total population would yield:
#' \eqn{\frac{birds}{km \cdot week}}
#'
#' @section Limitations:
#'
#' `calc_flux()` makes the incorrect simplifying assumption
#' that birds follow the shortest (great circle) path
#' between the center of the the source and destination raster cells. Caution
#' should be used when interpreting the results especially around
#' major geographic features such as coasts, large lakes, mountain ranges, and
#' ecological system boundaries that might result in non-linear migration paths.
#'
#' @param bf A BirdFlow model
#' @param points A set of points to calculate movement through. If `points` is
#' `NULL` they will default to the BirdFlow model cells that are either active
Expand All @@ -15,20 +37,21 @@
#' or an even number.
#' @param format The format to return the results in one of:
#' \describe{
#' \item{`"points"`}{Returns a list with `net_movement` a matrix or array of
#' movement, and `points` a data frame of either the input `points` or the
#' point locations }
#' \item{`"points"`}{Returns a list with `flux` a matrix or array of
#' flux values, and `points` a data frame of either the input `points` or the
#' default cell center derived points.}
#' \item{`"dataframe"`}{Returns a "long" data frame with columns:
#' * `x` and `y` coordinates of the points.
#' * `transition` Transition code.
#' * `movement` The total abundance moving through the point in the transition.
#' * `flux` The flux at the point. See "Units" below .
#' * `date` The date associated with the transition, will be at the midpoint
#' between timesteps.
#'
#' }
#' \item{`"SpatRaster"`}{Returns a `terra::SpatRaster` with layers for each
#' transition.}
#'}
#' @inheritParams is_between
#'
#' @return See `format` argument.
#' @export
Expand All @@ -45,7 +68,7 @@
#' }
#'
calc_flux <- function(bf, points = NULL, radius = NULL, n_directions = 1,
format = NULL) {
format = NULL, batch_size = 1e6, check_radius = TRUE) {

if (n_directions != 1)
stop("Only one directional flux is supported at the moment.")
Expand All @@ -65,6 +88,7 @@ calc_flux <- function(bf, points = NULL, radius = NULL, n_directions = 1,
result <- is_between(bf, points, radius, n_directions)
between <- result$between
points <- result$points
radius_km <- result$radius / 1000

timesteps <- lookup_timestep_sequence(bf)
transitions <- lookup_transitions(bf)
Expand Down Expand Up @@ -96,9 +120,11 @@ calc_flux <- function(bf, points = NULL, radius = NULL, n_directions = 1,
}
}

# Standardize to P of population to pass through KM of transect in a week
net_movement <- net_movement / (radius_km * 2)

if (format == "points") {
return(list(net_movement, points))
return(list(flux = net_movement, point = points))
}

if (format == "spatraster") {
Expand All @@ -124,7 +150,7 @@ calc_flux <- function(bf, points = NULL, radius = NULL, n_directions = 1,
wide <- cbind(as.data.frame(points)[, c("x", "y")],
net_movement)
long <- tidyr::pivot_longer(wide, cols = setdiff(names(wide), c("x", "y")),
names_to = "transition", values_to = "movement")
names_to = "transition", values_to = "flux")

long$date <- as.character(lookup_date(long$transition, bf))

Expand Down
2 changes: 1 addition & 1 deletion R/get_marginal.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ get_marginal <- function(x, marginal = NULL, from = NULL) {
length(from) == 1,
from %in% seq_len(n_timesteps(x)))
if (from == n_timesteps(x)) {
if(!is_cyclical(x))
if (!is_cyclical(x))
stop("x isn't cyclical so there's no marginal with from = ", from)
to <- 1
} else {
Expand Down
150 changes: 115 additions & 35 deletions R/is_between.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,20 @@ if (FALSE) {
#'
#' This internal function is used to create a betweenness array with dimensions
#' [n_active(bf)](n_active()), [n_active(bf)](n_active()), and `length(points)`.
#' The first two dimensions represent pairs of locations within the
#' BirdFlow model (active cells).
#' The third dimension represents the `points`. Each value in the array is
#' `TRUE` if the point is between the associated model cells. Specifically,
#' if the point is within `radius` meters (along a great circle)
#' The first two dimensions from and to cells of possible connections
#' between pairs of locations within the BirdFlow model and both
#' `n_active()` elements.
#' The third dimension represents `points` that might be between each
#' connection.
#' Cell values are `TRUE` if the point is between the associated model cells.
#' Specifically, if the point is within `radius` meters (along a great circle)
#' of the great circle line connecting the cell centers.
#'
#' If `points` and `radius` are `NULL` they default to the cell radius and the
#' center of all cells within the BirdFlow extent that fall between
#' any active cells. This is usually a superset of the active cells.
#' any active cells. This includes all cell centers within a convex hull
#' (in spherical coordinates) around the active cells in `bf`.
#'
#'
#' @param bf A BirdFlow model
#' @param points The points to evaluate betweenness on. If NULL the cell
Expand All @@ -31,19 +35,35 @@ if (FALSE) {
#' locations. `radius` defaults to half the cell size (`mean(res(bf))/2`).
#' @param n_direction The number of (equally spaced) directional bins to
#' classify bearings into. Currently only `1` is supported.
#'
#' @param skip_unconnected If `TRUE` then only connections that exist in `bf`
#' will be evaluated, and between matrix will erroneously indicate that
#' no points are between locations that are not connected.
#' The resulting array can still be used with the model it was built for because
#' those missing connections would always have zero probability.
#' @param batch_size controls the number of movement lines that are processed
#' at a time. A smaller
#' `batch_size` will conserve memory at a slight performance cost. The number
#' of batches will be less than or equal to `n_active(bf)^2 / batch_size`.
#' @param check_radius If `TRUE` an error will be thrown if the radius
#' is not between the resolution and 1/4 the resolution of `bf`. Outside of
#' that range the algorithm is likely to yield distorted results.
#' `0.5 * mean(res(bf))` is the default, and recommended radius.
#' @return A list with:
#' \item{between}{An array with dimensions representing the
#' "from" location, the "to" location, and the `points`. Cells are `TRUE`
#' if the associated point is between the associated from and to locations.}
#' \item{points}{A dataframe of points that define the third dimension
#' in between. It is identical to the input `points` if they are not `NULL`.
#' Otherwise it will be a data frame with columns x, y, and i corresponding to
#' the third dimension in `between`. Note `i` will be `NA` for some points that
#' aren't within the mask but do fall between active cells.}
#' if the point is between the associated from and to locations.}
#' \item{points}{A data,frame of points that define the third dimension
#' in `between`. It is identical to the input `points` if they are not `NULL`.
#' Otherwise it will be a data frame with columns `x`, `y`, and `i`
#' corresponding to the third dimension in `between`.
#' `i` will be `NA` for points that are not within the mask but
#' fall between active cells.}
#' \item{radius}{The radius of the circle in meters.}
#' @keywords internal
is_between <- function(bf, points = NULL, radius = NULL, n_directions = 1) {

is_between <- function(bf, points = NULL, radius = NULL, n_directions = 1,
skip_unconnected = TRUE, batch_size = 1e6,
check_radius = TRUE) {
bf_msg("Generating between array.\n")
# Force use of s2
o_use_s2 <- sf::sf_use_s2()
on.exit(sf::sf_use_s2(o_use_s2))
Expand All @@ -60,8 +80,17 @@ is_between <- function(bf, points = NULL, radius = NULL, n_directions = 1) {

if (is.null(radius)) {
radius <- mean(res(bf)) / 2
}
} else if (check_radius) {
# Analysis of the effect of changing radius is in test-calc_flux.R

radius_cells <- radius / mean(res(bf)) # radius converted to cells
if (radius_cells <= 0.25 || radius_cells >= 1) {
stop("radius should be less than the resolution and more than 1/4 the ",
"resolution or the flux is likely to be biased. ",
"Set check_radius to FALSE to ignore this advice.")
}
}
bf_msg("Creating points\n")
if (is.null(points)) {

# Create points at all cell centers in the rectangular raster
Expand Down Expand Up @@ -99,6 +128,13 @@ is_between <- function(bf, points = NULL, radius = NULL, n_directions = 1) {
points <- points[sv, , drop = FALSE]
}






bf_msg("Preparing points\n")

# Initialize array (All FALSE) to hold betweenness
between <- array(data = FALSE,
dim = c(n_active(bf), n_active(bf), nrow(points)),
Expand Down Expand Up @@ -139,24 +175,70 @@ is_between <- function(bf, points = NULL, radius = NULL, n_directions = 1) {
# They represent stop overs or seasonal residence, not migratory movement
pairs <- pairs[pairs$from != pairs$to, , drop = FALSE]

# Lines connecting pairs of points
lines <- vector(mode = "list", length = nrow(pairs))
for (i in seq_along(lines)) {
lines[[i]] <- sf::st_linestring(a_coords[c(pairs$from[i], pairs$to[i]), ])
}

# Convert to simple features column and add CRS
lines_sfc <- sf::st_sfc(lines, crs = "EPSG:4326")

# Determine if each location is between each possible pair of model locations
# based on whether it's within the `radius` distance (along a great circle)
# to each of the great circle lines connecting the pairs.
within_dist <- sf::st_is_within_distance(lines_sfc, points_sph,
dist = units::set_units(radius, "m"))
for (i in seq_len(nrow(pairs))) {
between[pairs$from[i], pairs$to[i], within_dist[[i]]] <- TRUE
between[pairs$to[i], pairs$from[i], within_dist[[i]]] <- TRUE


# Drop pairs that aren't ever connected
if (skip_unconnected) {
# Create matrix indicating which active cells are connected to
# each other via a non-zero marginal at any timestep
# With sparse models this will eliminate a lot of connections
# With non-sparse models it will still eliminate some
# due to dynamic masking.
ever_connected <- matrix(FALSE, n_active(bf), n_active(bf))
dm <- get_dynamic_mask(bf)
mi <- bf$marginals$index
mi <- mi[mi$direction == "forward", ]

for (i in seq_len(nrow(mi))) {
from_dm <- dm[, mi$from[i]]
to_dm <- dm[, mi$to[i]]
marg <- get_marginal(bf, mi$marginal[i])
ever_connected[from_dm, to_dm] <- as.matrix(marg != 0)
}
ever_connected <- ever_connected | t(ever_connected) # backwards counts

connected_pairs <- data.frame(from = row(ever_connected)[ever_connected],
to = col(ever_connected)[ever_connected])
connected_pairs$id <- with(connected_pairs, paste(from, "-", to))


pairs <- pairs[pairs$id %in% connected_pairs$id, , drop = FALSE]

}
# To avoid

# Add batch number
pairs$batch <- rep(seq_len(nrow(pairs)),
each = batch_size,
length.out = nrow(pairs))
n_batch <- max(pairs$batch)

bf_msg("Processing points in ", n_batch, " batches \n")
for (batch in seq_len(n_batch)) {
bf_msg(round(batch / n_batch * 100, 2), "%\n")
b_pairs <- pairs[pairs$batch == batch, , drop = FALSE]
# Lines connecting pairs of points
lines <- vector(mode = "list", length = nrow(b_pairs))
for (i in seq_along(lines)) {
lines[[i]] <-
sf::st_linestring(a_coords[c(b_pairs$from[i], b_pairs$to[i]), ])
}

# Convert to simple features column and add CRS
lines_sfc <- sf::st_sfc(lines, crs = "EPSG:4326")

# Determine if each location is between each possible pair of model
# locations based on whether it's within the `radius` distance (along a
# great circle) to each of the great circle lines connecting the pairs.
within_dist <-
sf::st_is_within_distance(lines_sfc, points_sph,
dist = units::set_units(radius, "m"))
for (i in seq_len(nrow(b_pairs))) {
between[b_pairs$from[i], b_pairs$to[i], within_dist[[i]]] <- TRUE
between[b_pairs$to[i], b_pairs$from[i], within_dist[[i]]] <- TRUE
}
} # End batch loop

if (FALSE) {
# visualize - for coding or debugging
Expand All @@ -172,7 +254,5 @@ is_between <- function(bf, points = NULL, radius = NULL, n_directions = 1) {
col = "green", add = TRUE)
}



return(list(between = between, points = points))
return(list(between = between, points = points, radius = radius))
}
Loading

0 comments on commit 37dfdd3

Please sign in to comment.