Merge pull request #183 from birdflow-science/population

Add get_population() and callaghan_abundance (dataset)
birdflow-science · May 15, 2024 · 6850291 · 6850291
2 parents c99704a + 7dd09b6
commit 6850291
Show file tree

Hide file tree

Showing 11 changed files with 9,931 additions and 3 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -57,3 +57,4 @@ BugReports: https://github.com/birdflow-science/BirdFlowR/issues
 Language: en-US
 Depends: 
     R (>= 3.5)
+LazyData: true
diff --git a/NAMESPACE b/NAMESPACE
@@ -44,6 +44,7 @@ export(get_marginal)
 export(get_mask)
 export(get_metadata)
 export(get_naturalearth)
+export(get_population)
 export(get_states)
 export(get_transition)
 export(great_circle_distances)

diff --git a/NEWS.md b/NEWS.md
@@ -1,10 +1,16 @@
-# Birdflow 0.1.0.9058
+# BirdFlowR 0.1.0.9059
+2024-05-15
+
+* Added `callaghan_abundance` dataset on species populations.
+* Added `get_population()` function.
+
+# BirdFlowR 0.1.0.9058
 2024-05-07
 
 * Fixed bug that prevented knitting example collection index without passed
 parameters.
 
-# Birdflow 0.1.0.9057
+# BirdFlowR 0.1.0.9057
 2024-04-17
 
 * `import_birdflow()` now works with preprocessed hdf5 files. See #177

diff --git a/R/get_population.R b/R/get_population.R
@@ -0,0 +1,90 @@
+
+#' Abundance Estimates from Callaghan et al. 2021
+#'
+#' The Callaghan et al. (2021) population estimates and 95% confidence
+#' intervals for 9,700 species.
+#'
+#' @format A data frame with 9,700 rows and 11 columns:
+#' \describe{
+#'   \item{`common_name`}{Common name}
+#'   \item{`scientific_name`}{Scientific name}
+#'   \item{`order`, `family`}{Taxonomic order and family}
+#'   \item{`lower_ci`}{The lower bound of the 95% confidence interval}
+#'   \item{`abundance_estimate`}{Estimated population size}
+#'   \item{`upper_ci`}{The upper bound of the 95% confidence interval}
+#'   \item{`range_adjusted`}{`TRUE` indicates that the area of the species
+#'   range from a source other than the eBird pixels was used to adjust the
+#'   abundance estimate.  If an independent range area was not available then
+#'   the area of the eBird cells was used.  See Callaghan et al. 2021.}
+#'   \item{`training_species`}{`TRUE` if the species was used to train the
+#'   abundance model (Callaghan et al. 2021)}
+#'   \item{`species_code`}{The eBird species code derived by joining
+#'   `scientific_name` to the `ebird_taxonomy` in the **auk** package.
+#'   `NA` indicates that the scientific name from Callaghan et al. did not
+#'   match any scientific names in the eBird taxonomy table.}
+#'   \item{`in_ebird_2022`}{`TRUE` if the `species_code` is in
+#'   `ebirdst::ebirdst_runs` from the 2022 version of the **ebirdst** package.
+#'   See: [ebirdst::ebirdst_version()] and [ebirdst::ebirdst_runs]. Starting
+#'   with the 2022 version year only a subset of the species were fit by
+#'   eBird in each year.}
+#' }
+#' @source
+#' Callaghan, Corey T., Shinichi Nakagawa, and William K. Cornwell. "Global
+#'   abundance estimates for 9,700 bird species." Proceedings of the National
+#'   Academy of Sciences 118.21 (2021): e2023170118.
+#' <https://doi.org/10.1073/pnas.2023170118>
+#'
+"callaghan_abundance"
+
+
+
+#' Return the population estimate for a BirdFlow model or species
+#'
+#' Each element of `x` is looked up in [callaghan_abundance], first by common
+#' name, then by scientific name, and finally by eBird species code. The first
+#' successful match determines the row that is returned for that element.
+#'
+#' @param x Either a single BirdFlow model, or
+#' one or more species eBird codes, scientific names, or common names.
+#' @param what A single string indicating what information will be returned.
+#' Options are:
+#' \describe{
+#'   \item{`"df"`}{A data frame with relevant rows from [callaghan_abundance]}
+#'   \item{`"abundance"`}{The abundance estimate.}
+#'   \item{`"lower_ci"`}{The lower bound of the 95 percent confidence interval}
+#'   \item{`"upper_ci"`}{The upper bound of the 95 percent confidence interval}
+#' }
+#' @return See `what`. In all cases the values correspond to `x` and will be
+#' `NA` when `x` does not match a common name, scientific name, or species
+#' code in [`callaghan_abundance`].
+#' @references
+#'  Callaghan, Corey T., Shinichi Nakagawa, and William K. Cornwell. "Global
+#'   abundance estimates for 9,700 bird species." Proceedings of the National
+#'   Academy of Sciences 118.21 (2021): e2023170118.
+#'  <https://doi.org/10.1073/pnas.2023170118>
+#'
+#' @export
+#' @examples
+#' bf <- BirdFlowModels::amewoo
+#' get_population(bf)
+#' get_population("American Black Duck")
+#' get_population("amewoo")
+get_population <- function(x, what = "abundance") {
+  # `switch()` below needs a scalar, so reject vectors of valid options too
+  stopifnot(length(what) == 1,
+            what %in% c("abundance", "lower_ci", "upper_ci", "df"))
+
+  if (inherits(x, "BirdFlow"))
+    x <- species(x, what = "code")
+
+  if (!inherits(x, "character"))
+    stop("x should be a BirdFlow model or a character species name or code")
+
+  p <- BirdFlowR::callaghan_abundance
+
+  # Resolve each element of x to a row index directly rather than going
+  # through the species code: rows whose species_code is NA (no eBird
+  # taxonomy match) can still be found by common or scientific name.
+  # incomparables = NA keeps NA inputs from matching NA entries in the table.
+  rows <- dplyr::coalesce(
+    match(x, p$common_name, incomparables = NA),
+    match(x, p$scientific_name, incomparables = NA),
+    match(x, p$species_code, incomparables = NA)
+  )
+
+  # Unmatched elements have an NA index and therefore yield all-NA rows
+  p <- p[rows, , drop = FALSE]
+
+  switch(what,
+         df = p,
+         abundance = p$abundance_estimate,
+         lower_ci = p$lower_ci,
+         upper_ci = p$upper_ci,
+         # Defensive default; not reachable after the validation above
+         stop("Unrecognized what argument."))
+}
diff --git a/data-raw/callaghan_abundance.R b/data-raw/callaghan_abundance.R
@@ -0,0 +1,40 @@
+# Prepare the Callaghan et al. (2021) abundance table as package data
+
+# Source publication:
+
+# Callaghan, Corey T., Shinichi Nakagawa, and William K. Cornwell. "Global
+#   abundance estimates for 9,700 bird species." Proceedings of the National
+#   Academy of Sciences 118.21 (2021): e2023170118.
+
+# https://www.pnas.org/doi/full/10.1073/pnas.2023170118
+
+# Supplemental data downloaded from:
+# https://www.pnas.org/doi/suppl/10.1073/pnas.2023170118/suppl_file/pnas.2023170118.sd01.xlsx  # nolint
+# and saved from Excel as .csv
+
+# Import the published abundance table as a plain data frame
+abundance <- as.data.frame(
+  readr::read_csv("data-raw/pnas.2023170118.sd01.csv",
+                  col_types = readr::cols())
+)
+
+# Standardize column names: lower case, blanks to underscores, and drop the
+# "95%_" prefix from the confidence interval columns
+col_names <- tolower(names(abundance))
+col_names <- gsub("[[:blank:]]", "_", col_names)
+names(abundance) <- gsub("95%_", "", col_names)
+
+# Look up eBird species codes by scientific name in the auk taxonomy;
+# names with no match get an NA code
+taxonomy <- auk::ebird_taxonomy
+abundance$species_code <- taxonomy$species_code[
+  match(abundance$scientific_name, taxonomy$scientific_name)
+]
+
+# Flag the species that appear in the installed ebirdst version's runs table.
+# The column name carries the ebirdst version year.
+# Note with the 2022 version year ebirdst only fit about half the species
+# focusing mostly on North American species.
+version_year <- ebirdst::ebirdst_version()$version_year
+flag_column <- paste0("in_ebird_", version_year)
+abundance[[flag_column]] <-
+  abundance$species_code %in% ebirdst::ebirdst_runs$species_code
+
+callaghan_abundance <- abundance
+
+usethis::use_data(callaghan_abundance, overwrite = TRUE)