Skip to content

Commit

Permalink
Merge pull request #183 from birdflow-science/population
Browse files Browse the repository at this point in the history
Add get_population() and callaghan_abundance (dataset)
  • Loading branch information
ethanplunkett committed May 15, 2024
2 parents c99704a + 7dd09b6 commit 6850291
Show file tree
Hide file tree
Showing 11 changed files with 9,931 additions and 3 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,4 @@ BugReports: https://github.com/birdflow-science/BirdFlowR/issues
Language: en-US
Depends:
R (>= 3.5)
LazyData: true
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ export(get_marginal)
export(get_mask)
export(get_metadata)
export(get_naturalearth)
export(get_population)
export(get_states)
export(get_transition)
export(great_circle_distances)
Expand Down
10 changes: 8 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
# Birdflow 0.1.0.9058
# BirdFlowR 0.1.0.9059
2024-05-15

* Added `callaghan_abundance` dataset on species populations.
* Added `get_population()` function.

# BirdFlowR 0.1.0.9058
2024-05-07

* Fixed bug that prevented knitting example collection index without passed
parameters.

# Birdflow 0.1.0.9057
# BirdFlowR 0.1.0.9057
2024-04-17

* `import_birdflow()` now works with preprocessed hdf5 files. See #177
Expand Down
90 changes: 90 additions & 0 deletions R/get_population.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@

#' Abundance Estimates from Callaghan et al. 2021
#'
#' The Callaghan et al. (2021) population estimates and 95% confidence
#' intervals for 9,700 species.
#'
#' @format A data frame with 9,700 rows and 11 columns:
#' \describe{
#' \item{`common_name`}{Common name}
#' \item{`scientific_name`}{Scientific name}
#' \item{`order`, `family`}{Taxonomic order and family}
#' \item{`lower_ci`}{The lower bound of the 95% confidence interval}
#' \item{`abundance_estimate`}{Estimated population size}
#' \item{`upper_ci`}{The upper bound of the 95% confidence interval}
#' \item{`range_adjusted`}{`TRUE` indicates that the area of the species
#' range from a source other than the eBird pixels was used to adjust the
#' abundance estimate. If an independent range area was not available then
#' the area of the eBird cells was used. See Callaghan et al. 2021.}
#' \item{`training_species`}{`TRUE` if the species was used to train the
#' abundance model (Callaghan et al. 2021)}
#' \item{`species_code`}{The eBird species code derived by joining
#' `scientific_name` to the `ebird_taxonomy` in the **auk** package.
#' `NA` indicates that the scientific name from Callaghan et al. did not
#' match any scientific names in the eBird taxonomy table.}
#' \item{`in_ebird_2022`}{`TRUE` if the `species_code` is in
#' `ebirdst::ebirdst_runs` from the 2022 version of the **ebirdst** package.
#' See: [ebirdst::ebirdst_version()] and [ebirdst::ebirdst_runs]. Starting
#' with the 2022 version year only a subset of the species were fit by eBird
#' in each year.}
#' }
#' @source
#' Callaghan, Corey T., Shinichi Nakagawa, and William K. Cornwell. "Global
#' abundance estimates for 9,700 bird species." Proceedings of the National
#' Academy of Sciences 118.21 (2021): e2023170118.
#' <https://doi.org/10.1073/pnas.2023170118>
#'
"callaghan_abundance"



#' Return the population estimate for a BirdFlow model or species
#'
#' Each element of `x` is looked up in [callaghan_abundance], trying the
#' common name first, then the scientific name, and finally the eBird species
#' code. The first successful match determines the row that is returned.
#'
#' @param x Either a single BirdFlow model, or
#' one or more species eBird codes, scientific names, or common names.
#' @param what A single string indicating what information will be returned.
#' Options are:
#' \describe{
#' \item{`"df"`}{A data frame with relevant rows from [callaghan_abundance]}
#' \item{`"abundance"`}{The abundance estimate.}
#' \item{`"lower_ci"`}{The lower 95 percent confidence interval}
#' \item{`"upper_ci"`}{The upper 95 percent confidence interval}
#' }
#' @return See `what`. In all cases the values correspond to `x` and will be
#' `NA` when `x` cannot be cross walked to [`callaghan_abundance`].
#' @references
#' Callaghan, Corey T., Shinichi Nakagawa, and William K. Cornwell. "Global
#' abundance estimates for 9,700 bird species." Proceedings of the National
#' Academy of Sciences 118.21 (2021): e2023170118.
#' <https://doi.org/10.1073/pnas.2023170118>
#'
#' @export
#' @examples
#' bf <- BirdFlowModels::amewoo
#' get_population(bf)
#' get_population("American Black Duck")
#' get_population("amewoo")
get_population <- function(x, what = "abundance") {
  # `what` drives a scalar switch() below, so require exactly one valid string.
  stopifnot(
    is.character(what),
    length(what) == 1,
    what %in% c("abundance", "lower_ci", "upper_ci", "df")
  )

  if (inherits(x, "BirdFlow")) {
    x <- species(x, what = "code")
  }

  if (!inherits(x, "character")) {
    stop("x should be a BirdFlow model or a character species name or code")
  }

  p <- BirdFlowR::callaghan_abundance

  # Resolve every element of x straight to a row index. Going through
  # species_code as an intermediate key would lose rows whose species_code is
  # NA (names that did not match the eBird taxonomy), even when x matches
  # their common or scientific name exactly.
  # incomparables = NA keeps an NA in x from matching NA entries in the table.
  rows <- dplyr::coalesce(
    match(x, p$common_name, incomparables = NA),
    match(x, p$scientific_name, incomparables = NA),
    match(x, p$species_code, incomparables = NA)
  )

  # Unmatched elements have an NA index and therefore yield all-NA rows,
  # keeping the result aligned with x.
  p <- p[rows, , drop = FALSE]

  switch(what,
    df = p,
    abundance = p$abundance_estimate,
    lower_ci = p$lower_ci,
    upper_ci = p$upper_ci,
    stop("Unrecognized what argument.")
  )
}
40 changes: 40 additions & 0 deletions data-raw/callaghan_abundance.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Build the `callaghan_abundance` dataset shipped with the package.
#
# Source publication:
#
# Callaghan, Corey T., Shinichi Nakagawa, and William K. Cornwell. "Global
# abundance estimates for 9,700 bird species." Proceedings of the National
# Academy of Sciences 118.21 (2021): e2023170118.
#
# https://www.pnas.org/doi/full/10.1073/pnas.2023170118
#
# The supplemental spreadsheet was downloaded from:
# https://www.pnas.org/doi/suppl/10.1073/pnas.2023170118/suppl_file/pnas.2023170118.sd01.xlsx # nolint
# and exported from Excel as a .csv file.

# Load the published abundance table as a plain data frame
abundance <- as.data.frame(
  readr::read_csv(
    "data-raw/pnas.2023170118.sd01.csv",
    col_types = readr::cols()
  )
)

# Standardize column names: lower case, blanks to underscores, and drop the
# "95%_" prefix from the confidence interval columns
clean_names <- gsub("[[:blank:]]", "_", tolower(names(abundance)))
names(abundance) <- gsub("95%_", "", clean_names)

# Attach eBird species codes by joining on scientific name to the auk taxonomy
taxonomy <- auk::ebird_taxonomy
taxon_row <- match(abundance$scientific_name, taxonomy$scientific_name)
abundance$species_code <- taxonomy$species_code[taxon_row]

# Flag species that have a run in the installed ebirdst version year.
# Note: as of the 2022 version year ebirdst only fit about half the species,
# focusing mostly on North American species.
version_year <- ebirdst::ebirdst_version()$version_year
runs <- ebirdst::ebirdst_runs
flag_column <- paste0("in_ebird_", version_year)
abundance[[flag_column]] <- abundance$species_code %in% runs$species_code

# Save under the exported dataset name
callaghan_abundance <- abundance

usethis::use_data(callaghan_abundance, overwrite = TRUE)
Loading

0 comments on commit 6850291

Please sign in to comment.