Skip to content

Commit

Permalink
Merge pull request #38 from kyle-messier/refactor-process
Browse files Browse the repository at this point in the history
0.1.5
  • Loading branch information
sigmafelix authored Mar 16, 2024
2 parents 1c18da4 + c304e6a commit 625f3dc
Show file tree
Hide file tree
Showing 26 changed files with 220 additions and 135 deletions.
10 changes: 9 additions & 1 deletion .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,12 @@
^_pkgdown\.yml$
^docs$
^pkgdown$
^.lintr
^.lintr
tools
.github
pkgdown
^.*\.Rproj$
output
manuscript
input
tests
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: amadeus
Title: AMADEUS: A Mechanism/Machine for Data, Environments, and User Setup
Version: 0.1.4
Version: 0.1.5
Authors@R: c(
person("Kyle", "Messier", , "kyle.messier@nih.gov", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-9508-9623")),
person("Mitchell", "Manware", role = c("aut", "ctb"), comment = c(ORCID = "0009-0003-6440-6106")),
Expand All @@ -21,5 +21,5 @@ RoxygenNote: 7.3.1
Config/Needs/website: tidyverse/tidytemplate
Config/testthat/edition: 3
License: MIT + file LICENSE
URL: https://github.com/Spatiotemporal-Exposures-and-Toxicology/amadeus,
https://spatiotemporal-exposures-and-toxicology.github.io/amadeus/
URL: https://github.com/kyle-messier/amadeus,
https://kyle-messier.github.io/amadeus/
101 changes: 60 additions & 41 deletions R/calculate_covariates.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,20 +17,21 @@
#' Default is `"site_id"`.
#' @param ... Arguments passed to each covariate calculation
#' function.
#' @note `covariate` argument value is converted to lowercase.
#' @seealso
#' - `calc_modis`: `"modis"`, `"MODIS"`
#' - `calc_koppen_geiger`: `"koppen-geiger"`, `"koeppen-geiger"`, `"koppen"`,
#' - `calc_ecoregion`: `"ecoregion"`, `"ecoregions"`
#' - `calc_temporal_dummies`: `"dummies"`
#' - `calc_hms`: `"hms"`, `"noaa"`, `"smoke"`
#' - `calc_gmted`: `"gmted"`
#' - `calc_narr`: `"narr"`, `"narr_monolevel"`, `"narr_p_levels"`,
#' - [`calc_modis_par`]: `"modis"`, `"MODIS"`
#' - [`calc_koppen_geiger`]: `"koppen-geiger"`, `"koeppen-geiger"`, `"koppen"`
#' - [`calc_ecoregion`]: `"ecoregion"`, `"ecoregions"`
#' - [`calc_temporal_dummies`]: `"dummies"`
#' - [`calc_hms`]: `"hms"`, `"noaa"`, `"smoke"`
#' - [`calc_gmted`]: `"gmted"`
#' - [`calc_narr`]: `"narr"`, `"narr_monolevel"`, `"narr_p_levels"`,
#' `"plevels"`, `"monolevel"`, `"p_levels"`
#' - `calc_geos`: `"geos"`, `"geos_cf"`
#' - `calc_sedac_population`: `"population"`, `"sedac_population"`
#' - `calc_sedac_groads`: `"roads"`
#' - `calc_nlcd`: `"nlcd"`
#' - `calc_merra2`: `"merra"`, `"MERRA"`, `"merra2"`, `"MERRA2"`
#' - [`calc_geos`]: `"geos"`, `"geos_cf"`
#' - [`calc_sedac_population`]: `"population"`, `"sedac_population"`
#' - [`calc_sedac_groads`]: `"roads"`
#' - [`calc_nlcd`]: `"nlcd"`
#' - [`calc_merra2`]: `"merra"`, `"MERRA"`, `"merra2"`, `"MERRA2"`
#' @returns Calculated covariates. Mainly data.frame object.
#' @author Insang Song
#' @export
Expand Down Expand Up @@ -120,7 +121,7 @@ calc_covariates <-
#' a unique identifier field named `locs_id`
#' @param locs_id character(1). Name of unique identifier.
#' @param ... Placeholders.
#' @seealso [process_koppen_geiger()]
#' @seealso [`process_koppen_geiger`]
#' @returns a data.frame object
#' @author Insang Song
#' @importFrom terra vect
Expand Down Expand Up @@ -218,8 +219,12 @@ calc_koppen_geiger <-
#' @param locs_id character(1). Unique identifier of locations
#' @param radius numeric (non-negative) giving the
#' radius of buffer around points
#' @param max_cells integer(1). Maximum number of cells to be read at once.
#' Higher values will expedite processing, but will increase memory usage.
#' Maximum possible value is `2^31 - 1`.
#' See [`exactextractr::exact_extract`] for details.
#' @param ... Placeholders.
#' @seealso [process_nlcd()]
#' @seealso [`process_nlcd`]
#' @returns a data.frame object
#' @importFrom utils read.csv
#' @importFrom methods is
Expand All @@ -239,6 +244,7 @@ calc_nlcd <- function(from,
locs,
locs_id = "site_id",
radius = 1000,
max_cells = 1e8,
...) {
# check inputs
if (!is.numeric(radius)) {
Expand Down Expand Up @@ -273,7 +279,8 @@ calc_nlcd <- function(from,
sf::st_geometry(bufs_pol),
fun = "frac",
stack_apply = TRUE,
progress = FALSE)
progress = FALSE,
max_cells_in_memory = max_cells)
# select only the columns of interest
cfpath <- system.file("extdata", "nlcd_classes.csv", package = "amadeus")
nlcd_classes <- utils::read.csv(cfpath)
Expand Down Expand Up @@ -307,15 +314,15 @@ calc_nlcd <- function(from,
#' Calculate ecoregions covariates
#' @description
#' Extract ecoregions covariates (U.S. EPA Ecoregions Level 2/3) at point
#' locations. Returns a \code{data.frame} object containing \code{locs_id} and
#' locations. Returns a `data.frame` object containing `locs_id` and
#' binary (0 = point not in ecoregion; 1 = point in ecoregion) variables for
#' each ecoregion.
#' @param from SpatVector(1). Output of \code{process_ecoregion()}.
#' @param from SpatVector(1). Output of [`process_ecoregion`].
#' @param locs sf/SpatVector. Unique locs. Should include
#' a unique identifier field named `locs_id`
#' @param locs_id character(1). Name of unique identifier.
#' @param ... Placeholders.
#' @seealso [process_ecoregion()]
#' @seealso [`process_ecoregion`]
#' @returns a data.frame object with dummy variables and attributes of:
#' - `attr(., "ecoregion2_code")`: Ecoregion lv.2 code and key
#' - `attr(., "ecoregion3_code")`: Ecoregion lv.3 code and key
Expand Down Expand Up @@ -401,17 +408,21 @@ calc_ecoregion <-
#' @param date Date(1). date to query.
#' @param name_extracted character. Names of calculated covariates.
#' @param fun_summary function. Summary function for
#' multilayer rasters. Passed to `foo`. See [exactextractr::exact_extract]
#' multilayer rasters. Passed to `foo`. See [`exactextractr::exact_extract`]
#' for details.
#' @param max_cells integer(1). Maximum number of cells to be read at once.
#' Higher values will expedite processing, but will increase memory usage.
#' Maximum possible value is `2^31 - 1`.
#' See [`exactextractr::exact_extract`] for details.
#' @param ... Placeholders.
#' @description modis_worker operates at six MODIS/VIIRS products
#' (MOD11A1, MOD13A2, MOD06_L2, VNP46A2, MOD09GA, and MCD19A2)
#' @description The function operates on MODIS/VIIRS products
#' on a daily basis. Given that the raw hdf files are downloaded from
#' NASA, standard file names include a data retrieval date flag starting
#' with A. Leveraging that piece of information, the function will select
#' files of scope on the date of interest. Please note that this function
#' does not provide a function to filter swaths or tiles, so it is strongly
#' recommended to check and pre-filter the file names at users' discretion.
#' with letter "A". Leveraging that piece of information, the function will
#' select files of scope on the date of interest.
#' Please note that this function does not filter swaths or tiles,
#' so users are strongly advised to check and pre-filter the file
#' names before calling it.
#' @author Insang Song
#' @returns A data.frame object.
#' @importFrom terra extract
Expand All @@ -431,6 +442,7 @@ calc_modis_daily <- function(
date = NULL,
name_extracted = NULL,
fun_summary = "mean",
max_cells = 1e8,
...
) {
if (!any(methods::is(locs, "SpatVector"),
Expand Down Expand Up @@ -470,7 +482,7 @@ calc_modis_daily <- function(
force_df = TRUE,
append_cols = c(id, time),
progress = FALSE,
max_cells_in_memory = 1e7
max_cells_in_memory = max_cells
)
return(surf_at_bufs)
}
Expand Down Expand Up @@ -529,6 +541,10 @@ calc_modis_daily <- function(
#' loaded.
#' @param export_list_add character. A vector with object names to export
#' to each thread. It should be minimized to spare memory.
#' @param max_cells integer(1). Maximum number of cells to be read at once.
#' Higher values will expedite processing, but will increase memory usage.
#' Maximum possible value is `2^31 - 1`.
#' See [`exactextractr::exact_extract`] for details.
#' @param ... Arguments passed to `preprocess`.
#' @description `calc_modis_par` essentially runs [`calc_modis_daily`] function
#' in each thread (subprocess). Based on daily resolution, each day's workload
Expand All @@ -549,15 +565,16 @@ calc_modis_daily <- function(
#' `locs` here and `path` in `preprocess` functions are assumed to have a
#' standard naming convention of raw files from NASA.
#' @seealso See details for setting parallelization:
#' * [foreach::foreach]
#' * [parallelly::makeClusterPSOCK]
#' * [parallelly::availableCores]
#' * [doParallel::registerDoParallel]
#' Also, for `preprocess`, see:
#' * [process_covariates]
#' * [process_modis_merge]
#' * [process_modis_swath]
#' * [process_bluemarble]
#' * [`foreach::foreach`]
#' * [`parallelly::makeClusterPSOCK`]
#' * [`parallelly::availableCores`]
#' * [`doParallel::registerDoParallel`]
#'
#' Also, for preprocessing, see:
#' * [`process_covariates`]
#' * [`process_modis_merge`]
#' * [`process_modis_swath`]
#' * [`process_bluemarble`]
#' @importFrom foreach foreach
#' @importFrom foreach %dopar%
#' @importFrom methods is
Expand Down Expand Up @@ -585,6 +602,7 @@ calc_modis_par <-
nthreads = floor(length(parallelly::availableWorkers()) / 2),
package_list_add = NULL,
export_list_add = NULL,
max_cells = 1e8,
...
) {
if (!is.function(preprocess)) {
Expand Down Expand Up @@ -667,7 +685,8 @@ process_modis_swath, or process_bluemarble.")
date = as.character(day_to_pick),
fun_summary = fun_summary,
name_extracted = name_radius,
radius = radius[k]
radius = radius[k],
max_cells = max_cells
)
return(extracted)
}, error = function(e) {
Expand Down Expand Up @@ -798,7 +817,7 @@ calc_temporal_dummies <-
#' - `attr(result, "sedc_threshold")`: the threshold distance
#' beyond which emission source points are excluded
#' @note The function is originally from
#' [chopin](https://github.com/Spatiotemporal-Exposures-and-Toxicology/chopin)
#' [chopin](https://github.com/kyle-messier/chopin).
#' Distance calculation is done with terra functions internally.
#' Thus, the function internally converts sf objects in
#' \code{point_*} arguments to terra.
Expand Down Expand Up @@ -944,7 +963,7 @@ The result may not be accurate.\n",
#' @author Insang Song, Mariana Kassien
#' @returns a data.frame object
#' @note U.S. context.
#' @seealso [calc_sedc()], [process_tri()]
#' @seealso [`calc_sedc`], [`process_tri`]
#' @importFrom terra vect
#' @importFrom terra crs
#' @importFrom terra nearby
Expand Down Expand Up @@ -1018,7 +1037,7 @@ calc_tri <- function(
#' Unused but kept for compatibility.
#' @param ... Placeholders.
#' @author Insang Song, Ranadeep Daw
#' @seealso [process_nei()]
#' @seealso [`process_nei`]
#' @returns a data.frame object
#' @importFrom terra vect
#' @importFrom methods is
Expand Down Expand Up @@ -1248,7 +1267,7 @@ calc_hms <- function(
#' @param fun character(1). Function used to summarize multiple raster cells
#' within sites location buffer (Default = `mean`).
#' @author Mitchell Manware
#' @seealso [process_gmted()]
#' @seealso [`process_gmted`]
#' @return a data.frame object
#' @importFrom terra vect
#' @importFrom terra as.data.frame
Expand Down Expand Up @@ -1366,7 +1385,7 @@ calc_gmted <- function(
#' @param fun character(1). Function used to summarize multiple raster cells
#' within sites location buffer (Default = `mean`).
#' @author Mitchell Manware
#' @seealso [process_narr()]
#' @seealso [`process_narr`]
#' @return a data.frame object
#' @importFrom terra vect
#' @importFrom terra as.data.frame
Expand Down
Loading

0 comments on commit 625f3dc

Please sign in to comment.