diff --git a/.Rbuildignore b/.Rbuildignore index eb1a7d0..666c0c9 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -18,5 +18,8 @@ output manuscript input tests +R/olm_functions.R +vignettes/download_functions.Rmd vignettes/epa_download.Rmd -R/olm_functions.R \ No newline at end of file +vignettes/protected_datasets.Rmd +inst/extdata/air.2m \ No newline at end of file diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index f45f795..8635290 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -29,4 +29,5 @@ jobs: run: lintr::lint_package() shell: Rscript {0} env: + FILTER_REGEX_EXCLUDE: vignettes/workflow.Rmd LINTR_ERROR_ON_LINT: true \ No newline at end of file diff --git a/.gitignore b/.gitignore index e90e453..5d0c107 100644 --- a/.gitignore +++ b/.gitignore @@ -54,3 +54,4 @@ rsconnect/ /doc/ /Meta/ inst/doc +inst/extdata/air.2m \ No newline at end of file diff --git a/.lintr b/.lintr index ddbabee..35ea5cc 100644 --- a/.lintr +++ b/.lintr @@ -2,5 +2,6 @@ linters: linters_with_defaults( commented_code_linter = NULL ) exclusions: list( - "tests/testthat/test-download_functions.R" + "tests/testthat/test-download_functions.R", + "vignettes/workflow.Rmd" ) diff --git a/DESCRIPTION b/DESCRIPTION index feaefb5..5ba99ed 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,18 +1,18 @@ Package: amadeus Title: A Machine for Data, Environments, and User Setup for Common Environmental and Climate Health Datasets -Version: 1.0.0 +Version: 1.0.1 Authors@R: c( - person("Mitchell", "Manware", role = c("aut", "ctb"), comment = c(ORCID = "0009-0003-6440-6106")), - person("Insang", "Song", role = c("aut", "ctb"), comment = c(ORCID = "0000-0001-8732-3256")), - person("Eva", "Marques", role = c("aut", "ctb"), comment = c(ORCID = "0000-0001-9817-6546")), - person("Mariana", "Alifa Kassien", role = c("aut", "ctb"), comment = c(ORCID = "0000-0003-2295-406X")), - person("Kyle", "Messier", role = c("aut", "cre"), email = "kyle.messier@nih.gov", 
comment = c(ORCID = "0000-0001-9508-9623")) + person(given = "Mitchell", family = "Manware", role = c("aut", "ctb"), comment = c(ORCID = "0009-0003-6440-6106")), + person(given = "Insang", family = "Song", role = c("aut", "ctb"), comment = c(ORCID = "0000-0001-8732-3256")), + person(given = "Eva", family = "Marques", role = c("aut", "ctb"), comment = c(ORCID = "0000-0001-9817-6546")), + person(given = "Mariana", family = "Alifa Kassien", role = c("aut", "ctb"), comment = c(ORCID = "0000-0003-2295-406X")), + person(given = "Kyle", family = "Messier", role = c("aut", "cre"), email = "kyle.messier@nih.gov", comment = c(ORCID = "0000-0001-9508-9623")) ) Maintainer: Kyle Messier -Description: Functions are designed to facilitate access to and utility with large scale, publicly available environmental data in R. The package contains functions for downloading (download_data()) raw data files from web URLs, processing (process_covariates()) the raw data files into clean spatial objects, and extracting values (calc_covariates()) from the spatial data objects at point and polygon locations. These functions call a series of source-specific functions which are tailored to each data sources/datasets particular URL structure, data format, and spatial/temporal resolution. The functions are tested, versioned, and open source and open access. +Description: Functions are designed to facilitate access to and utility with large-scale, publicly available environmental data in R. The package contains functions for downloading raw data files from web URLs (download_data()), processing the raw data files into clean spatial objects (process_covariates()), and extracting values from the spatial data objects at point and polygon locations (calc_covariates()). These functions call a series of source-specific functions which are tailored to each data source's/dataset's particular URL structure, data format, and spatial/temporal resolution.
The functions are tested, versioned, and open source and open access. Depends: R (>= 4.1.0) Imports: dplyr, sf, sftime, stats, terra, methods, data.table, httr, rvest, exactextractr, utils, stringi, testthat (>= 3.0.0), parallelly, stars, future, future.apply, tidyr, rlang, nhdplusTools, archive, collapse, Rdpack -Suggests: covr, withr, knitr, rmarkdown, lwgeom, FNN, doRNG, devtools, stringr +Suggests: covr, withr, knitr, rmarkdown, lwgeom, FNN, doRNG, devtools, stringr, tigris RdMacros: Rdpack Encoding: UTF-8 VignetteBuilder: knitr, rmarkdown @@ -21,5 +21,5 @@ RoxygenNote: 7.3.2 Config/Needs/website: tidyverse/tidytemplate Config/testhat/edition: 3 License: MIT + file LICENSE -URL: https://github.com/NIEHS/amadeus, +URL: https://github.com/NIEHS/amadeus BugReports: https://github.com/NIEHS/amadeus/issues diff --git a/LICENSE b/LICENSE index 092dbbd..7cdec7c 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2024 {SET}group +Copyright (c) 2024 Spatiotemporal Exposures and Toxicology Group Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/R/calculate_covariates.R b/R/calculate_covariates.R index f299c0d..d341d95 100644 --- a/R/calculate_covariates.R +++ b/R/calculate_covariates.R @@ -1146,14 +1146,14 @@ calc_temporal_dummies <- #' nc <- terra::project(nc, "EPSG:5070") #' pnt_locs <- terra::centroids(nc, inside = TRUE) #' pnt_locs <- pnt_locs[, "NAME"] -#' pnt_from <- terra::spatSample(nc, 100L) -#' pnt_from$pid <- seq(1, 100) +#' pnt_from <- terra::spatSample(nc, 10L) +#' pnt_from$pid <- seq(1, 10) #' pnt_from <- pnt_from[, "pid"] -#' pnt_from$val1 <- rgamma(100L, 1, 0.05) -#' pnt_from$val2 <- rgamma(100L, 2, 1) +#' pnt_from$val1 <- rgamma(10L, 1, 0.05) +#' pnt_from$val2 <- rgamma(10L, 2, 1) #' #' vals <- c("val1", "val2") -#' calc_sedc(pnt_locs, pnt_from, "NAME", 1e5, vals) +#' calc_sedc(pnt_locs, pnt_from, "NAME", 1e4, 
vals) #' @importFrom dplyr as_tibble #' @importFrom dplyr left_join #' @importFrom dplyr summarize diff --git a/R/process.R b/R/process.R index c52a8e8..7beaf8a 100644 --- a/R/process.R +++ b/R/process.R @@ -1861,39 +1861,39 @@ process_narr <- function( for (p in seq_along(data_paths_ym)) { #### import data data_year <- terra::rast(data_paths_ym[p], win = extent) - message(paste0( - "Cleaning ", - variable, - " data for ", - month.name[ - as.numeric( - substr( - gsub( - "-", - "", - terra::time(data_year)[1] - ), - 5, - 6 - ) - ) - ], - ", ", - substr( - gsub( - "-", - "", - terra::time(data_year)[1] - ), - 1, - 4 - ), - "...\n" - )) #### check for mono or pressure levels if (grepl("level", names(data_year)[1])) { #### pressure levels data message(paste0("Detected pressure levels data...\n")) + message(paste0( + "Cleaning ", + variable, + " data for ", + month.name[ + as.numeric( + substr( + gsub( + "-", + "", + terra::time(data_year)[1] + ), + 5, + 6 + ) + ) + ], + ", ", + substr( + gsub( + "-", + "", + terra::time(data_year)[1] + ), + 1, + 4 + ), + "...\n" + )) days <- sapply( strsplit( names(data_year), @@ -1941,6 +1941,22 @@ process_narr <- function( } else { #### mono level data message(paste0("Detected monolevel data...\n")) + message(paste0( + "Cleaning ", + variable, + " data for ", + substr( + gsub( + "-", + "", + terra::time(data_year)[1] + ), + 1, + 4 + ), + "...\n" + )) + names(data_year) <- paste0( variable, "_", diff --git a/inst/REFERENCES.bib b/inst/REFERENCES.bib index a04cfaa..c41db9d 100644 --- a/inst/REFERENCES.bib +++ b/inst/REFERENCES.bib @@ -909,7 +909,6 @@ @misc{data_usgs2023nhd author = {{U.S. 
Geological Survey}}, title = {{National Hydrography Dataset (NHD) -- USGS National Map Downloadable Data Collection}}, url = {https://www.sciencebase.gov/catalog/item/4f5545cce4b018de15819ca9}, - urldate = {2024-06-24}, year = {2023} } @@ -1953,10 +1952,8 @@ @misc{data_gmao_merra-tavgU_3d_qdt_Np @misc{dewitz_national_2024, title = {National {Land} {Cover} {Database} ({NLCD}) 2019 {Products} (ver. 3.0, {February} 2024)}, - url = {https://www.sciencebase.gov/catalog/item/5f21cef582cef313ed940043}, doi = {10.5066/P9KZCM54}, abstract = {The U.S. Geological Survey (USGS), in partnership with several federal agencies, has developed and released five National Land Cover Database (NLCD) products over the past two decades: NLCD 1992, 2001, 2006, 2011, and 2016. The 2016 release saw landcover created for additional years of 2003, 2008, and 2013. These products provide spatially explicit and reliable information on the Nation’s land cover and land cover change. To continue the legacy of NLCD and further establish a long-term monitoring capability for the Nation’s land resources, the USGS has designed a new generation of NLCD products named NLCD 2019. The NLCD 2019 design aims to provide innovative, consistent, and robust methodologies for production of a multi-temporal land cover and land cover change database from 2001 to 2019 at 2–3-year intervals. 
Comprehensive research was conducted and resulted in developed strategies for NLCD 2019: continued integration between impervious surface and all landcover products with impervious surface being directly mapped as developed classes in the landcover, a streamlined compositing process for assembling and preprocessing based on Landsat imagery and geospatial ancillary datasets; a multi-source integrated training data development and decision-tree based land cover classifications; a temporally, spectrally, and spatially integrated land cover change analysis strategy; a hierarchical theme-based post-classification and integration protocol for generating land cover and change products; a continuous fields biophysical parameters modeling method; and an automated scripted operational system for the NLCD 2019 production. The performance of the developed strategies and methods were tested in twenty composite referenced areas throughout the conterminous U.S. An overall accuracy assessment from the 2016 publication give a 91\% overall landcover accuracy, with the developed classes also showing a 91\% accuracy in overall developed. Results from this study confirm the robustness of this comprehensive and highly automated procedure for NLCD 2019 operational mapping. Questions about the NLCD 2019 land cover product can be directed to the NLCD 2019 land cover mapping team at USGS EROS, Sioux Falls, SD (605) 594-6151 or mrlc@usgs.gov. See included spatial metadata for more details.}, - urldate = {2024-06-24}, publisher = {U.S. Geological Survey}, author = {Dewitz, J.}, month = {March}, @@ -1966,10 +1963,8 @@ @usgs.gov. @misc{dewitz_national_2023, title = {National {Land} {Cover} {Database} ({NLCD}) 2021 {Products}}, - url = {https://www.sciencebase.gov/catalog/item/647626cbd34e4e58932d9d4e}, doi = {10.5066/P9JZ7AO3}, abstract = {The U.S. 
Geological Survey (USGS), in partnership with several federal agencies, has now developed and released seven National Land Cover Database (NLCD) products: NLCD 1992, 2001, 2006, 2011, 2016, 2019, and 2021. Beginning with the 2016 release, land cover products were created for two-to-three-year intervals between 2001 and the most recent year. These products provide spatially explicit and reliable information on the Nation's land cover and land cover change. NLCD continues to provide innovative, consistent, and robust methodologies for production of a multi-temporal land cover and land cover change database. The NLCD 2021 release is update based, so the Land Cover and Impervious Surface products released in 2019 are unchanged and used directly with NLCD 2021 for change analysis though the NLCD timespan. Science products and the change index are updated and will need to be reacquired to contain the additional 2021 change. These new products use a streamlined compositing process for assembling and preprocessing Landsat imagery and geospatial ancillary datasets; a temporally, spectrally, and spatially integrated land cover change analysis strategy; a theme-based post-classification protocol for generating land cover and change products; a continuous fields biophysical parameters modeling method; and a scripted operational system. The overall accuracy of the 2019 Level I land cover was 91\%. Results from this study confirm the robustness of this comprehensive and highly automated procedure for NLCD 2021 operational mapping (see https://doi.org/10.1080/15481603.2023.2181143 for the latest accuracy assessment publication). Questions about the NLCD 2021 land cover product can be directed to the NLCD 2021 land cover mapping team at USGS EROS, Sioux Falls, SD (605) 594-6151 or mrlc@usgs.gov. See included spatial metadata for more details. Please see National Land Cover Database (NLCD) 2019 Products (ver.
2.0, June 2021) - ScienceBase-Catalog for the 2019 release of NLCD which is used with the 2021 release for comparisons through the years. Also refer to the larger NLCD Community page for all things NLCD related National Land Cover Database (NLCD) - ScienceBase-Catalog.}, - urldate = {2024-06-24}, publisher = {U.S. Geological Survey}, author = {Dewitz, Jon}, year = {2023}, @@ -1984,7 +1979,6 @@ @article{mesinger_north_2006 doi = {10.1175/BAMS-87-3-343}, language = {en}, number = {3}, - urldate = {2024-06-24}, journal = {Bulletin of the American Meteorological Society}, author = {Mesinger, Fedor and DiMego, Geoff and Kalnay, Eugenia and Mitchell, Kenneth and Shafran, Perry C. and Ebisuzaki, Wesley and Jović, Dušan and Woollen, Jack and Rogers, Eric and Berbery, Ernesto H. and Ek, Michael B. and Fan, Yun and Grumbine, Robert and Higgins, Wayne and Li, Hong and Lin, Ying and Manikin, Geoff and Parrish, David and Shi, Wei}, month = {March}, @@ -2058,7 +2052,6 @@ @article{keller_description_2021 GEOS‐CF model output is freely available and offers a new tool for academic researchers, air quality managers, and the public}, language = {en}, number = {4}, - urldate = {2024-06-24}, journal = {Journal of Advances in Modeling Earth Systems}, author = {Keller, Christoph A. and Knowland, K. Emma and Duncan, Bryan N. and Liu, Junhua and Anderson, Daniel C. and Das, Sampa and Lucchesi, Robert A. and Lundgren, Elizabeth W. and Nicely, Julie M. and Nielsen, Eric and Ott, Lesley E. and Saunders, Emily and Strode, Sarah A. and Wales, Pamela A. and Jacob, Daniel J.
and Pawson, Steven}, month = {April}, diff --git a/man/calc_sedc.Rd b/man/calc_sedc.Rd index 9a84beb..7a73597 100644 --- a/man/calc_sedc.Rd +++ b/man/calc_sedc.Rd @@ -62,14 +62,14 @@ nc <- terra::vect(ncpath) nc <- terra::project(nc, "EPSG:5070") pnt_locs <- terra::centroids(nc, inside = TRUE) pnt_locs <- pnt_locs[, "NAME"] -pnt_from <- terra::spatSample(nc, 100L) -pnt_from$pid <- seq(1, 100) +pnt_from <- terra::spatSample(nc, 10L) +pnt_from$pid <- seq(1, 10) pnt_from <- pnt_from[, "pid"] -pnt_from$val1 <- rgamma(100L, 1, 0.05) -pnt_from$val2 <- rgamma(100L, 2, 1) +pnt_from$val1 <- rgamma(10L, 1, 0.05) +pnt_from$val2 <- rgamma(10L, 2, 1) vals <- c("val1", "val2") -calc_sedc(pnt_locs, pnt_from, "NAME", 1e5, vals) +calc_sedc(pnt_locs, pnt_from, "NAME", 1e4, vals) } \references{ \insertRef{messier2012integrating}{amadeus} diff --git a/vignettes/workflow.Rmd b/vignettes/workflow.Rmd new file mode 100644 index 0000000..01e726a --- /dev/null +++ b/vignettes/workflow.Rmd @@ -0,0 +1,119 @@ +--- +title: "amadeus workflow" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{amadeus workflow} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +date: "2024-07-29" +author: "Mitchell Manware" +--- + +```{r setup, include = FALSE} +# packages +knitr::opts_chunk$set( + collapse = TRUE, + comment = "" +) +library(amadeus) +``` + +This vignette demonstrates how to download, process, and calculate covariates using `amadeus` functions. +The examples use air temperature at 2 m height ("air.2m") data from the NOAA North American Regional Reanalysis (NARR) dataset \insertRef{web_NARRabout}{amadeus}. + +## Download + +Download data for years 2021 and 2022 with `download_data`. + +```{r, eval = FALSE} +download_data( + dataset_name = "narr", + variable = "air.2m", + year = c(2021, 2022), + directory_to_save = "./inst/extdata", + acknowledgement = TRUE, + download = TRUE, + remove_command = TRUE +) +``` + +Check for the downloaded files.
+ +```{r, eval = FALSE} +list.files("./inst/extdata/air.2m") +``` +```{r, echo = FALSE} +cat('[1] "air.2m.2021.nc" "air.2m.2022.nc"\n') +``` + +## Process + +Process data for all dates from December 28, 2021 to January 3, 2022 with `process_covariates`. + +```{r, eval = FALSE} +temp_process <- process_covariates( + covariate = "narr", + variable = "air.2m", + date = c("2021-12-28", "2022-01-03"), + path = "./inst/extdata/air.2m" +) +``` + +Check the processed `SpatRaster` object. + +```{r, eval = FALSE} +temp_process +``` +```{r, echo = FALSE} +cat( +"class : SpatRaster +dimensions : 277, 349, 7 (nrow, ncol, nlyr) +resolution : 32462.99, 32463 (x, y) +extent : -16231.49, 11313351, -16231.5, 8976020 (xmin, xmax, ymin, ymax) +coord. ref. : +proj=lcc +lat_0=50 +lon_0=-107 +lat_1=50 +lat_2=50 +x_0=5632642.22547 +y_0=4612545.65137 +datum=WGS84 +units=m +no_defs +sources : air.2m.2021.nc:air (4 layers) + air.2m.2022.nc:air (3 layers) +varnames : air (Daily Air Temperature at 2 m) + air (Daily Air Temperature at 2 m) +names : air.2~11228, air.2~11229, air.2~11230, air.2~11231, air.2~20101, air.2~20102, ... +unit : K, K, K, K, K, K, ... +time : 2021-12-28 to 2022-01-03 UTC\n" +) +``` + +## Calculate covariates + +Calculate covariates for North Carolina county boundaries with `calc_covariates`. +County boundaries are accessed with the `tigris::counties` function \insertRef{package_tigris}{amadeus}. +`geom = TRUE` will return the covariates as a `SpatVector` object. + +```{r, eval = FALSE} +library(tigris) +temp_covar <- calc_covariates( + covariate = "narr", + from = temp_process, + locs = tigris::counties("NC", year = 2021), + locs_id = "NAME", + radius = 0, + geom = TRUE +) +``` + +Check the calculated covariates `SpatVector` object. + +```{r, eval = FALSE} +temp_covar +``` +```{r, echo = FALSE} +cat( +"class : SpatVector +geometry : polygons +dimensions : 700, 3 (geometries, attributes) +extent : 7731783, 8506154, 3248490, 3694532 (xmin, xmax, ymin, ymax) +coord. ref.
: +proj=lcc +lat_0=50 +lon_0=-107 +lat_1=50 +lat_2=50 +x_0=5632642.22547 +y_0=4612545.65137 +datum=WGS84 +units=m +no_defs +names : NAME time air.2m_0 +type : +values : Chatham 2021-12-28 289.3 + Alamance 2021-12-28 288.8 + Davidson 2021-12-28 289.1\n") +```