diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index eb333254..42175ec3 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -12,7 +12,47 @@ on: name: pkgdown jobs: + test-covr: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: | + any::pak + any::covr + any::devtools + any::DT + any::htmltools + needs: coverage + + - name: Cache C++ and R dependencies + uses: actions/cache@v4 + with: + path: | + ~/.cache/R + ~/.local/share/R + key: dependencies-${{ runner.os }}-${{ hashFiles('**/DESCRIPTION') }} + restore-keys: | + dependencies-${{ runner.os }}- + + - name: Run tests and generate covr report + run: | + Rscript -e 'covr::report(file = "coverage_report.html")' + + - name: Upload covr report as artifact + uses: actions/upload-artifact@v4 + with: + name: covr-report + path: coverage_report.html + pkgdown: + needs: test-covr runs-on: ubuntu-latest # Only restrict concurrency for non-PR jobs concurrency: @@ -24,6 +64,12 @@ jobs: steps: - uses: actions/checkout@v4 + - name: Download covr report + uses: actions/download-artifact@v4 + with: + name: covr-report + path: . + - uses: r-lib/actions/setup-pandoc@v2 - uses: r-lib/actions/setup-r@v2 @@ -41,7 +87,7 @@ jobs: - name: Deploy to GitHub pages 🚀 if: github.event_name != 'pull_request' - uses: JamesIves/github-pages-deploy-action@v4.4.1 + uses: JamesIves/github-pages-deploy-action@v4.6.1 with: clean: false branch: gh-pages diff --git a/.github/workflows/test-coverage-local.yaml b/.github/workflows/test-coverage-local.yaml index 226ffe89..4fcca8b4 100644 --- a/.github/workflows/test-coverage-local.yaml +++ b/.github/workflows/test-coverage-local.yaml @@ -24,11 +24,13 @@ jobs: - uses: r-lib/actions/setup-r-dependencies@v2 with: - extra-packages: any::covr - needs: coverage + extra-packages: | + any::pak + any::covr + needs: coverage - name: Cache C++ and R dependencies - uses: actions/cache@v2 + uses: actions/cache@v4 with: path: | ~/.cache/R @@ -38,10 +40,15 @@ jobs: dependencies-${{ runner.os }}- - name: Test coverage - run: > - Rscript -e - "covd<-covr::coverage_to_list()$totalcoverage; - write.table(covd[length(covd)], file = '${{ github.workspace }}/local_cov.Rout', row.names = F, col.names = F)" + run: | + Rscript ${{ github.workspace }}/.github/workflows/test-coverage.R ${{ runner.temp }} ${{ github.workspace }} + shell: bash + + - name: Show testthat output + if: always() + run: | + ## -------------------------------------------------------------------- + find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true shell: bash - name: Get Values @@ -61,6 +68,10 @@ jobs: id: patch-comparison shell: bash run: | + if ( ! test -f cov_current.Rout ); then + echo "0" >> cov_current.Rout + fi + cov_patch="${{ steps.get-values.outputs.coverage }}" cov_current=$(cat cov_current.Rout) echo "Current coverage: $cov_current" @@ -86,7 +97,7 @@ jobs: run: | npm i -g badgen-cli export COV=${{ steps.get-values.outputs.coverage }} - COLOR=$(node -p '+process.env.COV >= 95 ? `green` : `orange`') + COLOR=$(node -p '+process.env.COV >= 95 ? 
`green` : `yellow`') mkdir -p badges badgen -j coverage -s $COV% -c $COLOR > badges/coverage.svg diff --git a/.github/workflows/test-coverage.R b/.github/workflows/test-coverage.R new file mode 100644 index 00000000..e1403ba0 --- /dev/null +++ b/.github/workflows/test-coverage.R @@ -0,0 +1,15 @@ +args <- commandArgs(trailingOnly = TRUE) +runnertemp <- args[[1]] +ghworkspace <- args[[2]] + +dir.create(file.path(runnertemp, "package"), showWarnings = FALSE, recursive = TRUE) +sink(paste0(runnertemp, '/package/testthat.Rout.res')) +cov <- covr::package_coverage() +sink() +covd <- covr::coverage_to_list(cov)$totalcoverage +write.table( + covd[length(covd)], + file = file.path(ghworkspace, 'local_cov.Rout'), + row.names = FALSE, + col.names = FALSE +) diff --git a/DESCRIPTION b/DESCRIPTION index a417bb25..3442e503 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: amadeus Title: AMADEUS: A Machine for Data, Environments, and User Setup for common environmental and climate health datasets -Version: 0.1.7 +Version: 0.2.0 Authors@R: c( person("Kyle", "Messier", , "kyle.messier@nih.gov", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-9508-9623")), person("Mitchell", "Manware", role = c("aut", "ctb"), comment = c(ORCID = "0009-0003-6440-6106")), @@ -12,7 +12,7 @@ Authors@R: c( ) Description: A Mechanism/Machine for Data, Environments, and User Setup package for health and climate research. It is fully tested, versioned, and open source and open access. Depends: R (>= 4.1.0) -Imports: dplyr, sf, sftime, stats, terra, methods, data.table, httr, rvest, exactextractr, utils, stringi, testthat (>= 3.0.0), doParallel, parallelly, stars, foreach, future, tidyr, rlang, rstac, nhdplusTools, archive +Imports: dplyr, sf, sftime, stats, terra, methods, data.table, httr, rvest, exactextractr, utils, stringi, testthat (>= 3.0.0), parallelly, stars, future, future.apply, tidyr, rlang, rstac, nhdplusTools, archive, collapse, devtools Suggests: covr, withr, knitr, rmarkdown, lwgeom, FNN, doRNG Encoding: UTF-8 VignetteBuilder: knitr, rmarkdown diff --git a/NAMESPACE b/NAMESPACE index 87eb3a09..2595c1ff 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,7 @@ # Generated by roxygen2: do not edit by hand export(as_mysftime) +export(calc_check_time) export(calc_covariates) export(calc_ecoregion) export(calc_geos) @@ -31,36 +32,36 @@ export(check_mysf) export(check_mysftime) export(check_url_status) export(check_urls) -export(download_aqs_data) -export(download_cropscape_data) +export(download_aqs) +export(download_cropscape) export(download_data) -export(download_ecoregion_data) +export(download_ecoregion) export(download_epa_certificate) -export(download_geos_data) -export(download_gmted_data) -export(download_gridmet_data) -export(download_hms_data) -export(download_huc_data) -export(download_koppen_geiger_data) -export(download_merra2_data) -export(download_modis_data) -export(download_narr_monolevel_data) -export(download_narr_p_levels_data) -export(download_nei_data) -export(download_nlcd_data) -export(download_olm_data) +export(download_geos) +export(download_gmted) +export(download_gridmet) +export(download_hms) +export(download_huc) +export(download_koppen_geiger) +export(download_merra2) +export(download_modis) +export(download_narr_monolevel) +export(download_narr_p_levels) +export(download_nei) +export(download_nlcd) +export(download_olm) export(download_permit) -export(download_prism_data) +export(download_prism) export(download_remove_command) export(download_remove_zips) 
export(download_run) export(download_sanitize_path) -export(download_sedac_groads_data) -export(download_sedac_population_data) +export(download_sedac_groads) +export(download_sedac_population) export(download_setup_dir) export(download_sink) -export(download_terraclimate_data) -export(download_tri_data) +export(download_terraclimate) +export(download_tri) export(download_unzip) export(dt_as_mysftime) export(extract_urls) @@ -122,30 +123,34 @@ import(sf) import(sftime) import(stars) importFrom(archive,archive_extract) +importFrom(collapse,rowbind) importFrom(data.table,.SD) importFrom(data.table,as.data.table) importFrom(data.table,fread) importFrom(data.table,month) importFrom(data.table,rbindlist) importFrom(data.table,year) -importFrom(doParallel,registerDoParallel) importFrom(dplyr,across) importFrom(dplyr,all_of) importFrom(dplyr,as_tibble) importFrom(dplyr,bind_rows) +importFrom(dplyr,distinct) importFrom(dplyr,ends_with) importFrom(dplyr,filter) importFrom(dplyr,group_by) importFrom(dplyr,lag) importFrom(dplyr,left_join) importFrom(dplyr,mutate) +importFrom(dplyr,select) importFrom(dplyr,summarize) importFrom(dplyr,ungroup) importFrom(exactextractr,exact_extract) -importFrom(foreach,"%dopar%") -importFrom(foreach,foreach) importFrom(future,cluster) +importFrom(future,multicore) importFrom(future,plan) +importFrom(future,sequential) +importFrom(future.apply,future_Map) +importFrom(future.apply,future_lapply) importFrom(httr,GET) importFrom(httr,HEAD) importFrom(methods,is) @@ -159,10 +164,16 @@ importFrom(rstac,read_collections) importFrom(rstac,read_items) importFrom(rstac,read_stac) importFrom(sf,st_as_sf) +importFrom(sf,st_as_sfc) +importFrom(sf,st_crs) importFrom(sf,st_drop_geometry) importFrom(sf,st_geometry) +importFrom(sf,st_intersects) +importFrom(sf,st_read) +importFrom(sf,st_transform) importFrom(sf,st_union) importFrom(stars,read_stars) +importFrom(stars,st_mosaic) importFrom(stars,st_warp) importFrom(stats,aggregate) importFrom(stats,setNames) @@ -190,8 +201,7 @@ importFrom(terra,nlyr) importFrom(terra,perim) importFrom(terra,project) importFrom(terra,rast) -importFrom(terra,same.crs) -importFrom(terra,snap) +importFrom(terra,set.crs) importFrom(terra,sprc) importFrom(terra,subset) importFrom(terra,tapp) diff --git a/R/calculate_covariates.R b/R/calculate_covariates.R index 93ad9bb9..3c1f03b2 100644 --- a/R/calculate_covariates.R +++ b/R/calculate_covariates.R @@ -19,7 +19,7 @@ #' @note `covariate` argument value is converted to lowercase. #' @seealso #' - [`calc_modis_par`]: `"modis"`, `"MODIS"` -#' - [`calc_koppen_geiger`]: `"koppen-geiger"`, `"koeppen-geiger"`, `"koppen"`, +#' - [`calc_koppen_geiger`]: `"koppen-geiger"`, `"koeppen-geiger"`, `"koppen"` #' - [`calc_ecoregion`]: `"ecoregion"`, `"ecoregions"` #' - [`calc_temporal_dummies`]: `"dummies"` #' - [`calc_hms`]: `"hms"`, `"noaa"`, `"smoke"` @@ -120,9 +120,19 @@ calc_covariates <- #' @param locs sf/SpatVector. Unique locs. Should include #' a unique identifier field named `locs_id` #' @param locs_id character(1). Name of unique identifier. +#' @param geom logical(1). Should the geometry of `locs` be returned in the +#' `data.frame`? Default is `FALSE`. If `geom = TRUE` and `locs` contain +#' polygon geometries, the `$geometry` column in the returned data frame may +#' make the `data.frame` difficult to read due to long geometry strings. The +#' coordinate reference system of the `$geometry` is the coordinate +#' reference system of `from`. #' @param ... Placeholders. 
#' @seealso [`process_koppen_geiger`] #' @returns a data.frame object +#' @note The returned `data.frame` object contains a +#' `$description` column to represent the temporal range covered by the +#' dataset. For more information, see +#' . #' @author Insang Song #' @importFrom terra vect #' @importFrom terra rast @@ -139,14 +149,18 @@ calc_koppen_geiger <- from = NULL, locs = NULL, locs_id = "site_id", + geom = FALSE, ...) { - ## You will get "locs" in memory after sourcing the file above - locs_tr <- locs - - if (!methods::is(locs, "SpatVector")) { - locs_tr <- terra::vect(locs) - } - locs_kg <- terra::project(locs_tr, terra::crs(from)) + # prepare locations + locs_prepared <- calc_prepare_locs( + from = from, + locs = locs, + locs_id = locs_id, + radius = 0, + geom = geom + ) + locs_kg <- locs_prepared[[1]] + locs_df <- locs_prepared[[2]] locs_kg_extract <- terra::extract(from, locs_kg) # The starting value is NA as the color table has 0 value in it @@ -164,7 +178,10 @@ calc_koppen_geiger <- class_kg = kg_class ) - locs_kg_extract[[locs_id]] <- unlist(locs_kg[[locs_id]]) + locs_kg_extract[[locs_id]] <- locs_df[, 1] + if (geom) { + locs_kg_extract$geometry <- locs_df[, 2] + } colnames(locs_kg_extract)[2] <- "value" locs_kg_extract_e <- merge(locs_kg_extract, kg_colclass, by = "value") @@ -202,10 +219,16 @@ calc_koppen_geiger <- kg_extracted <- cbind( - locs_id = unlist(locs_kg_extract_e[[locs_id]]), + locs_id = locs_df, + as.character(terra::metags(from)), df_ae_separated ) names(kg_extracted)[1] <- locs_id + if (geom) { + names(kg_extracted)[2:3] <- c("geometry", "description") + } else { + names(kg_extracted)[2] <- "description" + } return(kg_extracted) } @@ -218,13 +241,31 @@ calc_koppen_geiger <- #' @param from SpatRaster(1). Output of \code{process_nlcd()}. #' @param locs terra::SpatVector of points geometry #' @param locs_id character(1). Unique identifier of locations +#' @param mode character(1). One of `"exact"` +#' (using [`exactextractr::exact_extract()`]) +#' or `"terra"` (using [`terra::freq()`]). #' @param radius numeric (non-negative) giving the #' radius of buffer around points #' @param max_cells integer(1). Maximum number of cells to be read at once. -#' Higher values will expedite processing, but will increase memory usage. -#' Maximum possible value is `2^31 - 1`. +#' Higher values may expedite processing, but will increase memory usage. +#' Maximum possible value is `2^31 - 1`. Only valid when +#' `mode = "exact"`. #' See [`exactextractr::exact_extract`] for details. +#' @param geom logical(1). Should the geometry of `locs` be returned in the +#' `data.frame`? Default is `FALSE`. If `geom = TRUE` and `locs` contain +#' polygon geometries, the `$geometry` column in the returned data frame may +#' make the `data.frame` difficult to read due to long geometry strings. The +#' coordinate reference system of the `$geometry` is the coordinate +#' reference system of `from`. +#' @param nthreads integer(1). Number of threads to be used #' @param ... Placeholders. +#' @note NLCD is available in U.S. only. Users should be aware of +#' the spatial extent of the data. The results are different depending +#' on `mode` argument. The `"terra"` mode is less memory intensive +#' but less accurate because it counts the number of cells +#' intersecting with the buffer. The `"exact"` may be more accurate +#' but uses more memory as it will account for the partial overlap +#' with the buffer. 
#' @seealso [`process_nlcd`] #' @returns a data.frame object #' @importFrom utils read.csv @@ -233,86 +274,141 @@ calc_koppen_geiger <- #' @importFrom terra project #' @importFrom terra vect #' @importFrom terra crs -#' @importFrom terra same.crs +#' @importFrom terra set.crs #' @importFrom terra buffer #' @importFrom sf st_union #' @importFrom sf st_geometry #' @importFrom terra intersect #' @importFrom terra metags #' @importFrom exactextractr exact_extract +#' @importFrom future plan multicore sequential +#' @importFrom future.apply future_Map +#' @importFrom collapse rowbind #' @export calc_nlcd <- function(from, locs, locs_id = "site_id", + mode = c("exact", "terra"), radius = 1000, - max_cells = 1e8, + max_cells = 5e7, + geom = FALSE, + nthreads = 1L, ...) { # check inputs + mode <- match.arg(mode) if (!is.numeric(radius)) { stop("radius is not a numeric.") } - if (radius <= 0) { + if (radius <= 0 && terra::geomtype(locs) == "points") { stop("radius has not a likely value.") } - if (!methods::is(locs, "SpatVector")) { - stop("locs is not a terra::SpatVector.") - } + if (!methods::is(from, "SpatRaster")) { stop("from is not a SpatRaster.") } + if (nthreads > 1L) { + stopifnot(Sys.info()["sysname"] != "Windows") + future::plan(future::multicore, workers = nthreads) + } else { + future::plan(future::sequential) + } + + # prepare locations + locs_prepared <- calc_prepare_locs( + from = from, + locs = locs, + locs_id = locs_id, + radius = radius, + geom = geom + ) + locs_vector <- locs_prepared[[1]] + locs_df <- locs_prepared[[2]] + year <- try(as.integer(terra::metags(from, name = "year"))) # select points within mainland US and reproject on nlcd crs if necessary - us_main <- - terra::ext(c(xmin = -127, xmax = -65, ymin = 24, ymax = 51)) |> - terra::vect() |> - terra::set.crs("EPSG:4326") |> - terra::project(y = terra::crs(locs)) - data_vect_b <- locs |> - terra::intersect(x = us_main) - if (!terra::same.crs(data_vect_b, from)) { - data_vect_b <- terra::project(data_vect_b, terra::crs(from)) - } + data_vect_b <- + terra::project(locs_vector, y = terra::crs(from)) # create circle buffers with buf_radius - bufs_pol <- terra::buffer(data_vect_b, width = radius) |> - sf::st_as_sf() - # ratio of each nlcd class per buffer - nlcd_at_bufs <- exactextractr::exact_extract(from, - sf::st_geometry(bufs_pol), - fun = "frac", - stack_apply = TRUE, - force_df = TRUE, - progress = FALSE, - max_cells_in_memory = max_cells) - # select only the columns of interest + bufs_pol <- terra::buffer(data_vect_b, width = radius) cfpath <- system.file("extdata", "nlcd_classes.csv", package = "amadeus") nlcd_classes <- utils::read.csv(cfpath) - nlcd_at_bufs <- - nlcd_at_bufs[ - sort(names(nlcd_at_bufs)[ - grepl(paste0("frac_(", paste(nlcd_classes$value, collapse = "|"), ")"), - names(nlcd_at_bufs)) - ]) - ] + + if (mode == "terra") { + # terra mode + class_query <- "names" + # extract land cover class in each buffer + nlcd_at_bufs <- future.apply::future_Map( + function(i) { + terra::freq( + from, + zones = bufs_pol[i, ], + wide = TRUE + ) + }, seq_len(nrow(bufs_pol)), + future.seed = TRUE + ) + nlcd_at_bufs <- collapse::rowbind(nlcd_at_bufs, fill = TRUE) + nlcd_at_bufs <- nlcd_at_bufs[, -seq(1, 2)] + nlcd_cellcnt <- nlcd_at_bufs[, seq(1, ncol(nlcd_at_bufs), 1)] + nlcd_cellcnt <- nlcd_cellcnt / rowSums(nlcd_cellcnt, na.rm = TRUE) + nlcd_at_bufs[, seq(1, ncol(nlcd_at_bufs), 1)] <- nlcd_cellcnt + } else { + class_query <- "value" + # ratio of each nlcd class per buffer + bufs_polx <- bufs_pol[terra::ext(from), ] |> + 
sf::st_as_sf() |> + sf::st_geometry() + nlcd_at_bufs <- future.apply::future_Map( + function(i) { + exactextractr::exact_extract( + from, + bufs_polx[i, ], + fun = "frac", + force_df = TRUE, + progress = FALSE, + max_cells_in_memory = max_cells + ) + }, seq_len(length(bufs_polx)), + future.seed = TRUE + ) + nlcd_at_bufs <- collapse::rowbind(nlcd_at_bufs, fill = TRUE) + # select only the columns of interest + nlcd_at_buf_names <- names(nlcd_at_bufs) + nlcd_val_cols <- + grep("^frac_", nlcd_at_buf_names) + nlcd_at_bufs <- nlcd_at_bufs[, nlcd_val_cols] + } + # fill NAs + nlcd_at_bufs[is.na(nlcd_at_bufs)] <- 0 + # change column names nlcd_names <- names(nlcd_at_bufs) nlcd_names <- sub(pattern = "frac_", replacement = "", x = nlcd_names) - nlcd_names <- as.numeric(nlcd_names) - nlcd_names <- nlcd_classes[nlcd_classes$value %in% nlcd_names, c("class")] - new_names <- sapply( - nlcd_names, - function(x) { - sprintf("LDU_%s_0_%05d", x, radius) - } - ) + nlcd_names <- + switch( + mode, + exact = as.numeric(nlcd_names), + terra = nlcd_names + ) + nlcd_names <- + nlcd_classes$class[match(nlcd_names, nlcd_classes[[class_query]])] + new_names <- sprintf("LDU_%s_0_%05d", nlcd_names, radius) names(nlcd_at_bufs) <- new_names - # merge data_vect with nlcd class fractions (and reproject) - new_data_vect <- cbind(data_vect_b, nlcd_at_bufs) - new_data_vect <- terra::project(new_data_vect, terra::crs(locs)) - new_data_vect$time <- as.integer(year) + + # merge locs_df with nlcd class fractions + new_data_vect <- cbind(locs_df, as.integer(year), nlcd_at_bufs) + if (geom) { + names(new_data_vect)[1:3] <- c(locs_id, "geometry", "time") + } else { + names(new_data_vect)[1:2] <- c(locs_id, "time") + } + calc_check_time(covar = new_data_vect, POSIXt = FALSE) + future::plan(future::sequential) return(new_data_vect) } + #' Calculate ecoregions covariates #' @description #' Extract ecoregions covariates (U.S. EPA Ecoregions Level 2/3) at point @@ -323,59 +419,60 @@ calc_nlcd <- function(from, #' @param locs sf/SpatVector. Unique locs. Should include #' a unique identifier field named `locs_id` #' @param locs_id character(1). Name of unique identifier. +#' @param geom logical(1). Should the geometry of `locs` be returned in the +#' `data.frame`? Default is `FALSE`. If `geom = TRUE` and `locs` contain +#' polygon geometries, the `$geometry` column in the returned data frame may +#' make the `data.frame` difficult to read due to long geometry strings. The +#' coordinate reference system of the `$geometry` is the coordinate +#' reference system of `from`. #' @param ... Placeholders. #' @seealso [`process_ecoregion`] #' @returns a data.frame object with dummy variables and attributes of: #' - `attr(., "ecoregion2_code")`: Ecoregion lv.2 code and key #' - `attr(., "ecoregion3_code")`: Ecoregion lv.3 code and key #' @author Insang Song -#' @importFrom methods is -#' @importFrom terra vect -#' @importFrom terra project -#' @importFrom terra intersect -#' @importFrom terra snap #' @importFrom terra extract -#' @importFrom terra crs +#' @importFrom data.table year #' @export calc_ecoregion <- function( from = NULL, locs, locs_id = "site_id", + geom = FALSE, ... 
) { + # prepare locations + locs_prepared <- calc_prepare_locs( + from = from, + locs = locs, + locs_id = locs_id, + radius = 0, + geom = geom + ) + # both objects will preserve the row order + locsp <- locs_prepared[[1]] + locs_df <- locs_prepared[[2]] - if (!methods::is(locs, "SpatVector")) { - locs <- terra::vect(locs) - } - - locs <- terra::project(locs, terra::crs(from)) - locs_in <- terra::intersect(locs, from) - locs_out <- - locs[!unlist(locs[[locs_id]]) %in% unlist(locs_in[[locs_id]]), ] - - locs_snapped <- terra::snap(locs_out, from, tolerance = 50) - locs_fixed <- rbind(locs_in, locs_snapped) - extracted <- terra::extract(from, locs_fixed) + extracted <- terra::intersect(locsp, from) # Generate field names from extracted ecoregion keys # TODO: if we keep all-zero fields, the initial reference # should be the ecoregion polygon, not the extracted data - key2_sorted <- unlist(extracted[, 3]) + key2_sorted <- unlist(extracted[[grep("L2", names(extracted))]]) key2_num <- regmatches(key2_sorted, regexpr("\\d{1,2}\\.[1-9]", key2_sorted)) key2_num <- as.integer(10 * as.numeric(key2_num)) key2_num <- sprintf("DUM_E2%03d_0_00000", key2_num) key2_num_unique <- sort(unique(key2_num)) - key3_sorted <- unlist(extracted[, 2]) + key3_sorted <- unlist(extracted[[grep("L3", names(extracted))]]) key3_num <- regmatches(key3_sorted, regexpr("\\d{1,3}", key3_sorted)) key3_num <- as.integer(as.numeric(key3_num)) key3_num <- sprintf("DUM_E3%03d_0_00000", key3_num) key3_num_unique <- sort(unique(key3_num)) - df_lv2 <- split(key2_num_unique, key2_num_unique) |> lapply(function(x) { @@ -393,7 +490,16 @@ calc_ecoregion <- as.data.frame() colnames(df_lv3) <- key3_num_unique - locs_ecoreg <- cbind(locs[[locs_id]], df_lv2, df_lv3) + locs_ecoreg <- cbind( + locs_df, + paste0("1997 - ", data.table::year(Sys.Date())), + df_lv2, df_lv3 + ) + if (geom) { + names(locs_ecoreg)[3] <- "description" + } else { + names(locs_ecoreg)[2] <- "description" + } attr(locs_ecoreg, "ecoregion2_code") <- sort(unique(from$L2_KEY)) attr(locs_ecoreg, "ecoregion3_code") <- sort(unique(from$L3_KEY)) return(locs_ecoreg) @@ -425,6 +531,10 @@ calc_ecoregion <- #' Please note that this function does not provide a function to filter #' swaths or tiles, so it is strongly recommended to check and pre-filter #' the file names at users' discretion. +#' @seealso +#' * Preprocessing: [process_modis_merge()], [process_modis_swath()], +#' [process_bluemarble()] +#' * Parallelization: [calc_modis_par()] #' @author Insang Song #' @returns A data.frame object. #' @importFrom terra extract @@ -444,24 +554,18 @@ calc_modis_daily <- function( date = NULL, name_extracted = NULL, fun_summary = "mean", - max_cells = 1e8, + max_cells = 3e7, ... 
) { - if (!any(methods::is(locs, "SpatVector"), - methods::is(locs, "sf"), - methods::is(locs, "sftime"))) { - stop("locs should be one of sf, sftime, or SpatVector.\n") - } if (!methods::is(locs, "SpatVector")) { - locs <- terra::vect(locs) + locs <- try(terra::vect(locs)) + if (inherits(locs, "try-error")) { + stop("locs should be a SpatVector or convertible object.") + } } if (!locs_id %in% names(locs)) { stop(sprintf("locs should include columns named %s.\n", - locs_id) - ) - } - if (!"time" %in% names(locs)) { - locs$time <- date + locs_id)) } extract_with_buffer <- function( @@ -469,10 +573,11 @@ calc_modis_daily <- function( surf, radius, id, - time = "time", - func = "mean" + func = "mean", + maxcells = NULL ) { # generate buffers + if (radius == 0) radius <- 1e-6 # approximately 1 meter in degree bufs <- terra::buffer(points, width = radius, quadsegs = 180L) bufs <- terra::project(bufs, terra::crs(surf)) # extract raster values @@ -482,38 +587,36 @@ calc_modis_daily <- function( y = sf::st_as_sf(bufs), fun = func, force_df = TRUE, - append_cols = c(id, time), + stack_apply = TRUE, + append_cols = id, progress = FALSE, - max_cells_in_memory = max_cells + max_cells_in_memory = maxcells ) return(surf_at_bufs) } - ## NaN to zero - from[is.nan(from)] <- 0L + ## NaN to NA + from[is.nan(from)] <- NA # raster used to be vrt_today - if (any(grepl("00000", name_extracted))) { - locs_tr <- terra::project(locs, terra::crs(from)) - extracted <- terra::extract(x = from, y = locs_tr, ID = FALSE) - locs_blank <- as.data.frame(locs) - extracted <- cbind(locs_blank, extracted) - } else { - extracted <- - extract_with_buffer( - points = locs, - surf = from, - id = locs_id, - radius = radius, - func = fun_summary - ) - } + extracted <- + extract_with_buffer( + points = locs, + surf = from, + id = locs_id, + radius = radius, + func = fun_summary, + maxcells = max_cells + ) # cleaning names # assuming that extracted is a data.frame name_offset <- terra::nlyr(from) # multiple columns will get proper names name_range <- seq(ncol(extracted) - name_offset + 1, ncol(extracted), 1) colnames(extracted)[name_range] <- name_extracted + extracted$time <- as.POSIXlt(date) + calc_check_time(covar = extracted, POSIXt = TRUE) + gc() return(extracted) } @@ -532,7 +635,8 @@ calc_modis_daily <- function( #' '{name_covariates}{zero-padded buffer radius in meters}', #' e.g., 'MOD_NDVIF_0_50000' where 50 km radius circular buffer #' was used to calculate mean NDVI value. -#' @param subdataset Index or search pattern of subdataset. +#' @param subdataset Indices, names, or search patterns for subdatasets. +#' Find detail usage of the argument in notes. #' @param fun_summary character or function. Function to summarize #' extracted raster values. #' @param nthreads integer(1). Number of threads to be used @@ -566,22 +670,37 @@ calc_modis_daily <- function( #' automatically detected and passed to the function. Please note that #' `locs` here and `path` in `preprocess` functions are assumed to have a #' standard naming convention of raw files from NASA. +#' The argument `subdataset` should be in a proper format +#' depending on `preprocess` function: +#' * `process_modis_merge()`: Regular expression pattern. +#' e.g., `"^LST_"` +#' * `process_modis_swath()`: Subdataset names. +#' e.g., `c("Cloud_Fraction_Day", "Cloud_Fraction_Night")` +#' * `process_bluemarble()`: Subdataset number. +#' e.g., for VNP46A2 product, 3L. 
+#' Dates with less than 80 percent of the expected number of tiles, +#' which are determined by the mode of the number of tiles, are removed. +#' Users will be informed of the dates with insufficient tiles. +#' The result data.frame will have an attribute with the dates with +#' insufficient tiles. +#' @returns A data.frame with an attribute: +#' * `attr(., "dates_dropped")`: Dates with insufficient tiles. +#' Note that the dates mean the dates with insufficient tiles, +#' not the dates without available tiles. #' @seealso See details for setting parallelization: -#' * [`foreach::foreach`] -#' * [`parallelly::makeClusterPSOCK`] -#' * [`parallelly::availableCores`] -#' * [`doParallel::registerDoParallel`] +#' * [`future::plan()`] +#' * [`future.apply::future_lapply()`] +#' * [`parallelly::makeClusterPSOCK()`] +#' * [`parallelly::availableCores()`] #' #' This function leverages the calculation of single-day MODIS #' covariates: -#' * [`calc_modis_daily`] +#' * [`calc_modis_daily()`] #' -#' Also, for preprocessing, see: -#' * [`process_modis_merge`] -#' * [`process_modis_swath`] -#' * [`process_bluemarble`] -#' @importFrom foreach foreach -#' @importFrom foreach %dopar% +#' Also, for preprocessing, please refer to: +#' * [`process_modis_merge()`] +#' * [`process_modis_swath()`] +#' * [`process_bluemarble()`] #' @importFrom methods is #' @importFrom sf st_as_sf #' @importFrom sf st_drop_geometry @@ -591,8 +710,8 @@ calc_modis_daily <- function( #' @importFrom rlang inject #' @importFrom future plan #' @importFrom future cluster +#' @importFrom future.apply future_lapply #' @importFrom parallelly availableWorkers -#' @importFrom doParallel registerDoParallel #' @export calc_modis_par <- function( @@ -607,7 +726,7 @@ calc_modis_par <- nthreads = floor(length(parallelly::availableWorkers()) / 2), package_list_add = NULL, export_list_add = NULL, - max_cells = 1e8, + max_cells = 3e7, ... ) { if (!is.function(preprocess)) { @@ -615,13 +734,39 @@ calc_modis_par <- process_modis_swath, or process_bluemarble.") } # read all arguments + # nolint start hdf_args <- c(as.list(environment()), list(...)) - - dates_available <- + # nolint end + dates_available_m <- regmatches(from, regexpr("A20\\d{2,2}[0-3]\\d{2,2}", from)) - dates_available <- unique(dates_available) + dates_available <- sort(unique(dates_available_m)) dates_available <- sub("A", "", dates_available) + # When multiple dates are concerned, + # the number of tiles are expected to be the same. + # Exceptions could exist, so here the number of tiles are checked. + summary_available <- table(dates_available_m) + summary_available_mode <- + sort(table(summary_available), decreasing = TRUE)[1] + summary_available_mode <- as.numeric(names(summary_available_mode)) + summary_available_insuf <- + which(summary_available < floor(summary_available_mode * 0.8)) + if (length(summary_available_insuf) > 0) { + dates_insuf <- + as.Date(dates_available[summary_available_insuf], "%Y%j") + message( + paste0( + "The number of tiles on the following dates are insufficient: ", + paste(dates_insuf, collapse = ", "), + ".\n" + ) + ) + # finally it removes the dates with insufficient tiles + dates_available <- dates_available[-summary_available_insuf] + } else { + dates_insuf <- NA + } + locs_input <- try(sf::st_as_sf(locs), silent = TRUE) if (inherits(locs_input, "try-error")) { stop("locs cannot be convertible to sf. 
@@ -630,8 +775,9 @@ process_modis_swath, or process_bluemarble.") export_list <- c() package_list <- - c("sf", "terra", "exactextractr", "foreach", "data.table", "stars", - "dplyr", "parallelly", "doParallel", "rlang") + c("sf", "terra", "exactextractr", "data.table", "stars", + "dplyr", "parallelly", "rlang", "amadeus", "future", + "future.apply") if (!is.null(export_list_add)) { export_list <- append(export_list, export_list_add) } @@ -640,85 +786,88 @@ process_modis_swath, or process_bluemarble.") } # make clusters - doParallel::registerDoParallel(cores = nthreads) - future::future(future::cluster, workers = nthreads) - - datei <- NULL + # doParallel::registerDoParallel(cores = nthreads) + if (nthreads == 1) { + future::plan(future::sequential) + } else { + future::plan(future::multicore, workers = nthreads) + } + idx_date_available <- seq_along(dates_available) + list_date_available <- + split(idx_date_available, idx_date_available) calc_results <- - foreach::foreach( - datei = seq_along(dates_available), - .packages = package_list, - .export = export_list, - .combine = dplyr::bind_rows, - .errorhandling = "pass", - .verbose = TRUE - ) %dopar% { - options(sf_use_s2 = FALSE) - # nolint start - day_to_pick <- dates_available[datei] - # nolint end - day_to_pick <- as.Date(day_to_pick, format = "%Y%j") - - radiusindex <- seq_along(radius) - radiuslist <- split(radiusindex, radiusindex) - - hdf_args <- append(hdf_args, values = list(date = day_to_pick)) - hdf_args <- append(hdf_args, values = list(path = hdf_args$from)) - # unified interface with rlang::inject - vrt_today <- - rlang::inject(preprocess(!!!hdf_args)) - - if (sum(terra::nlyr(vrt_today)) != length(name_covariates)) { - warning("The number of layers in the input raster do not match - the length of name_covariates.\n") - } - - res0 <- - lapply(radiuslist, - function(k) { - name_radius <- - sprintf("%s%05d", - name_covariates, - radius[k]) - - tryCatch({ - extracted <- - calc_modis_daily( - locs = locs_input, - from = vrt_today, - locs_id = locs_id, - date = as.character(day_to_pick), - fun_summary = fun_summary, - name_extracted = name_radius, - radius = radius[k], - max_cells = max_cells - ) - return(extracted) - }, error = function(e) { + future.apply::future_lapply( + list_date_available, + FUN = function(datei) { + options(sf_use_s2 = FALSE) + # nolint start + day_to_pick <- dates_available[datei] + # nolint end + day_to_pick <- as.Date(day_to_pick, format = "%Y%j") + + radiusindex <- seq_along(radius) + radiusindexlist <- split(radiusindex, radiusindex) + + hdf_args <- c(hdf_args, list(date = day_to_pick)) + hdf_args <- c(hdf_args, list(path = hdf_args$from)) + # unified interface with rlang::inject + vrt_today <- + rlang::inject(preprocess(!!!hdf_args)) + + if (sum(terra::nlyr(vrt_today)) != length(name_covariates)) { + message("The number of layers in the input raster do not match + the length of name_covariates.\n") + } + + res0 <- + lapply(radiusindexlist, + function(k) { name_radius <- sprintf("%s%05d", name_covariates, radius[k]) - error_df <- sf::st_drop_geometry(locs_input) - # coerce to avoid errors - error_df <- as.data.frame(error_df) - error_df <- error_df[, c(locs_id, "time")] - error_df[[name_radius]] <- -99999 - attr(error_df, "error_message") <- e - return(error_df) + extracted <- + try( + calc_modis_daily( + locs = locs_input, + from = vrt_today, + locs_id = locs_id, + date = as.character(day_to_pick), + fun_summary = fun_summary, + name_extracted = name_radius, + radius = radius[k], + max_cells = max_cells 
+ ) + ) + if (inherits(extracted, "try-error")) { + # coerce to avoid errors + error_df <- data.frame( + matrix(-99999, + ncol = length(name_radius) + 1, + nrow = nrow(locs_input)) + ) + error_df <- stats::setNames(error_df, c(locs_id, name_radius)) + error_df[[locs_id]] <- unlist(locs_input[[locs_id]]) + error_df$time <- day_to_pick + extracted <- error_df + } + return(extracted) } - ) - } - ) - res <- - Reduce(function(x, y) { - dplyr::left_join(x, y, - by = c(locs_id, "time") ) - }, - res0) - return(res) - } + res <- + Reduce(function(x, y) { + dplyr::left_join(x, y, + by = c(locs_id, "time") + ) + }, + res0) + return(res) + + }, + future.seed = TRUE + ) + calc_results <- do.call(dplyr::bind_rows, calc_results) + attr(calc_results, "dates_dropped") <- dates_insuf Sys.sleep(1L) return(calc_results) } @@ -768,6 +917,7 @@ calc_temporal_dummies <- return(dt_dum) } + calc_check_time(covar = locs, POSIXt = TRUE) # year vec_year <- data.table::year(locs$time) dt_year_dum <- dummify(vec_year, year) @@ -785,9 +935,10 @@ calc_temporal_dummies <- colnames(dt_month_dum) <- sprintf("DUM_%s_0_00000", shortmn) - # weekday (starts from 1-Monday) + # weekday (starts from 0 - Sunday) vec_wday <- as.POSIXlt(locs$time)$wday - dt_wday_dum <- dummify(vec_wday, seq(1L, 7L)) + # subtracting 1 due to the difference in the base + dt_wday_dum <- dummify(vec_wday, seq(1L, 7L) - 1) colnames(dt_wday_dum) <- sprintf("DUM_WKDY%d_0_00000", seq(1L, 7L)) @@ -799,7 +950,6 @@ calc_temporal_dummies <- dt_month_dum, dt_wday_dum ) - return(locs_dums) } @@ -870,8 +1020,6 @@ calc_sedc <- sedc_bandwidth = NULL, target_fields = NULL ) { - # define sources, set SEDC exponential decay range - if (!methods::is(locs, "SpatVector")) { locs <- try(terra::vect(locs)) } @@ -908,7 +1056,6 @@ The result may not be accurate.\n", # near features with distance argument: only returns integer indices # threshold is set to the twice of sedc_bandwidth - # lines 895-900 may overlap with distance arg in 912-913 res_nearby <- terra::nearby(locs, from_in, distance = sedc_bandwidth * 2) # attaching actual distance @@ -930,7 +1077,7 @@ The result may not be accurate.\n", dplyr::left_join(dist_nearby_df) |> # per the definition in # https://mserre.sph.unc.edu/BMElab_web/SEDCtutorial/index.html - # exp(-3) is about 0.05 + # exp(-3) is about 0.05 * (value at origin) dplyr::mutate(w_sedc = exp((-3 * dist) / sedc_bandwidth)) |> dplyr::group_by(!!rlang::sym(locs_id)) |> dplyr::summarize( @@ -946,7 +1093,7 @@ The result may not be accurate.\n", attr(res_sedc, "sedc_bandwidth") <- sedc_bandwidth attr(res_sedc, "sedc_threshold") <- sedc_bandwidth * 2 - + calc_check_time(covar = res_sedc, POSIXt = TRUE) return(res_sedc) } @@ -1010,8 +1157,7 @@ calc_tri <- function( # inner lapply list_radius <- split(radius, radius) list_locs_tri <- - lapply( - list_radius, + Map( function(x) { locs_tri_s <- calc_sedc( @@ -1022,7 +1168,8 @@ calc_tri <- function( target_fields = tri_cols ) return(locs_tri_s) - } + }, + list_radius ) # bind element data.frames into one df_tri <- Reduce(function(x, y) dplyr::full_join(x, y), list_locs_tri) @@ -1030,7 +1177,8 @@ calc_tri <- function( df_tri <- dplyr::left_join(as.data.frame(locs), df_tri) } # read attr - df_tri$time <- attr(from, "tri_year") + df_tri$time <- as.integer(attr(from, "tri_year")) + calc_check_time(covar = df_tri, POSIXt = FALSE) return(df_tri) } @@ -1064,7 +1212,8 @@ calc_nei <- function( # spatial join locs_re <- terra::project(locs, terra::crs(from)) locs_re <- terra::intersect(locs_re, from) - + locs_re <- 
as.data.frame(locs_re) + calc_check_time(covar = locs_re, POSIXt = FALSE) return(locs_re) } @@ -1101,7 +1250,7 @@ calc_hms <- function( ...) { #### check for null parameters check_for_null_parameters(mget(ls())) - #### from == character indicates no wildfire smoke polumes are present + #### from == character indicates no wildfire smoke plumes are present #### return 0 for all locs and dates if ("character" %in% class(from)) { cat(paste0( @@ -1160,7 +1309,7 @@ calc_hms <- function( from$Date[nrow(from)], format = "%Y%m%d" ), - sub_hyphen = TRUE + sub_hyphen = FALSE ) #### empty location data.frame sites_extracted <- NULL @@ -1216,7 +1365,7 @@ calc_hms <- function( ) } #### check for missing dates (missing polygons) - if (!(identical(date_sequence, from$Date))) { + if (!(identical(date_sequence, sort(unique(from$Date))))) { cat(paste0( "Detected absent smoke plume polygons.\n" )) @@ -1248,24 +1397,29 @@ calc_hms <- function( } #### coerce binary to integer sites_extracted[, 3] <- as.integer(sites_extracted[, 3]) + #### date to POSIXct + sites_extracted$time <- as.POSIXct(sites_extracted$time) #### order by date - sites_extracted_ordered <- sites_extracted[order(sites_extracted$time), ] + sites_extracted_ordered <- as.data.frame( + sites_extracted[order(sites_extracted$time), ] + ) cat(paste0( "Returning ", layer_name, " covariates.\n" )) + calc_check_time(covar = sites_extracted_ordered, POSIXt = TRUE) #### return data.frame - return(data.frame(sites_extracted_ordered)) + return(sites_extracted_ordered) } #' Calculate elevation covariates #' @description #' Extract elevation values at point locations. Returns a \code{data.frame} -#' object containing \code{locs_id} and elevation variable. Elevation variable -#' column name reflects the elevation statistic, spatial resolution of -#' \code{from}, and circular buffer radius (ie. Breakline Emphasis at 7.5 -#' arc-second resolution with 0 meter buffer: breakline_emphasis_r75_0). +#' object containing \code{locs_id}, year of release, and elevation variable. +#' Elevation variable column name reflects the elevation statistic, spatial +#' resolution of \code{from}, and circular buffer radius (ie. Breakline Emphasis +#' at 7.5 arc-second resolution with 0 meter buffer: breakline_emphasis_r75_0). #' @param from SpatRaster(1). Output from \code{process_gmted()}. #' @param locs data.frame. character to file path, SpatVector, or sf object. #' @param locs_id character(1). Column within `locations` CSV file @@ -1274,6 +1428,12 @@ calc_hms <- function( #' (Default = 0). #' @param fun character(1). Function used to summarize multiple raster cells #' within sites location buffer (Default = `mean`). +#' @param geom logical(1). Should the geometry of `locs` be returned in the +#' `data.frame`? Default is `FALSE`. If `geom = TRUE` and `locs` contain +#' polygon geometries, the `$geometry` column in the returned data frame may +#' make the `data.frame` difficult to read due to long geometry strings. The +#' coordinate reference system of the `$geometry` is the coordinate +#' reference system of `from`. #' @param ... Placeholders #' @author Mitchell Manware #' @seealso [`process_gmted()`] @@ -1291,13 +1451,15 @@ calc_gmted <- function( locs_id = NULL, radius = 0, fun = "mean", + geom = FALSE, ...) 
{ #### prepare locations list sites_list <- calc_prepare_locs( from = from, locs = locs, locs_id = locs_id, - radius = radius + radius = radius, + geom = geom ) sites_e <- sites_list[[1]] sites_id <- sites_list[[2]] @@ -1310,50 +1472,55 @@ calc_gmted <- function( radius = radius, fun = fun, variable = 2, - time = NULL, - time_type = "timeless" + time = 3, + time_type = "year" ) - #### convert integer to numeric - sites_extracted[, 2] <- as.numeric(sites_extracted[, 2]) - #### define column names - colnames(sites_extracted) <- c( - locs_id, - paste0( - gsub( - " ", - "_", - tolower( - process_gmted_codes( - substr( - strsplit( - names(from), - "_" - )[[1]][2], - 1, - 2 - ), - statistic = TRUE, - invert = TRUE - ) - ) - ), - "r", - substr( - strsplit( - names(from), - "_" - )[[1]][2], - 3, - 4 - ), - "_", - radius + #### variable column name + statistic_codes <- c("be", "ds", "md", "mi", "mn", "mx", "sd") + statistic_to <- c( + "BRK", "SUB", "MED", "MEA", "MIN", "MAX", "STD" + ) + name_from <- names(from) + code_unique <- + regmatches( + name_from, + regexpr( + paste0("(", + paste(statistic_codes, collapse = "|"), + ")[0-9]{2,2}"), + name_from + ) ) + statistic <- substr(code_unique, 1, 2) + resolution <- substr(code_unique, 3, 4) + statistic_to <- + sprintf( + "%s%s", + statistic_to[match(statistic, statistic_codes)], + resolution + ) + + variable_name <- paste0( + statistic_to, + "_", + sprintf("%05d", as.integer(radius)) ) + if (geom) { + #### convert integer to numeric + sites_extracted[, 4] <- as.numeric(sites_extracted[, 4]) + names(sites_extracted) <- c(locs_id, "geometry", "time", variable_name) + } else { + #### convert integer to numeric + sites_extracted[, 3] <- as.numeric(sites_extracted[, 3]) + names(sites_extracted) <- c(locs_id, "time", variable_name) + } + calc_check_time(covar = sites_extracted, POSIXt = FALSE) #### return data.frame return(data.frame(sites_extracted)) } + + #' Calculate meteorological covariates #' @description #' Extract meteorological values at point locations. Returns a \code{data.frame} @@ -1368,6 +1535,12 @@ calc_gmted <- function( #' (Default = 0). #' @param fun character(1). Function used to summarize multiple raster cells #' within sites location buffer (Default = `mean`). +#' @param geom logical(1). Should the geometry of `locs` be returned in the +#' `data.frame`? Default is `FALSE`. If `geom = TRUE` and `locs` contain +#' polygon geometries, the `$geometry` column in the returned data frame may +#' make the `data.frame` difficult to read due to long geometry strings. The +#' coordinate reference system of the `$geometry` is the coordinate +#' reference system of `from`. #' @param ... Placeholders #' @author Mitchell Manware #' @seealso [`process_narr`] @@ -1385,13 +1558,15 @@ calc_narr <- function( locs_id = NULL, radius = 0, fun = "mean", + geom = FALSE, ...) { #### prepare locations list sites_list <- calc_prepare_locs( from = from, locs = locs, locs_id = locs_id, - radius = radius + radius = radius, + geom = geom ) sites_e <- sites_list[[1]] sites_id <- sites_list[[2]] @@ -1414,8 +1589,10 @@ calc_narr <- function( variable = 1, time = narr_time, time_type = "date", - level = narr_level + level = narr_level, + ... ) + calc_check_time(covar = sites_extracted, POSIXt = TRUE) #### return data.frame return(data.frame(sites_extracted)) } @@ -1436,7 +1613,13 @@ calc_narr <- function( #' (Default = 0). #' @param fun character(1). Function used to summarize multiple raster cells #' within sites location buffer (Default = `mean`). -#' @param ... 
Placeholders +#' @param geom logical(1). Should the geometry of `locs` be returned in the +#' `data.frame`? Default is `FALSE`. If `geom = TRUE` and `locs` contain +#' polygon geometries, the `$geometry` column in the returned data frame may +#' make the `data.frame` difficult to read due to long geometry strings. The +#' coordinate reference system of the `$geometry` is the coordinate +#' reference system of `from`. +#' @param ... Placeholders. #' @author Mitchell Manware #' @seealso [process_geos()] #' @return a data.frame object @@ -1454,13 +1637,15 @@ calc_geos <- function( locs_id = NULL, radius = 0, fun = "mean", + geom = FALSE, ...) { #### prepare locations list sites_list <- calc_prepare_locs( from = from, locs = locs, locs_id = locs_id, - radius = radius + radius = radius, + geom = geom ) sites_e <- sites_list[[1]] sites_id <- sites_list[[2]] @@ -1475,8 +1660,10 @@ calc_geos <- function( variable = 1, time = c(3, 4), time_type = "hour", - level = 2 + level = 2, + ... ) + calc_check_time(covar = sites_extracted, POSIXt = TRUE) #### return data.frame return(data.frame(sites_extracted)) } @@ -1495,6 +1682,12 @@ calc_geos <- function( #' (Default = 0). #' @param fun character(1). Function used to summarize multiple raster cells #' within sites location buffer (Default = `mean`). +#' @param geom logical(1). Should the geometry of `locs` be returned in the +#' `data.frame`? Default is `FALSE`. If `geom = TRUE` and `locs` contain +#' polygon geometries, the `$geometry` column in the returned data frame may +#' make the `data.frame` difficult to read due to long geometry strings. The +#' coordinate reference system of the `$geometry` is the coordinate +#' reference system of `from`. #' @param ... Placeholders #' @author Mitchell Manware #' @seealso [process_sedac_population()] @@ -1507,13 +1700,15 @@ calc_sedac_population <- function( locs_id = NULL, radius = 0, fun = "mean", + geom = FALSE, ...) { #### prepare locations list sites_list <- calc_prepare_locs( from = from, locs = locs, locs_id = locs_id, - radius = radius + radius = radius, + geom = geom ) sites_e <- sites_list[[1]] sites_id <- sites_list[[2]] @@ -1548,8 +1743,10 @@ calc_sedac_population <- function( fun = fun, variable = 3, time = 4, - time_type = "year" + time_type = "year", + ... ) + calc_check_time(covar = sites_extracted, POSIXt = FALSE) #### return data.frame return(data.frame(sites_extracted)) } @@ -1571,8 +1768,18 @@ calc_sedac_population <- function( #' (Default = 1000). #' @param fun function(1). Function used to summarize the length of roads #' within sites location buffer (Default is `sum`). +#' @param geom logical(1). Should the geometry of `locs` be returned in the +#' `data.frame`? Default is `FALSE`. If `geom = TRUE` and `locs` contain +#' polygon geometries, the `$geometry` column in the returned data frame may +#' make the `data.frame` difficult to read due to long geometry strings. The +#' coordinate reference system of the `$geometry` is the coordinate +#' reference system of `from`. #' @param ... Placeholders. -#' @note Unit is km / sq km. +# nolint start +#' @note Unit is km / sq km. The returned `data.frame` object contains a +#' `$time` column to represent the temporal range covered by the +#' dataset. For more information, see . +# nolint end #' @author Insang Song #' @seealso [`process_sedac_groads`] #' @return a data.frame object with three columns. @@ -1593,7 +1800,8 @@ calc_sedac_groads <- function( locs = NULL, locs_id = NULL, radius = 1000, - fun = sum, + fun = "sum", + geom = FALSE, ...) 
{ #### check for null parameters if (radius <= 0) { @@ -1604,7 +1812,8 @@ calc_sedac_groads <- function( from = from, locs = locs, locs_id = locs_id, - radius = radius + radius = radius, + geom = geom ) sites_e <- sites_list[[1]] @@ -1640,8 +1849,16 @@ calc_sedac_groads <- function( sprintf("GRD_TOTAL_0_%05d", radius), sprintf("GRD_DENKM_0_%05d", radius)) ) - - return(from_clip) + #### time period + from_clip$description <- "1980 - 2010" + if (geom) { + from_clip$geometry <- sites_list[[2]]$geometry + from_clip_reorder <- from_clip[, c(1, 5, 4, 2, 3)] + } else { + #### reorder + from_clip_reorder <- from_clip[, c(1, 4, 2, 3)] + } + return(from_clip_reorder) } #' Calculate meteorological and atmospheric covariates @@ -1658,6 +1875,12 @@ calc_sedac_groads <- function( #' (Default = 0). #' @param fun character(1). Function used to summarize multiple raster cells #' within sites location buffer (Default = `mean`). +#' @param geom logical(1). Should the geometry of `locs` be returned in the +#' `data.frame`? Default is `FALSE`. If `geom = TRUE` and `locs` contain +#' polygon geometries, the `$geometry` column in the returned data frame may +#' make the `data.frame` difficult to read due to long geometry strings. The +#' coordinate reference system of the `$geometry` is the coordinate +#' reference system of `from`. #' @param ... Placeholders #' @author Mitchell Manware #' @seealso [calc_geos()], [process_merra2()] @@ -1676,13 +1899,15 @@ calc_merra2 <- function( locs_id = NULL, radius = 0, fun = "mean", + geom = FALSE, ...) { #### prepare locations list sites_list <- calc_prepare_locs( from = from, locs = locs, locs_id = locs_id, - radius = radius + radius = radius, + geom = geom ) sites_e <- sites_list[[1]] sites_id <- sites_list[[2]] @@ -1705,8 +1930,10 @@ calc_merra2 <- function( variable = 1, time = merra2_time, time_type = "hour", - level = merra2_level + level = merra2_level, + ... ) + calc_check_time(covar = sites_extracted, POSIXt = TRUE) #### return data.frame return(data.frame(sites_extracted)) } @@ -1724,6 +1951,13 @@ calc_merra2 <- function( #' (Default = 0). #' @param fun character(1). Function used to summarize multiple raster cells #' within sites location buffer (Default = `mean`). +#' @param geom logical(1). Should the geometry of `locs` be returned in the +#' `data.frame`? Default is `FALSE`. If `geom = TRUE` and `locs` contain +#' polygon geometries, the `$geometry` column in the returned data frame may +#' make the `data.frame` difficult to read due to long geometry strings. The +#' coordinate reference system of the `$geometry` is the coordinate +#' reference system of `from`. +#' @param ... Placeholders. #' @author Mitchell Manware #' @seealso [`process_gridmet()`] #' @return a data.frame object @@ -1739,13 +1973,16 @@ calc_gridmet <- function( locs, locs_id = NULL, radius = 0, - fun = "mean") { + fun = "mean", + geom = FALSE, + ...) { #### prepare locations list sites_list <- calc_prepare_locs( from = from, locs = locs, locs_id = locs_id, - radius = radius + radius = radius, + geom = geom ) sites_e <- sites_list[[1]] sites_id <- sites_list[[2]] @@ -1759,8 +1996,10 @@ calc_gridmet <- function( fun = fun, variable = 1, time = 2, - time_type = "date" + time_type = "date", + ... ) + calc_check_time(covar = sites_extracted, POSIXt = TRUE) #### return data.frame return(data.frame(sites_extracted)) } @@ -1779,6 +2018,13 @@ calc_gridmet <- function( #' (Default = 0). #' @param fun character(1). 
Function used to summarize multiple raster cells #' within sites location buffer (Default = `mean`). +#' @param geom logical(1). Should the geometry of `locs` be returned in the +#' `data.frame`? Default is `FALSE`. If `geom = TRUE` and `locs` contain +#' polygon geometries, the `$geometry` column in the returned data frame may +#' make the `data.frame` difficult to read due to long geometry strings. The +#' coordinate reference system of the `$geometry` is the coordinate +#' reference system of `from`. +#' @param ... Placeholders. #' @note #' TerraClimate data has monthly temporal resolution, so the `$time` column #' will contain the year and month in YYYYMM format (ie. January, 2018 = @@ -1794,17 +2040,20 @@ calc_gridmet <- function( #' @importFrom terra crs #' @export calc_terraclimate <- function( - from, - locs, + from = NULL, + locs = NULL, locs_id = NULL, radius = 0, - fun = "mean") { + fun = "mean", + geom = FALSE, + ...) { #### prepare locations list sites_list <- calc_prepare_locs( from = from, locs = locs, locs_id = locs_id, - radius = radius + radius = radius, + geom = geom ) sites_e <- sites_list[[1]] sites_id <- sites_list[[2]] @@ -1818,8 +2067,10 @@ calc_terraclimate <- function( fun = fun, variable = 1, time = 2, - time_type = "yearmonth" + time_type = "yearmonth", + ... ) + calc_check_time(covar = sites_extracted, POSIXt = FALSE) #### return data.frame return(data.frame(sites_extracted)) } @@ -1841,6 +2092,8 @@ calc_terraclimate <- function( #' least the number of lag days before the desired start date. For example, if #' `date = c("2024-01-01", "2024-01-31)` and `lag = 1`, `from` must contain data #' starting at 2023-12-31. +#' If `from` contains geometry features, `calc_lagged` will return a column +#' with geometry features of the same name. #' \code{calc_lagged()} assumes that all columns other than `time_id`, #' `locs_id`, and fixed columns of "lat" and "lon", follow the genre, variable, #' lag, buffer radius format adopted in \code{calc_setcolumns()}. @@ -1852,9 +2105,9 @@ calc_lagged <- function( date, lag, locs_id, - time_id) { + time_id = "time") { #### check input data types - stopifnot(class(from) %in% c("data.frame", "data.table")) + stopifnot(methods::is(from, "data.frame")) #### check if time_id is not null stopifnot(!is.null(time_id)) #### return from if lag == 0 @@ -1863,37 +2116,50 @@ calc_lagged <- function( return(from) } #### extract times - time <- from[[time_id]] + time <- as.character(from[[time_id]]) + dateseq <- seq(as.Date(date[1]) - lag, as.Date(date[2]), by = 1) + dateseq <- as.character(dateseq) + align <- setdiff(dateseq, unique(time)) ### check temporal alignment - if (!all(c(as.Date(date)[1] - lag, as.Date(date)[2]) %in% time)) { + if (length(align) > 0) { stop( - paste0( - "Dates requested in `date` do not align with data available in `from`." - ) + "Dates requested in `date` do not align with data available in `from`." 
) } - #### etract variables - variables <- from[ - , !(names(from) %in% c(time_id, locs_id, "lon", "lat")), - drop = FALSE - ] - #### apply lag using dplyr::lag - variables_lag <- dplyr::lag(variables, lag, default = NA) - colnames(variables_lag) <- gsub( - paste0("_[0-9]{1}_"), - paste0("_", lag, "_"), - colnames(variables_lag) - ) - #### create the return dataframe - variables_return <- cbind(from[[locs_id]], time, variables_lag) - colnames(variables_return)[1:2] <- c(locs_id, time_id) - #### identify dates of interest - date_sequence <- generate_date_sequence( - date[1], - date[2], - sub_hyphen = FALSE - ) - #### filter to dates of interest - variables_return_date <- variables_return[time %in% date_sequence, ] - return(variables_return_date) + unique_locs <- unique(from[[locs_id]]) + variables_merge <- NULL + for (u in seq_along(unique_locs)) { + from_u <- subset( + from, + from[[locs_id]] == unique_locs[u] + ) + time_u <- from_u[[time_id]] + #### extract variables + variables <- from_u[ + , !(names(from_u) %in% c(locs_id, time_id)), + drop = FALSE + ] + #### apply lag using dplyr::lag + variables_lag <- dplyr::lag(variables, lag, default = NA) + colnames(variables_lag) <- gsub( + paste0("_[0-9]{1}_"), + paste0("_", lag, "_"), + colnames(variables_lag) + ) + #### create the return dataframe + variables_return <- cbind(from_u[[locs_id]], time_u, variables_lag) + colnames(variables_return)[1:2] <- c(locs_id, time_id) + #### identify dates of interest + date_sequence <- generate_date_sequence( + date[1], + date[2], + sub_hyphen = FALSE + ) + #### filter to dates of interest + variables_return_date <- variables_return[time_u %in% date_sequence, ] + #### merge with other locations + variables_merge <- rbind(variables_merge, variables_return_date) + } + calc_check_time(covar = variables_merge, POSIXt = TRUE) + return(variables_merge) } diff --git a/R/calculate_covariates_auxiliary.R b/R/calculate_covariates_auxiliary.R index dfe8f3ed..39b63d0f 100644 --- a/R/calculate_covariates_auxiliary.R +++ b/R/calculate_covariates_auxiliary.R @@ -40,6 +40,14 @@ calc_setcolumns <- function( ) stopifnot(length(time_index) <= 1) names_return[time_index] <- "time" + #### description (for time period coverage) + description_index <- which(names_from == "description") + stopifnot(length(description_index) <= 1) + names_return[description_index] <- "description" + #### geometry + geometry_index <- which(names_from == "geometry") + stopifnot(length(geometry_index) <= 1) + names_return[geometry_index] <- "geometry" #### latitude and longitude lat_index <- which( tolower(names_from) %in% c("lat", "latitude") @@ -65,7 +73,10 @@ calc_setcolumns <- function( genre <- substr(dataset, 1, 3) #### covariates cov_index <- which( - !(c(names_from %in% c(locs_id, "time", "lat", "lon", "level"))) + !(c(names_from %in% c( + locs_id, "geometry", "time", "lat", "lon", "level", "description" + )) + ) ) for (c in seq_along(cov_index)) { name_covariate <- names_return[cov_index[c]] @@ -104,11 +115,10 @@ calc_setcolumns <- function( side = "left" ) ) - names_return[cov_index[c]] <- name_new + names_return[cov_index[c]] <- toupper(name_new) } #### check for unique names stopifnot(length(names_return) == length(unique(names_return))) - colnames(from) <- names_return return(from) } @@ -196,6 +206,8 @@ calc_message <- function( #' Passed from \code{calc_\*()}. #' @param radius integer(1). Circular buffer distance around site locations. #' (Default = 0). Passed from \code{calc_\*()}. +#' @param geom logical(1). 
Should the geometry of `locs` be returned in the +#' `data.frame`? Default is `FALSE`. #' @return A `list` containing `SpatVector` and `data.frame` objects #' @seealso [`process_locs_vector()`], [`check_for_null_parameters()`] #' @keywords internal @@ -206,20 +218,34 @@ calc_prepare_locs <- function( from, locs, locs_id, - radius) { + radius, + geom = FALSE) { #### check for null parameters check_for_null_parameters(mget(ls())) + if (!locs_id %in% names(locs)) { + stop(sprintf("locs should include columns named %s.\n", + locs_id) + ) + } #### prepare sites sites_e <- process_locs_vector( locs, terra::crs(from), radius ) - #### site identifiers only - sites_id <- subset( - terra::as.data.frame(sites_e), - select = locs_id - ) + #### site identifiers and geometry + if (geom) { + sites_id <- subset( + terra::as.data.frame(sites_e, geom = "WKT"), + select = c(locs_id, "geometry") + ) + } else { + #### site identifiers only + sites_id <- subset( + terra::as.data.frame(sites_e), + select = locs_id + ) + } return(list(sites_e, sites_id)) } @@ -229,8 +255,9 @@ calc_prepare_locs <- function( #' value. #' @param time Time value #' @param format Type of time to return in the `$time` column. Can be -#' "timeless" (ie. GMTED data), "date" (ie. NARR data), "hour", (ie. GEOS data), -#' "year" (ie. SEDAC population data), or "yearmonth" (ie. TerraClimate data). +#' "timeless" (ie. Ecoregions data), "date" (ie. NARR data), "hour" +#' (ie. GEOS data), "year" (ie. SEDAC population data), or "yearmonth" +#' (ie. TerraClimate data). #' @return a `Date`, `POSIXt`, or `integer` object based on `format =` #' @keywords internal #' @export @@ -238,21 +265,24 @@ calc_time <- function( time, format) { if (format == "timeless") { - return() + return(time) } else if (format == "date") { - return_time <- as.Date( + return_time <- as.POSIXlt( time, - format = "%Y%m%d" + format = "%Y%m%d", + tz = "UTC" ) } else if (format == "hour") { - return_time <- ISOdatetime( - year = substr(time[1], 1, 4), - month = substr(time[1], 5, 6), - day = substr(time[1], 7, 8), - hour = substr(time[2], 1, 2), - min = substr(time[2], 3, 4), - sec = substr(time[2], 5, 6), - tz = "UTC" + return_time <- as.POSIXlt( + ISOdatetime( + year = substr(time[1], 1, 4), + month = substr(time[1], 5, 6), + day = substr(time[1], 7, 8), + hour = substr(time[2], 1, 2), + min = substr(time[2], 3, 4), + sec = substr(time[2], 5, 6), + tz = "UTC" + ) ) } else if (format %in% c("yearmonth", "year")) { return_time <- as.integer(time) @@ -260,7 +290,37 @@ calc_time <- function( return(return_time) } -#' Peform covariate extraction +#' Check time values +#' @description +#' Check the time values within calculated covariates `data.frame` +#' @param covar data.frame(1). Calculated covariates `data.frame`. +#' @param POSIXt logical(1). Should the time values in `covar` be of class +#' `POSIXt`? If `FALSE`, the time values will be checked for integer class +#' (year and year-month). +#' @return NULL +#' @keywords internal +#' @export +# nolint start +calc_check_time <- function( + covar, + POSIXt = TRUE +) { + stopifnot(methods::is(covar, "data.frame")) + if ("time" %in% names(covar)) { + if (POSIXt) { + stopifnot(all(sapply(covar$time, methods::is, "POSIXt"))) + } else { + stopifnot(all(sapply(covar$time, methods::is, "integer"))) + } + } else { + message( + "`$time` not detected in `data.frame` provided.\n" + ) + } +} +# nolint end + +#' Perform covariate extraction #' @description #' Extract covariate values from `SpatRaster` object passed from #' \code{process_*()}. 
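# A minimal sketch of the new calc_check_time() guard on toy inputs; the
# data frames below are hypothetical and only illustrate the two `POSIXt` modes.
covar_date <- data.frame(
  site_id = "A1",
  time = as.POSIXct("2018-01-01", tz = "UTC")
)
calc_check_time(covar = covar_date, POSIXt = TRUE)       # POSIXt times pass
covar_yearmonth <- data.frame(site_id = "A1", time = 201801L)
calc_check_time(covar = covar_yearmonth, POSIXt = FALSE) # integer yearmonth passes
calc_check_time(covar = data.frame(site_id = "A1"))      # messages: no `$time` column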
@@ -281,8 +341,15 @@ calc_time <- function( #' pressure level value (if applicable). Default = `NULL`. #' @param radius integer(1). Buffer distance (m). Passed from #' \code{calc_prepare_locs()}. Used in column naming. +#' @param max_cells integer(1). Maximum number of cells to be read at once. +#' Higher values will expedite processing, but will increase memory usage. +#' Maximum possible value is `2^31 - 1`. +#' See [`exactextractr::exact_extract`] for details. +#' @param ... Placeholders. #' @importFrom terra nlyr #' @importFrom terra extract +#' @importFrom exactextractr exact_extract +#' @importFrom sf st_as_sf #' @return a `data.frame` object #' @keywords internal #' @export @@ -296,9 +363,12 @@ calc_worker <- function( time, time_type = c("date", "hour", "year", "yearmonth", "timeless"), radius, - level = NULL) { + level = NULL, + max_cells = 1e8, + ...) { #### empty location data.frame sites_extracted <- NULL + time_type <- match.arg(time_type) for (l in seq_len(terra::nlyr(from))) { #### select data layer data_layer <- from[[l]] @@ -317,10 +387,10 @@ calc_worker <- function( ) } #### extract level (if applicable) - if (!(is.null(level))) { + if (!is.null(level)) { data_level <- data_split[level] } else { - data_level <- NULL + data_level <- "" } #### message calc_message( @@ -331,15 +401,27 @@ calc_worker <- function( level = data_level ) #### extract layer data at sites - sites_extracted_layer <- terra::extract( - data_layer, - locs_vector, - fun = fun, - method = "simple", - ID = FALSE, - bind = FALSE, - na.rm = TRUE - ) + if (terra::geomtype(locs_vector) == "polygons") { + ### apply exactextractr::exact_extract for polygons + sites_extracted_layer <- exactextractr::exact_extract( + data_layer, + sf::st_as_sf(locs_vector), + progress = FALSE, + force_df = TRUE, + fun = fun, + max_cells_in_memory = max_cells + ) + } else if (terra::geomtype(locs_vector) == "points") { + #### apply terra::extract for points + sites_extracted_layer <- terra::extract( + data_layer, + locs_vector, + method = "simple", + ID = FALSE, + bind = FALSE, + na.rm = TRUE + ) + } # merge with site_id, time, and pressure levels (if applicable) if (time_type == "timeless") { sites_extracted_layer <- cbind( @@ -374,7 +456,11 @@ calc_worker <- function( sites_extracted_layer <- cbind( locs_df, data_time, - data_level, + gsub( + "level=|lev=", + "", + data_level + ), sites_extracted_layer ) colnames(sites_extracted_layer) <- c( diff --git a/R/download.R b/R/download.R index 141264cb..1edf5410 100644 --- a/R/download.R +++ b/R/download.R @@ -12,33 +12,36 @@ #' large and use lots of machine storage and memory. #' @param ... Arguments passed to each download function. 
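# A self-contained, hypothetical sketch of the geometry-type dispatch used by
# calc_worker(): polygon inputs route to exactextractr::exact_extract(), point
# inputs to terra::extract(). All objects below are synthetic examples.
r <- terra::rast(nrows = 10, ncols = 10)            # default lon/lat grid
terra::values(r) <- stats::runif(terra::ncell(r))
pts <- terra::vect(
  data.frame(lon = c(10, 20), lat = c(30, 40), id = c("s1", "s2")),
  geom = c("lon", "lat"),
  crs = terra::crs(r)
)
polys <- terra::buffer(pts, width = 10000)          # 10 km buffers -> polygons
terra::geomtype(pts)                                # "points"
terra::geomtype(polys)                              # "polygons"
exactextractr::exact_extract(
  r,
  sf::st_as_sf(polys),
  fun = "mean",
  progress = FALSE,
  force_df = TRUE
)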
#' @note -#' - All download function names are in \code{download_*_data} formats +#' - All download function names are in \code{download_*} formats #' @author Insang Song #' @seealso #' For details of each download function per dataset, #' Please refer to: -#' * \link{download_aqs_data}: "aqs", "AQS" -#' * \link{download_ecoregion_data}: "ecoregion" -#' * \link{download_geos_data}: "geos" -#' * \link{download_gmted_data}: "gmted", "GMTED" -#' * \link{download_koppen_geiger_data}: "koppen", "koppengeiger" -#' * \link{download_merra2_data}: "merra2", "merra", "MERRA", "MERRA2" -#' * \link{download_narr_monolevel_data}: "narr_monolevel", "monolevel" -#' * \link{download_narr_p_levels_data}: "narr_p_levels", "p_levels", "plevels" -#' * \link{download_nlcd_data}: "nlcd", "NLCD" -#' * \link{download_hms_data}: "noaa", "smoke", "hms" -#' * \link{download_sedac_groads_data}: "sedac_groads", "groads" -#' * \link{download_sedac_population_data}: "sedac_population", "population" -#' * \link{download_modis_data}: "modis", "MODIS" -#' * \link{download_tri_data}: "tri", "TRI" -#' * \link{download_nei_data}: "nei", "NEI" -#' * \link{download_gridmet_data}: "gridMET", "gridmet" -#' * \link{download_terraclimate_data}: "TerraClimate", "terraclimate" +#' * \code{\link{download_aqs}}: `"aqs"`, `"AQS"` +#' * \code{\link{download_ecoregion}}: `"ecoregions"`, `"ecoregion"` +#' * \code{\link{download_geos}}: `"geos"` +#' * \code{\link{download_gmted}}: `"gmted"`, `"GMTED"` +#' * \code{\link{download_koppen_geiger}}: `"koppen"`, `"koppengeiger"` +#' * \code{\link{download_merra2}}: "merra2", `"merra"`, `"MERRA"`, `"MERRA2"` +#' * \code{\link{download_narr_monolevel}}: `"narr_monolevel"`, `"monolevel"` +#' * \code{\link{download_narr_p_levels}}: `"narr_p_levels"`, `"p_levels"`, +#' `"plevels"` +#' * \code{\link{download_nlcd}}: `"nlcd"`, `"NLCD"` +#' * \code{\link{download_hms}}: `"noaa"`, `"smoke"`, `"hms"` +#' * \code{\link{download_sedac_groads}}: `"sedac_groads"`, `"groads"` +#' * \code{\link{download_sedac_population}}: `"sedac_population"`, +#' `"population"` +#' * \code{\link{download_modis}}: `"modis"`, `"MODIS"` +#' * \code{\link{download_tri}}: `"tri"`, `"TRI"` +#' * \code{\link{download_nei}}: `"nei"`, `"NEI"` +#' * \code{\link{download_gridmet}}: `"gridMET"`, `"gridmet"` +#' * \code{\link{download_terraclimate}}: `"TerraClimate"`, `"terraclimate"` #' @returns NULL #' @export download_data <- function( - dataset_name = c("aqs", "ecoregion", "geos", "gmted", "koppen", + dataset_name = c("aqs", "ecoregion", "ecoregions", + "geos", "gmted", "koppen", "koppengeiger", "merra2", "merra", "narr_monolevel", "modis", "narr_p_levels", "nlcd", "noaa", "sedac_groads", "sedac_population", "groads", "population", "plevels", @@ -55,38 +58,39 @@ download_data <- # determine whether the data exist and deter proceeding? 
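# The dataset_name aliases resolve through the switch() below, so e.g. "noaa",
# "smoke", and "hms" all dispatch to download_hms(), with remaining arguments
# forwarded via `...`. A hypothetical call (placeholder path) that only writes
# the command file because download = FALSE:
#   download_data(dataset_name = "hms", date_start = "2023-09-01",
#                 date_end = "2023-09-02", directory_to_save = "./data/hms",
#                 acknowledgement = TRUE, download = FALSE)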
what_to_run <- switch(dataset_name, - aqs = download_aqs_data, - ecoregion = download_ecoregion_data, - geos = download_geos_data, - gmted = download_gmted_data, - koppen = download_koppen_geiger_data, - koppengeiger = download_koppen_geiger_data, - merra2 = download_merra2_data, - merra = download_merra2_data, - narr_monolevel = download_narr_monolevel_data, - monolevel = download_narr_monolevel_data, - narr_p_levels = download_narr_p_levels_data, - p_levels = download_narr_p_levels_data, - plevels = download_narr_p_levels_data, - nlcd = download_nlcd_data, - noaa = download_hms_data, - smoke = download_hms_data, - hms = download_hms_data, - sedac_groads = download_sedac_groads_data, - groads = download_sedac_groads_data, - sedac_population = download_sedac_population_data, - population = download_sedac_population_data, - modis = download_modis_data, - tri = download_tri_data, - nei = download_nei_data, - gridmet = download_gridmet_data, - terraclimate = download_terraclimate_data, - huc = download_huc_data, - cropscape = download_cropscape_data, - cdl = download_cropscape_data, - prism = download_prism_data, - olm = download_olm_data, - openlandmap = download_olm_data + aqs = download_aqs, + ecoregion = download_ecoregion, + ecoregions = download_ecoregion, + geos = download_geos, + gmted = download_gmted, + koppen = download_koppen_geiger, + koppengeiger = download_koppen_geiger, + merra2 = download_merra2, + merra = download_merra2, + narr_monolevel = download_narr_monolevel, + monolevel = download_narr_monolevel, + narr_p_levels = download_narr_p_levels, + p_levels = download_narr_p_levels, + plevels = download_narr_p_levels, + nlcd = download_nlcd, + noaa = download_hms, + smoke = download_hms, + hms = download_hms, + sedac_groads = download_sedac_groads, + groads = download_sedac_groads, + sedac_population = download_sedac_population, + population = download_sedac_population, + modis = download_modis, + tri = download_tri, + nei = download_nei, + gridmet = download_gridmet, + terraclimate = download_terraclimate, + huc = download_huc, + cropscape = download_cropscape, + cdl = download_cropscape, + prism = download_prism, + olm = download_olm, + openlandmap = download_olm ) tryCatch( @@ -109,7 +113,7 @@ download_data <- # nolint start #' Download air quality data #' @description -#' The \code{download_aqs_data()} function accesses and downloads Air Quality System (AQS) data from the [U.S. Environmental Protection Agency's (EPA) Pre-Generated Data Files](https://aqs.epa.gov/aqsweb/airdata/download_files.html). +#' The \code{download_aqs()} function accesses and downloads Air Quality System (AQS) data from the [U.S. Environmental Protection Agency's (EPA) Pre-Generated Data Files](https://aqs.epa.gov/aqsweb/airdata/download_files.html). #' @param parameter_code integer(1). length of 5. #' EPA pollutant parameter code. For details, please refer to #' [AQS parameter codes](https://aqs.epa.gov/aqsweb/documents/codetables/parameters.html) @@ -123,10 +127,9 @@ download_data <- #' Start year for downloading data. #' @param year_end integer(1). length of 4. #' End year for downloading data. -#' @param directory_to_download character(1). -#' Directory to download zip files from AQS data mart. -#' @param directory_to_save character(1). -#' Directory to decompress zip files. +#' @param directory_to_save character(1). Directory to save data. Two +#' sub-directories will be created for the downloaded zip files ("/zip_files") +#' and the unzipped data files ("/data_files"). 
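# A hypothetical call against the consolidated directory argument; zip archives
# land in "./data/aqs/zip_files" and extracted files in "./data/aqs/data_files"
# (paths are placeholders):
download_aqs(
  parameter_code = 88101,
  year_start = 2021,
  year_end = 2021,
  directory_to_save = "./data/aqs",
  acknowledgement = TRUE,
  download = FALSE  # build the curl command file without running it
)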
#' @param acknowledgement logical(1). By setting \code{TRUE} the #' user acknowledges that the data downloaded using this function may be very #' large and use lots of machine storage and memory. @@ -140,18 +143,16 @@ download_data <- #' @param remove_zip logical(1). Remove zip file from directory_to_download. #' Default \code{FALSE}. #' @author Mariana Kassien, Insang Song, Mitchell Manware -#' @returns NULL; Separate comma-separated value (CSV) files of -#' monitors and the daily representative values -#' will be stored in \code{directory_to_save}. +#' @returns NULL; Zip and/or data files will be downloaded and stored in +#' \code{directory_to_save}. #' @export -download_aqs_data <- +download_aqs <- function( parameter_code = 88101, resolution_temporal = "daily", year_start = 2018, year_end = 2022, url_aqs_download = "https://aqs.epa.gov/aqsweb/airdata/", - directory_to_download = NULL, directory_to_save = NULL, acknowledgement = FALSE, download = FALSE, @@ -160,17 +161,14 @@ download_aqs_data <- remove_zip = FALSE ) { #### 1. check for data download acknowledgement - download_permit( - acknowledgement = - acknowledgement - ) + download_permit(acknowledgement = acknowledgement) #### 2. check for null parameters check_for_null_parameters(mget(ls())) #### 3. directory setup - directory_to_download <- download_sanitize_path(directory_to_download) - directory_to_save <- download_sanitize_path(directory_to_save) - download_setup_dir(directory_to_download) - download_setup_dir(directory_to_save) + directory_original <- download_sanitize_path(directory_to_save) + directories <- download_setup_dir(directory_original, zip = TRUE) + directory_to_download <- directories[1] + directory_to_save <- directories[2] #### 4. define year sequence year_sequence <- seq(year_start, year_end, 1) #### 5. build URLs @@ -179,7 +177,7 @@ download_aqs_data <- resolution_temporal, "_", parameter_code, - "_%.0f.zip", + "_%d.zip", sep = "" ), year_sequence @@ -198,14 +196,14 @@ download_aqs_data <- resolution_temporal, "_", parameter_code, - "_%.0f.zip", + "_%d.zip", sep = "" ), year_sequence ) #### 6. build download command download_commands <- paste0( - "curl ", + "curl -s --url ", download_urls, " --output ", download_names, @@ -219,7 +217,7 @@ download_aqs_data <- ] #### 7. initiate "..._curl_commands.txt" commands_txt <- paste0( - directory_to_download, + directory_original, "aqs_", parameter_code, "_", @@ -230,7 +228,7 @@ download_aqs_data <- ) download_sink(commands_txt) #### 8. concatenate and print download commands to "..._curl_commands.txt" - writeLines(download_commands) + cat(download_commands) #### 9. finish "..._curl_commands.txt" file sink() #### 10. build system command @@ -245,17 +243,16 @@ download_aqs_data <- system_command = system_command ) #### 12. unzip data - for (n in seq_along(download_names)) { - download_unzip( - file_name = download_names[n], - directory_to_unzip = directory_to_save, - unzip = unzip - ) - download_remove_zips( - remove = remove_zip, - download_name = download_names[n] - ) - } + sapply( + download_names, + download_unzip, + directory_to_unzip = directory_to_save, + unzip = unzip + ) + download_remove_zips( + remove = remove_zip, + download_name = download_names + ) #### 13. remove command file download_remove_command( commands_txt = commands_txt, @@ -268,7 +265,7 @@ download_aqs_data <- # nolint start #' Download ecoregion data #' @description -#' The \code{download_ecoregion_data()} function accesses and downloads United States Ecoregions data from the [U.S. 
Environmental Protection Agency's (EPA) Ecorgions](https://www.epa.gov/eco-research/ecoregions). Level 3 data, where all pieces of information in the higher levels are included, are downloaded. +#' The \code{download_ecoregion()} function accesses and downloads United States Ecoregions data from the [U.S. Environmental Protection Agency's (EPA) Ecoregions](https://www.epa.gov/eco-research/ecoregions). Level 3 data, where all pieces of information in the higher levels are included, are downloaded. # nolint end #' @note #' For EPA Data Commons certificate errors, follow the steps below: @@ -284,9 +281,9 @@ download_aqs_data <- #' 'extdata/cacert_gaftp_epa.pem' under the package installation path. #' @param certificate_url character(1). URL to certificate file. See notes for #' details. -#' @param directory_to_download character(1). Directory to download zip file -#' of Ecoregion level 3 shapefiles -#' @param directory_to_save character(1). Directory to decompress zip files. +#' @param directory_to_save character(1). Directory to save data. Two +#' sub-directories will be created for the downloaded zip files ("/zip_files") +#' and the unzipped data files ("/data_files"). #' @param acknowledgement logical(1). By setting \code{TRUE} the #' user acknowledges that the data downloaded using this function may be very #' large and use lots of machine storage and memory. @@ -297,19 +294,19 @@ download_aqs_data <- #' Remove (\code{TRUE}) or keep (\code{FALSE}) #' the text file containing download commands. #' @param unzip logical(1). Unzip zip files. Default \code{TRUE}. -#' @param remove_zip logical(1). Remove zip file from directory_to_download. -#' Default \code{FALSE}. +#' @param remove_zip logical(1). Remove zip files from the "/zip_files" +#' sub-directory of \code{directory_to_save}. Default \code{FALSE}. #' @author Insang Song -#' @returns NULL; +#' @returns NULL; Zip and/or data files will be downloaded and stored in +#' \code{directory_to_save}. #' @importFrom utils download.file #' @export -download_ecoregion_data <- function( +download_ecoregion <- function( epa_certificate_path = system.file("extdata/cacert_gaftp_epa.pem", package = "amadeus"), certificate_url = "http://cacerts.digicert.com/DigiCertGlobalG2TLSRSASHA2562020CA1-1.crt", - directory_to_download = NULL, directory_to_save = NULL, acknowledgement = FALSE, download = FALSE, @@ -322,21 +319,10 @@ download_ecoregion_data <- function( #### 2. check for null parameters check_for_null_parameters(mget(ls())) #### 3. directory setup - download_setup_dir(directory_to_save) - download_setup_dir(directory_to_download) - directory_to_download <- download_sanitize_path(directory_to_download) - directory_to_save <- download_sanitize_path(directory_to_save) - #### 4. Check the presence of file - ## This part is hard-coded as the original file appears to - ## be a misnomer. May need to be modified accordingly in the future. - path_downloaded_file <- sprintf( - "%sus_eco_l3_state_boundaries.shp", - directory_to_save - ) - if (file.exists(path_downloaded_file)) { - message("Requested files exist in the target directory.\n") - return(NULL) - } + directory_original <- download_sanitize_path(directory_to_save) + directories <- download_setup_dir(directory_original, zip = TRUE) + directory_to_download <- directories[1] + directory_to_save <- directories[2] #### 5. define download URL download_epa_certificate( epa_certificate_path = epa_certificate_path, @@ -348,9 +334,9 @@ download_ecoregion_data <- function( "us_eco_l3_state_boundaries.zip" ) #### 6.
build download file name - download_name <- sprintf( - "%sus_eco_l3_state_boundaries.zip", - directory_to_download + download_name <- file.path( + directory_to_download, + "us_eco_l3_state_boundaries.zip" ) #### 7. build download command download_command <- @@ -365,7 +351,7 @@ download_ecoregion_data <- function( ) #### 8. initiate "..._curl_commands.txt" file commands_txt <- paste0( - directory_to_download, + directory_original, "us_eco_l3_state_boundaries_", Sys.Date(), "_wget_command.txt" @@ -409,7 +395,7 @@ download_ecoregion_data <- function( # nolint start #' Download atmospheric composition data #' @description -#' The \code{download_geos_data()} function accesses and downloads various +#' The \code{download_geos()} function accesses and downloads various #' atmospheric composition collections from [NASA's Global Earth Observing System (GEOS) model](https://gmao.gsfc.nasa.gov/GEOS_systems/). # nolint end #' @param collection character(1). GEOS-CF data collection file name. @@ -418,6 +404,8 @@ download_ecoregion_data <- function( #' @param date_end character(1). length of 10. End date for downloading data. #' Format YYYY-MM-DD (ex. September 1, 2023 = `"2023-09-01"`). #' @param directory_to_save character(1). Directory to save data. +#' Sub-directories will be created within \code{directory_to_save} for each +#' GEOS-CF collection. #' @param acknowledgement logical(1). By setting \code{TRUE} the #' user acknowledges that the data downloaded using this function may be very #' large and use lots of machine storage and memory. @@ -428,10 +416,11 @@ download_ecoregion_data <- function( #' Remove (\code{TRUE}) or keep (\code{FALSE}) #' the text file containing download commands. #' @author Mitchell Manware, Insang Song -#' @return NULL; Hourly netCDF (.nc4) files will be stored in -#' \code{directory_to_save}. +#' @return NULL; netCDF (.nc4) files will be stored in a +#' collection-specific folder within \code{directory_to_save}. #' @export -download_geos_data <- function( +# nolint start: cyclocomp +download_geos <- function( collection = c( "aqc_tavg_1hr_g1440x721_v1", "chm_tavg_1hr_g1440x721_v1", @@ -452,22 +441,19 @@ download_geos_data <- function( download_setup_dir(directory_to_save) directory_to_save <- download_sanitize_path(directory_to_save) #### 4. match collection - collection <- match.arg(collection) + collection <- match.arg(collection, several.ok = TRUE) #### 5. define date sequence date_sequence <- generate_date_sequence( date_start, date_end, sub_hyphen = TRUE ) - #### 6. define time sequence - time_sequence <- generate_time_sequence(collection) #### 7. define URL base base <- "https://portal.nccs.nasa.gov/datashare/gmao/geos-cf/v1/ana/" #### 8. initiate "..._wget_commands.txt" file commands_txt <- paste0( directory_to_save, - collection, - "_", + "geos_", date_start, "_", date_end, @@ -475,67 +461,71 @@ download_geos_data <- function( ) download_sink(commands_txt) #### 9. 
concatenate and print download commands to "..._wget_commands.txt" - for (d in seq_along(date_sequence)) { - date <- date_sequence[d] - year <- substr(date, 1, 4) - month <- substr(date, 5, 6) - day <- substr(date, 7, 8) - for (t in seq_along(time_sequence)) { - download_url_base <- paste0( - base, - "Y", - year, - "/M", - month, - "/D", - day, - "/" - ) - download_name <- paste0( - "GEOS-CF.v01.rpl.", - collection, - ".", - date, - "_", - time_sequence[t], - "z.nc4" - ) - download_url <- paste0( - download_url_base, - download_name - ) - if (t == 1) { - if (!(check_url_status(download_url))) { - sink() - file.remove(commands_txt) - stop(paste0( - "Invalid date returns HTTP code 404. ", - "Check `date_start` parameter.\n" - )) + for (c in seq_along(collection)) { + collection_loop <- collection[c] + download_folder <- paste0( + directory_to_save, + collection_loop, + "/" + ) + if (!dir.exists(download_folder)) { + dir.create(download_folder, recursive = TRUE) + } + for (d in seq_along(date_sequence)) { + date <- date_sequence[d] + year <- substr(date, 1, 4) + month <- substr(date, 5, 6) + day <- substr(date, 7, 8) + time_sequence <- generate_time_sequence(collection_loop) + for (t in seq_along(time_sequence)) { + download_url_base <- paste0( + base, + "Y", + year, + "/M", + month, + "/D", + day, + "/" + ) + download_name <- paste0( + "GEOS-CF.v01.rpl.", + collection_loop, + ".", + date, + "_", + time_sequence[t], + "z.nc4" + ) + download_url <- paste0( + download_url_base, + download_name + ) + if (t == 1) { + if (!(check_url_status(download_url))) { + sink() + file.remove(commands_txt) + stop(paste0( + "Invalid date returns HTTP code 404. ", + "Check `date_start` parameter.\n" + )) + } + } + download_folder_name <- paste0( + download_folder, + download_name + ) + download_command <- paste0( + "curl ", + download_url, + " -o ", + download_folder_name, + "\n" + ) + if (!file.exists(download_folder_name)) { + #### cat command only if file does not already exist + cat(download_command) } - } - download_folder <- paste0( - directory_to_save, - collection, - "/" - ) - download_folder_name <- paste0( - download_folder, - download_name - ) - if (!file.exists(download_folder)) { - dir.create(download_folder) - } - download_command <- paste0( - "curl ", - download_url, - " -o ", - download_folder_name, - "\n" - ) - if (!file.exists(download_folder_name)) { - #### cat command only if file does not already exist - cat(download_command) } } } @@ -557,20 +547,21 @@ download_geos_data <- function( remove = remove_command ) } +# nolint end: cyclocomp # nolint start #' Download elevation data #' @description -#' The \code{download_gmted_data()} function accesses and downloads Global +#' The \code{download_gmted()} function accesses and downloads Global #' Multi-resolution Terrain Elevation Data (GMTED2010) from #' [U.S. Geological Survey and National Geospatial-Intelligence Agency](https://www.usgs.gov/coastal-changes-and-impacts/gmted2010). #' @param statistic character(1). Available statistics include `"Breakline Emphasis"`, `"Systematic Subsample"`, `"Median Statistic"`, #' `"Minimum Statistic"`, `"Mean Statistic"`, `"Maximum Statistic"`, and #' `"Standard Deviation Statistic"`. #' @param resolution character(1). Available resolutions include `"7.5 arc-seconds"`, `"15 arc-seconds"`, and `"30 arc-seconds"`. -#' @param directory_to_download character(1). Directory to download zip files -#' from Global Multi-resolution Terrain Elevation Data (GMTED2010). -#' @param directory_to_save character(1). 
Directory to decompress zip files. +#' @param directory_to_save character(1). Directory to save data. Two +#' sub-directories will be created for the downloaded zip files ("/zip_files") +#' and the unzipped data files ("/data_files"). #' @param acknowledgement logical(1). By setting \code{TRUE} the #' user acknowledges that the data downloaded using this function may be very #' large and use lots of machine storage and memory. @@ -585,11 +576,10 @@ download_geos_data <- function( #' Default is \code{FALSE}. #' @author Mitchell Manware, Insang Song # nolint end -#' @return NULL; Statistic and resolution-specific zip files will be stored in -#' \code{directory_to_download}, and directories containing raw ASCII Grid data -#'will be stored in \code{directory_to_save}. +#' @returns NULL; Zip and/or data files will be downloaded and stored in +#' \code{directory_to_save}. #' @export -download_gmted_data <- function( +download_gmted <- function( statistic = c( "Breakline Emphasis", "Systematic Subsample", "Median Statistic", "Minimum Statistic", @@ -597,7 +587,6 @@ download_gmted_data <- function( "Standard Deviation Statistic" ), resolution = c("7.5 arc-seconds", "15 arc-seconds", "30 arc-seconds"), - directory_to_download = NULL, directory_to_save = NULL, acknowledgement = FALSE, download = FALSE, @@ -610,10 +599,10 @@ download_gmted_data <- function( #### 2. check for null parameters check_for_null_parameters(mget(ls())) #### 3. directory setup - download_setup_dir(directory_to_download) - download_setup_dir(directory_to_save) - directory_to_download <- download_sanitize_path(directory_to_download) - directory_to_save <- download_sanitize_path(directory_to_save) + directory_original <- download_sanitize_path(directory_to_save) + directories <- download_setup_dir(directory_original, zip = TRUE) + directory_to_download <- directories[1] + directory_to_save <- directories[2] #### 4. check for valid statistic statistic <- match.arg(statistic) #### 5. check for valid resolution @@ -660,7 +649,7 @@ download_gmted_data <- function( ) #### 12. initiate "..._curl_commands.txt" commands_txt <- paste0( - directory_to_download, + directory_original, "gmted_", gsub(" ", "", statistic), "_", @@ -709,7 +698,7 @@ download_gmted_data <- function( # nolint start #' Download meteorological and atmospheric data #' @description -#' The \code{download_merra2_data()} function accesses and downloads various +#' The \code{download_merra2()} function accesses and downloads various #' meteorological and atmospheric collections from [NASA's Modern-Era Retrospective analysis for Research and Applications, Version 2 (MERRA-2) model](https://gmao.gsfc.nasa.gov/reanalysis/MERRA-2/). #' @param collection character(1). MERRA-2 data collection file name. #' @param date_start character(1). length of 10. Start date for downloading @@ -727,11 +716,12 @@ download_gmted_data <- function( #' Remove (\code{TRUE}) or keep (\code{FALSE}) #' the text file containing download commands. #' @author Mitchell Manware, Insang Song -#' @return NULL; Daily netCDF (.nc4) files will be stored in -#' \code{directory_to_save}. +#' @return NULL; netCDF (.nc4) files will be stored in a +#' collection-specific folder within \code{directory_to_save}. 
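# Because the collection check is now vectorized (all(collection %in% ...)),
# several MERRA-2 collections can be requested in one hypothetical call, each
# stored in its own collection-specific sub-folder (path is a placeholder):
download_merra2(
  collection = c("inst1_2d_asm_Nx", "inst1_2d_int_Nx"),
  date_start = "2022-01-01",
  date_end = "2022-01-02",
  directory_to_save = "./data/merra2",
  acknowledgement = TRUE,
  download = FALSE
)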
#' @export # nolint end -download_merra2_data <- function( +# nolint start: cyclocomp +download_merra2 <- function( collection = c( "inst1_2d_asm_Nx", "inst1_2d_int_Nx", "inst1_2d_lfo_Nx", "inst3_3d_asm_Np", "inst3_3d_aer_Nv", "inst3_3d_asm_Nv", @@ -753,14 +743,14 @@ download_merra2_data <- function( acknowledgement = FALSE, download = FALSE, remove_command = FALSE) { - #### 1. check for data download acknowledgement + #### check for data download acknowledgement download_permit(acknowledgement = acknowledgement) - #### 2. directory setup + #### directory setup download_setup_dir(directory_to_save) directory_to_save <- download_sanitize_path(directory_to_save) - #### 3. check for null parameters + #### check for null parameters check_for_null_parameters(mget(ls())) - #### 4. check if collection is recognized + #### check if collection is recognized identifiers <- c( "inst1_2d_asm_Nx M2I1NXASM 10.5067/3Z173KIE2TPD", "inst1_2d_int_Nx M2I1NXINT 10.5067/G0U6NGQ3BLE0", @@ -807,217 +797,212 @@ download_merra2_data <- function( identifiers <- do.call(rbind, identifiers) identifiers_df <- as.data.frame(identifiers) colnames(identifiers_df) <- c("collection_id", "estd_name", "DOI") - if (!(collection %in% identifiers_df$collection_id)) { + if (!all(collection %in% identifiers_df$collection_id)) { print(identifiers_df) stop(paste0("Requested collection is not recognized.\n Please refer to the table above to find a proper collection.\n")) } - #### 5. define date sequence + #### define date sequence date_sequence <- generate_date_sequence( date_start, date_end, sub_hyphen = TRUE ) - #### 6. define year + month sequence + #### define year + month sequence yearmonth_sequence <- unique(substr(date_sequence, 1, 6)) - #### 7. define ESDT name and DOI - identifiers_df_requested <- subset(identifiers_df, - subset = - identifiers_df$collection_id == - collection - ) - esdt_name <- identifiers_df_requested[, 2] - cat(paste0( - "Collection: ", - collection, - " | ESDT Name: ", - esdt_name, - " | DOI: ", - identifiers_df_requested[, 3], - "\n" - )) - #### 8. define URL base - #### NOTE: sorted and defined manually according to - #### https://goldsmr4.gesdisc.eosdis.nasa.gov/data/MERRA2/ \& - #### https://goldsmr5.gesdisc.eosdis.nasa.gov/data/MERRA2/ - esdt_name_4 <- c( - "M2I1NXASM", "M2I1NXINT", "M2I1NXLFO", "M2I3NXGAS", - "M2SDNXSLV", "M2T1NXADG", "M2T1NXAER", "M2T1NXCHM", - "M2T1NXCSP", "M2T1NXFLX", "M2T1NXINT", "M2T1NXLFO", - "M2T1NXLND", "M2T1NXOCN", "M2T1NXRAD", "M2T1NXSLV", - "M2T3NXGLC" - ) - esdt_name_5 <- c( - "M2I3NPASM", "M2I3NVAER", "M2I3NVASM", "M2I3NVCHM", - "M2I3NVGAS", "M2I6NPANA", "M2I6NVANA", "M2T3NEMST", - "M2T3NENAV", "M2T3NETRB", "M2T3NPCLD", "M2T3NPMST", - "M2T3NPODT", "M2T3NPQDT", "M2T3NPRAD", "M2T3NPTDT", - "M2T3NPTRB", "M2T3NPUDT", "M2T3NVASM", "M2T3NVCLD", - "M2T3NVMST", "M2T3NVRAD" - ) - if (esdt_name %in% esdt_name_4) { - base <- "https://goldsmr4.gesdisc.eosdis.nasa.gov/data/MERRA2/" - } else if (esdt_name %in% esdt_name_5) { - base <- "https://goldsmr5.gesdisc.eosdis.nasa.gov/data/MERRA2/" - } - #### 9. 
identify download URLs - list_urls <- NULL - for (y in seq_along(yearmonth_sequence)) { - year <- substr(yearmonth_sequence[y], 1, 4) - month <- substr(yearmonth_sequence[y], 5, 6) - if (y == 1) { - base_url <- paste0( + #### initiate "..._wget_commands.txt" file + commands_txt <- paste0( + directory_to_save, + "merra2_", + date_start, + "_", + date_end, + "_wget_commands.txt" + ) + download_sink(commands_txt) + for (c in seq_along(collection)) { + collection_loop <- collection[c] + #### define ESDT name and DOI + identifiers_df_requested <- subset( + identifiers_df, + subset = identifiers_df$collection_id == collection_loop + ) + esdt_name <- identifiers_df_requested[, 2] + #### define URL base + #### NOTE: sorted and defined manually according to + #### https://goldsmr4.gesdisc.eosdis.nasa.gov/data/MERRA2/ \& + #### https://goldsmr5.gesdisc.eosdis.nasa.gov/data/MERRA2/ + esdt_name_4 <- c( + "M2I1NXASM", "M2I1NXINT", "M2I1NXLFO", "M2I3NXGAS", + "M2SDNXSLV", "M2T1NXADG", "M2T1NXAER", "M2T1NXCHM", + "M2T1NXCSP", "M2T1NXFLX", "M2T1NXINT", "M2T1NXLFO", + "M2T1NXLND", "M2T1NXOCN", "M2T1NXRAD", "M2T1NXSLV", + "M2T3NXGLC" + ) + esdt_name_5 <- c( + "M2I3NPASM", "M2I3NVAER", "M2I3NVASM", "M2I3NVCHM", + "M2I3NVGAS", "M2I6NPANA", "M2I6NVANA", "M2T3NEMST", + "M2T3NENAV", "M2T3NETRB", "M2T3NPCLD", "M2T3NPMST", + "M2T3NPODT", "M2T3NPQDT", "M2T3NPRAD", "M2T3NPTDT", + "M2T3NPTRB", "M2T3NPUDT", "M2T3NVASM", "M2T3NVCLD", + "M2T3NVMST", "M2T3NVRAD" + ) + if (esdt_name %in% esdt_name_4) { + base <- "https://goldsmr4.gesdisc.eosdis.nasa.gov/data/MERRA2/" + } else if (esdt_name %in% esdt_name_5) { + base <- "https://goldsmr5.gesdisc.eosdis.nasa.gov/data/MERRA2/" + } + #### identify download URLs + list_urls <- NULL + for (y in seq_along(yearmonth_sequence)) { + year <- substr(yearmonth_sequence[y], 1, 4) + month <- substr(yearmonth_sequence[y], 5, 6) + if (y == 1) { + base_url <- paste0( + base, + esdt_name, + ".5.12.4/", + year, + "/", + month, + "/" + ) + if (!(check_url_status(base_url))) { + stop(paste0( + "Invalid date returns HTTP code 404. ", + "Check `date_start` parameter.\n" + )) + } + } + list_urls_month <- system( + paste0( + "wget -q -nH -nd ", + "\"", + base, + esdt_name, + ".5.12.4/", + year, + "/", + month, + "/\"", + " -O - | grep .nc4 | awk -F'\"' ", + "'{print $4}'" + ), + intern = TRUE + ) + list_urls <- c(list_urls, list_urls_month) + } + #### match list_urls to date sequence + list_urls_date_sequence <- list_urls[substr(list_urls, 28, 35) %in% + date_sequence] + #### separate data and metadata + list_urls_data <- list_urls_date_sequence[grep( + "*.xml", + list_urls_date_sequence, + invert = TRUE + )] + list_urls_metadata <- list_urls_date_sequence[grep( + "*.xml", + list_urls_date_sequence, + invert = FALSE + )] + #### concatenate and print download commands to "..._wget_commands.txt" + for (l in seq_along(date_sequence)) { + year <- as.character(substr(date_sequence[l], 1, 4)) + month <- as.character(substr(date_sequence[l], 5, 6)) + download_url <- paste0( base, esdt_name, ".5.12.4/", year, "/", month, - "/" + "/", + list_urls_data[l] ) - if (!(check_url_status(base_url))) { - stop(paste0( - "Invalid date returns HTTP code 404. 
", - "Check `date_start` parameter.\n" - )) + download_folder <- paste0( + directory_to_save, + collection_loop + ) + if (!dir.exists(download_folder)) { + dir.create(download_folder, recursive = TRUE) } - } - list_urls_month <- system( - paste0( - "wget -q -nH -nd ", - "\"", + download_name <- paste0( + download_folder, + "/", + list_urls_data[l] + ) + download_command <- paste0( + "wget ", + download_url, + " -O ", + download_name, + "\n" + ) + if (!file.exists(download_name)) { + #### cat command only if file does not already exist + cat(download_command) + } + download_url_metadata <- paste0( base, esdt_name, ".5.12.4/", year, "/", month, - "/\"", - " -O - | grep .nc4 | awk -F'\"' ", - "'{print $4}'" - ), - intern = TRUE - ) - list_urls <- c(list_urls, list_urls_month) - } - #### 10. match list_urls to date sequence - list_urls_date_sequence <- list_urls[substr(list_urls, 28, 35) %in% - date_sequence] - #### 11. separate data and metadata - list_urls_data <- list_urls_date_sequence[grep("*.xml", - list_urls_date_sequence, - invert = TRUE - )] - list_urls_metadata <- list_urls_date_sequence[grep("*.xml", - list_urls_date_sequence, - invert = FALSE - )] - #### 12. initiate "..._wget_commands.txt" file - commands_txt <- paste0( - directory_to_save, - collection, - "_", - date_start, - "_", - date_end, - "_wget_commands.txt" - ) - download_sink(commands_txt) - #### 13. concatenate and print download commands to "..._wget_commands.txt" - for (l in seq_along(date_sequence)) { - year <- as.character(substr(date_sequence[l], 1, 4)) - month <- as.character(substr(date_sequence[l], 5, 6)) - download_url <- paste0( - base, - esdt_name, - ".5.12.4/", - year, - "/", - month, - "/", - list_urls_data[l] - ) - download_folder <- paste0( - directory_to_save, - collection - ) - if (!file.exists(download_folder)) { - dir.create(download_folder) - } - download_name <- paste0( - download_folder, - "/", - list_urls_data[l] - ) - download_command <- paste0( - "wget ", - download_url, - " -O ", - download_name, - "\n" - ) - if (!file.exists(download_name)) { - #### cat command only if file does not already exist - cat(download_command) - } - download_url_metadata <- paste0( - base, - esdt_name, - ".5.12.4/", - year, - "/", - month, - "/", - list_urls_metadata[l] - ) - download_folder_metadata <- paste0( - directory_to_save, - collection, - "/metadata/" - ) - if (!file.exists(download_folder_metadata)) { - dir.create(download_folder_metadata) - } - download_name_metadata <- paste0( - download_folder_metadata, - list_urls_metadata[l] - ) - download_command_metadata <- paste0( - "wget ", - download_url_metadata, - " -O ", - download_name_metadata, - "\n" - ) - if (!file.exists(download_name)) { - #### cat command only if file does not already exist - cat(download_command_metadata) + "/", + list_urls_metadata[l] + ) + download_folder_metadata <- paste0( + directory_to_save, + collection_loop, + "/metadata/" + ) + if (!dir.exists(download_folder_metadata)) { + dir.create(download_folder_metadata, recursive = TRUE) + } + download_name_metadata <- paste0( + download_folder_metadata, + list_urls_metadata[l] + ) + download_command_metadata <- paste0( + "wget ", + download_url_metadata, + " -O ", + download_name_metadata, + "\n" + ) + if (!file.exists(download_name)) { + #### cat command only if file does not already exist + cat(download_command_metadata) + } } } - #### 14. finish "..._wget_commands.txt" + #### finish "..._wget_commands.txt" sink() - #### 15. 
build system command + #### build system command system_command <- paste0( ". ", commands_txt, "\n" ) - #### 16. download data + #### download data download_run( download = download, system_command = system_command ) - #### 17. Remove command file + #### Remove command file download_remove_command( commands_txt = commands_txt, remove = remove_command ) } +# nolint end: cyclocomp # nolint start #' Download meteorological data (monolevel) #' @description -#' The \code{download_narr_monolevel_data} function accesses and downloads monolevel meteorological data from [NOAA's North American Regional Reanalysis (NARR) model](https://psl.noaa.gov/data/gridded/data.narr.html). "Monolevel" variables contain a single value for the entire atmospheric column (ie. Variable: Convective cloud cover; Level: Entire atmosphere considered as a single layer), or represent a specific altitude associated with the variable (ie. Variable: Air temperature; Level: 2 m). +#' The \code{download_narr_monolevel} function accesses and downloads monolevel meteorological data from [NOAA's North American Regional Reanalysis (NARR) model](https://psl.noaa.gov/data/gridded/data.narr.html). "Monolevel" variables contain a single value for the entire atmospheric column (ie. Variable: Convective cloud cover; Level: Entire atmosphere considered as a single layer), or represent a specific altitude associated with the variable (ie. Variable: Air temperature; Level: 2 m). #' @param variables character. Variable(s) name acronym. See [List of Variables in NARR Files](https://ftp.cpc.ncep.noaa.gov/NARR/fixed/merged_land_AWIP32corrected.pdf) #' for variable names and acronym codes. #' @param year_start integer(1). length of 4. Start of year range for @@ -1036,11 +1021,11 @@ download_merra2_data <- function( #' Remove (\code{TRUE}) or keep (\code{FALSE}) #' the text file containing download commands. #' @author Mitchell Manware, Insang Song -#' @return NULL; Yearly netCDF (.nc) files will be stored in a variable-specific +#' @return NULL; netCDF (.nc) files will be stored in a variable-specific #' folder within \code{directory_to_save}. #' @export # nolint end -download_narr_monolevel_data <- function( +download_narr_monolevel <- function( variables = NULL, year_start = 2022, year_end = 2022, @@ -1076,8 +1061,8 @@ download_narr_monolevel_data <- function( for (v in seq_along(variables_list)) { variable <- variables_list[v] folder <- paste0(directory_to_save, variable, "/") - if (!(file.exists(folder))) { - dir.create(folder) + if (!dir.exists(folder)) { + dir.create(folder, recursive = TRUE) } for (y in seq_along(years)) { year <- years[y] @@ -1143,7 +1128,7 @@ download_narr_monolevel_data <- function( # nolint start #' Download meteorological data (pressure levels) #' @description -#' The \code{download_narr_p_levels_data} function accesses and downloads pressure levels meteorological data from [NOAA's North American Regional Reanalysis (NARR) model](https://psl.noaa.gov/data/gridded/data.narr.html). "Pressure levels" variables contain variable values at 29 atmospheric levels, ranging from 1000 hPa to 100 hPa. All pressure levels data will be downloaded for each variable. +#' The \code{download_narr_p_levels} function accesses and downloads pressure levels meteorological data from [NOAA's North American Regional Reanalysis (NARR) model](https://psl.noaa.gov/data/gridded/data.narr.html). "Pressure levels" variables contain variable values at 29 atmospheric levels, ranging from 1000 hPa to 100 hPa. 
All pressure levels data will be downloaded for each variable. #' @param variables character. Variable(s) name acronym. See [List of Variables in NARR Files](https://ftp.cpc.ncep.noaa.gov/NARR/fixed/merged_land_AWIP32corrected.pdf) #' for variable names and acronym codes. #' @param year_start integer(1). length of 4. Start of year range for @@ -1162,12 +1147,12 @@ download_narr_monolevel_data <- function( #' Remove (\code{TRUE}) or keep (\code{FALSE}) #' the text file containing download commands. #' @author Mitchell Manware, Insang Song -#' @return NULL; Monthly netCDF (.nc) files will be stored in +#' @return NULL; netCDF (.nc) files will be stored in #' \code{directory_to_save}. #' @export # nolint end # nolint start: cyclocomp -download_narr_p_levels_data <- function( +download_narr_p_levels <- function( variables = NULL, year_start = 2022, year_end = 2022, @@ -1205,8 +1190,8 @@ download_narr_p_levels_data <- function( for (v in seq_along(variables_list)) { variable <- variables_list[v] folder <- paste0(directory_to_save, variable, "/") - if (!(file.exists(folder))) { - dir.create(folder) + if (!dir.exists(folder)) { + dir.create(folder, recursive = TRUE) } for (y in seq_along(years)) { year <- years[y] @@ -1279,7 +1264,7 @@ download_narr_p_levels_data <- function( # nolint start #' Download land cover data #' @description -#' The \code{download_nlcd_data()} function accesses and downloads +#' The \code{download_nlcd()} function accesses and downloads #' land cover data from the #' [Multi-Resolution Land Characteristics (MRLC) Consortium's National Land Cover Database (NLCD) products data base](https://www.mrlc.gov/data). # nolint end @@ -1288,9 +1273,9 @@ download_narr_p_levels_data <- function( #' include `2001`, `2004`, `2006`, `2008`, `2011`, `2013`, `2016`, #' `2019`, and `2021`. #' Available years for Alaska include `2001`, `2011`, and `2016`. -#' @param directory_to_download character(1). Directory to download zip files -#' from National Land Cover Database Science Research Products. -#' @param directory_to_save character(1). Directory to decompress zip files. +#' @param directory_to_save character(1). Directory to save data. Two +#' sub-directories will be created for the downloaded zip files ("/zip_files") +#' and the unzipped shapefiles ("/data_files"). #' @param acknowledgement logical(1). By setting \code{TRUE} the #' user acknowledges that the data downloaded using this function may be very #' large and use lots of machine storage and memory. @@ -1304,13 +1289,12 @@ download_narr_p_levels_data <- function( #' @param remove_zip logical(1). Remove zip files from directory_to_download. #' Default is \code{FALSE}. #' @author Mitchell Manware, Insang Song -#' @returns NULL; Zip file will be stored in \code{directory_to_download}, and -#' selected GeoTIFF (.tif) files will be stored in \code{directory_to_save}. +#' @returns NULL; Zip and/or data files will be downloaded and stored in +#' respective sub-directories within \code{directory_to_save}. #' @export -download_nlcd_data <- function( +download_nlcd <- function( collection = "Coterminous United States", year = 2021, - directory_to_download = NULL, directory_to_save = NULL, acknowledgement = FALSE, download = FALSE, @@ -1323,10 +1307,10 @@ download_nlcd_data <- function( #### 2. check for null parameters check_for_null_parameters(mget(ls())) #### 3. 
directory setup - download_setup_dir(directory_to_download) - download_setup_dir(directory_to_save) - directory_to_download <- download_sanitize_path(directory_to_download) - directory_to_save <- download_sanitize_path(directory_to_save) + directory_original <- download_sanitize_path(directory_to_save) + directories <- download_setup_dir(directory_original, zip = TRUE) + directory_to_download <- directories[1] + directory_to_save <- directories[2] #### 4. check for valid years valid_years <- c(2001, 2004, 2006, 2008, 2011, 2013, 2016, 2019, 2021) if (!(year %in% valid_years)) { @@ -1382,7 +1366,7 @@ download_nlcd_data <- function( ) #### 11. initiate "..._curl_command.txt" commands_txt <- paste0( - directory_to_download, + directory_original, tolower(collection_code), Sys.Date(), "_curl_command.txt" @@ -1427,15 +1411,15 @@ download_nlcd_data <- function( # nolint start #' Download roads data #' @description -#' The \code{download_sedac_groads_data()} function accesses and downloads +#' The \code{download_sedac_groads()} function accesses and downloads #' roads data from [NASA's Global Roads Open Access Data Set (gROADS), v1 (1980-2010)](https://sedac.ciesin.columbia.edu/data/set/groads-global-roads-open-access-v1/data-download). #' @param data_region character(1). Data can be downloaded for `"Global"`, #' `"Africa"`, `"Asia"`, `"Europe"`, `"Americas"`, `"Oceania East"`, and `"Oceania West"`. #' @param data_format character(1). Data can be downloaded as `"Shapefile"` or #' `"Geodatabase"`. (Only `"Geodatabase"` available for `"Global"` region). -#' @param directory_to_download character(1). Directory to download zip files -#' from NASA Global Roads Open Access Data Set. -#' @param directory_to_save character(1). Directory to decompress zip files. +#' @param directory_to_save character(1). Directory to save data. Two +#' sub-directories will be created for the downloaded zip files ("/zip_files") +#' and the unzipped shapefiles ("/data_files"). #' @param acknowledgement logical(1). By setting \code{TRUE} the #' user acknowledges that the data downloaded using this function may be very #' large and use lots of machine storage and memory. @@ -1449,14 +1433,12 @@ download_nlcd_data <- function( #' @param remove_zip logical(1). Remove zip files from directory_to_download. #' Default is \code{FALSE}. #' @author Mitchell Manware, Insang Song -#' @returns NULL; Zip file will be stored in \code{directory_to_download}, and -#' selected Shapefile (.shp) or Geodatabase (.gdb) files will be stored in -#' \code{directory_to_save}. +#' @returns NULL; Zip and/or data files will be downloaded and stored in +#' respective sub-directories within \code{directory_to_save}. #' @export -download_sedac_groads_data <- function( +download_sedac_groads <- function( data_region = c("Americas", "Global", "Africa", "Asia", "Europe", "Oceania East", "Oceania West"), data_format = c("Shapefile", "Geodatabase"), - directory_to_download = NULL, directory_to_save = NULL, acknowledgement = FALSE, download = FALSE, @@ -1470,10 +1452,10 @@ download_sedac_groads_data <- function( #### 2. check for null parameters check_for_null_parameters(mget(ls())) #### 3. 
directory setup - download_setup_dir(directory_to_download) - download_setup_dir(directory_to_save) - directory_to_download <- download_sanitize_path(directory_to_download) - directory_to_save <- download_sanitize_path(directory_to_save) + directory_original <- download_sanitize_path(directory_to_save) + directories <- download_setup_dir(directory_original, zip = TRUE) + directory_to_download <- directories[1] + directory_to_save <- directories[2] #### 4. check if region is valid data_format <- match.arg(data_format) data_region <- match.arg(data_region) @@ -1522,7 +1504,7 @@ download_sedac_groads_data <- function( ) #### 11. initiate "..._curl_commands.txt" commands_txt <- paste0( - directory_to_download, + directory_original, "sedac_groads_", gsub(" ", "_", region), "_", @@ -1569,7 +1551,7 @@ download_sedac_groads_data <- function( # nolint start #' Download population density data #' @description -#' The \code{download_sedac_population_data()} function accesses and downloads +#' The \code{download_sedac_population()} function accesses and downloads #' population density data from [NASA's UN WPP-Adjusted Population Density, v4.11](https://sedac.ciesin.columbia.edu/data/set/gpw-v4-population-density-adjusted-to-2015-unwpp-country-totals-rev11). #' @param data_resolution character(1). Available resolutions are 30 second #' (approx. 1 km), 2.5 minute (approx. 5 km), 15 minute (approx. 30 km), @@ -1578,9 +1560,9 @@ download_sedac_groads_data <- function( #' `"ASCII"` or `"GeoTIFF"`. "all" years is downloaded as `"netCDF"`. #' @param year character(1). Available years are `2000`, `2005`, `2010`, `2015`, and #' `2020`, or `"all"` for all years. -#' @param directory_to_download character(1). Directory to download zip files -#' from NASA UN WPP-Adjusted Population Density, v4.11. -#' @param directory_to_save character(1). Directory to decompress zip files. +#' @param directory_to_save character(1). Directory to save data. Two +#' sub-directories will be created for the downloaded zip files ("/zip_files") +#' and the unzipped shapefiles ("/data_files"). #' @param acknowledgement logical(1). By setting \code{TRUE} the #' user acknowledges that the data downloaded using this function may be very #' large and use lots of machine storage and memory. @@ -1595,14 +1577,13 @@ download_sedac_groads_data <- function( #' Default is \code{FALSE}. #' @author Mitchell Manware, Insang Song # nolint end -#' @returns NULL; Zip file will be stored in \code{directory_to_download}, and -#' selected GeoTIFF (.tif) files will be stored in \code{directory_to_save}. +#' @returns NULL; Zip and/or data files will be downloaded and stored in +#' respective sub-directories within \code{directory_to_save}. #' @export -download_sedac_population_data <- function( +download_sedac_population <- function( data_resolution = "60 minute", data_format = c("GeoTIFF", "ASCII", "netCDF"), year = "2020", - directory_to_download = NULL, directory_to_save = NULL, acknowledgement = FALSE, download = FALSE, @@ -1615,10 +1596,10 @@ download_sedac_population_data <- function( #### 2. check for null parameters check_for_null_parameters(mget(ls())) #### 3. 
directory setup - download_setup_dir(directory_to_download) - download_setup_dir(directory_to_save) - directory_to_download <- download_sanitize_path(directory_to_download) - directory_to_save <- download_sanitize_path(directory_to_save) + directory_original <- download_sanitize_path(directory_to_save) + directories <- download_setup_dir(directory_original, zip = TRUE) + directory_to_download <- directories[1] + directory_to_save <- directories[2] #### 4. define URL base base <- paste0("https://sedac.ciesin.columbia.edu/downloads/data/gpw-v4/") #### 5. define year @@ -1698,7 +1679,7 @@ download_sedac_population_data <- function( ) #### 12. initiate "..._curl_command.txt" commands_txt <- paste0( - directory_to_download, + directory_original, "sedac_population_", year, "_", @@ -1747,7 +1728,7 @@ download_sedac_population_data <- function( # nolint start #' Download wildfire smoke data #' @description -#' The \code{download_hms_data()} function accesses and downloads +#' The \code{download_hms()} function accesses and downloads #' wildfire smoke plume coverage data from [NOAA's Hazard Mapping System Fire and Smoke Product](https://www.ospo.noaa.gov/Products/land/hms.html#0). # nolint end #' @param data_format character(1). "Shapefile" or "KML". #' @param date_start character(1). length of 10. Start date for downloading #' data. Format YYYY-MM-DD (ex. September 1, 2023 is `"2023-09-01"`). #' @param date_end character(1). length of 10. End date for downloading data. #' Format YYYY-MM-DD (ex. September 10, 2023 is `"2023-09-10"`). -#' @param directory_to_download character(1). Directory to download zip files -#' from NOAA Hazard Mapping System Fire and Smoke Product. (Ignored if -#' \code{data_format = "KML"}.) -#' @param directory_to_save character(1). Directory to save unzipped shapefiles -#' and KML files. +#' @param directory_to_save character(1). Directory to save data. If +#' `data_format = "Shapefile"`, two sub-directories will be created for the +#' downloaded zip files ("/zip_files") and the unzipped shapefiles +#' ("/data_files"). If `data_format = "KML"`, a single sub-directory +#' ("/data_files") will be created. #' @param acknowledgement logical(1). #' By setting \code{TRUE} the #' user acknowledges that the data downloaded using this function may be very #' large and use lots of machine storage and memory. @@ -1778,16 +1759,14 @@ download_sedac_population_data <- function( #' @importFrom utils head #' @importFrom utils tail #' @author Mitchell Manware, Insang Song -#' @returns NULL; Zip file will be stored in \code{directory_to_download}, and -#' Shapefiles (.shp) or KML files (.kml) will be stored in -#' \code{directory_to_save}. +#' @returns NULL; Zip and/or data files will be downloaded and stored in +#' respective sub-directories within \code{directory_to_save}. #' @export # nolint start: cyclocomp -download_hms_data <- function( +download_hms <- function( data_format = "Shapefile", date_start = "2023-09-01", date_end = "2023-09-01", - directory_to_download = NULL, directory_to_save = NULL, acknowledgement = FALSE, download = FALSE, @@ -1799,10 +1778,10 @@ download_hms_data <- function( #### 2. check for null parameters check_for_null_parameters(mget(ls())) #### 3.
directory setup - download_setup_dir(directory_to_download) - download_setup_dir(directory_to_save) - directory_to_download <- download_sanitize_path(directory_to_download) - directory_to_save <- download_sanitize_path(directory_to_save) + directory_original <- download_sanitize_path(directory_to_save) + directories <- download_setup_dir(directory_original, zip = TRUE) + directory_to_download <- directories[1] + directory_to_save <- directories[2] #### 4. check for unzip == FALSE && remove_zip == TRUE if (unzip == FALSE && remove_zip == TRUE) { stop(paste0( @@ -1820,7 +1799,7 @@ download_hms_data <- function( base <- "https://satepsanone.nesdis.noaa.gov/pub/FIRE/web/HMS/Smoke_Polygons/" #### 7. initiate "..._curl_commands.txt" commands_txt <- paste0( - directory_to_download, + directory_original, "hms_smoke_", utils::head(date_sequence, n = 1), "_", @@ -1833,10 +1812,12 @@ download_hms_data <- function( for (f in seq_along(date_sequence)) { year <- substr(date_sequence[f], 1, 4) month <- substr(date_sequence[f], 5, 6) - if (data_format == "Shapefile") { + if (tolower(data_format) == "shapefile") { + data_format <- "Shapefile" suffix <- ".zip" directory_to_cat <- directory_to_download - } else if (data_format == "KML") { + } else if (tolower(data_format) == "kml") { + data_format <- "KML" suffix <- ".kml" directory_to_cat <- directory_to_save } @@ -1902,7 +1883,9 @@ download_hms_data <- function( ) #### 13. end if data_format == "KML" if (data_format == "KML") { - return(cat(paste0("KML files cannot be unzipped.\n"))) + unlink(directory_to_download, recursive = TRUE) + cat(paste0("KML files cannot be unzipped.\n")) + return(TRUE) } #### 14. unzip downloaded zip files for (d in seq_along(download_names)) { @@ -1923,7 +1906,7 @@ download_hms_data <- function( # nolint start #' Download climate classification data #' @description -#' The \code{download_koppen_geiger_data()} function accesses and downloads +#' The \code{download_koppen_geiger()} function accesses and downloads #' climate classification data from the \emph{Present and future #' Köppen-Geiger climate classification maps at #' 1-km resolution}([link for article](https://www.nature.com/articles/sdata2018214); [link for data](https://figshare.com/articles/dataset/Present_and_future_K_ppen-Geiger_climate_classification_maps_at_1-km_resolution/6396959/2)). @@ -1933,10 +1916,9 @@ download_hms_data <- function( #' @param time_period character(1). Available times are `"Present"` (1980-2016) #' and `"Future"` (2071-2100). ("Future" classifications are based on scenario #' RCP8.5). -#' @param directory_to_download character(1). Directory to download zip files -#' from Present and future Köppen-Geiger climate classification maps at 1-km -#' resolution. -#' @param directory_to_save character(1). Directory to decompress zip files. +#' @param directory_to_save character(1). Directory to save data. Two +#' sub-directories will be created for the downloaded zip files ("/zip_files") +#' and the unzipped shapefiles ("/data_files"). #' @param acknowledgement logical(1). By setting \code{TRUE} the #' user acknowledges that the data downloaded using this function may be very #' large and use lots of machine storage and memory. @@ -1950,13 +1932,12 @@ download_hms_data <- function( #' @param remove_zip logical(1). Remove zip files from directory_to_download. #' Default is \code{FALSE}. 
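# A hypothetical call matching the renamed Köppen-Geiger interface and its new
# single directory_to_save layout (path is a placeholder):
download_koppen_geiger(
  data_resolution = "0.0083",
  time_period = "Present",
  directory_to_save = "./data/koppen",
  acknowledgement = TRUE,
  download = FALSE
)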
#' @author Mitchell Manware, Insang Song -#' @returns NULL; Zip file will be stored in \code{directory_to_download}, and -#' selected GeoTIFF (.tif) files will be stored in \code{directory_to_save}. +#' @returns NULL; Zip and/or data files will be downloaded and stored in +#' respective sub-directories within \code{directory_to_save}. #' @export -download_koppen_geiger_data <- function( +download_koppen_geiger <- function( data_resolution = c("0.0083", "0.083", "0.5"), time_period = c("Present", "Future"), - directory_to_download = NULL, directory_to_save = NULL, acknowledgement = FALSE, download = FALSE, @@ -1968,10 +1949,10 @@ download_koppen_geiger_data <- function( #### 2. check for null parameters check_for_null_parameters(mget(ls())) #### 3. directory setup - download_setup_dir(directory_to_download) - download_setup_dir(directory_to_save) - directory_to_download <- download_sanitize_path(directory_to_download) - directory_to_save <- download_sanitize_path(directory_to_save) + directory_original <- download_sanitize_path(directory_to_save) + directories <- download_setup_dir(directory_original, zip = TRUE) + directory_to_download <- directories[1] + directory_to_save <- directories[2] #### 4. check for data resolution data_resolution <- match.arg(data_resolution) #### 5. check for valid time period @@ -1981,7 +1962,7 @@ download_koppen_geiger_data <- function( #### 7. define data resolution data_resolution <- gsub("\\.", "p", data_resolution) #### 8 define download URL - download_url <- "https://figshare.com/ndownloader/files/12407516" + download_url <- "https://s3-eu-west-1.amazonaws.com/pfigshare-u-files/12407516/Beck_KG_V1.zip" #### 9 build download file name download_name <- paste0( directory_to_download, @@ -2001,7 +1982,7 @@ download_koppen_geiger_data <- function( ) #### 11. initiate "..._wget_commands.txt" commands_txt <- paste0( - directory_to_download, + directory_original, "koppen_geiger_", time_period, "_", @@ -2028,44 +2009,6 @@ download_koppen_geiger_data <- function( download = download, system_command = system_command ) - - if (unzip) { - #### 16. remove unwanted files - wanted_names <- list.files( - path = directory_to_save, - pattern = - sprintf("(Beck_KG_*.*_%s_*.*%s*.*tif$|legend.txt)", - time_period, data_resolution), - full.names = TRUE - ) - all_names <- list.files( - path = directory_to_save, - full.names = TRUE - ) - unwanted_names <- all_names[!all_names %in% wanted_names] - # unwanted_names <- as.vector(c( - # unwanted_names, - # paste0( - # directory_to_save, - # "KoppenGeiger.m" - # ) - # )) - # tif <- paste0( - # directory_to_save, - # "/Beck_KG_V1_", - # period, - # "_", - # data_resolution, - # ".tif" - # ) - # unwanted_names <- unwanted_names[grep( - # pattern = tif, - # unwanted_names, - # invert = TRUE - # )] - file.remove(unwanted_names) - } - #### 17. Remove command file download_remove_command( commands_txt = commands_txt, @@ -2077,6 +2020,7 @@ download_koppen_geiger_data <- function( directory_to_unzip = directory_to_save, unzip = unzip ) + #### 19. remove zip files download_remove_zips( remove = remove_zip, @@ -2124,10 +2068,10 @@ download_koppen_geiger_data <- function( #' the text file containing download commands. #' @author Mitchell Manware, Insang Song #' @import rvest -#' @return NULL; Raw HDF (.hdf) files will be stored in +#' @return NULL; HDF (.hdf) files will be stored in #' \code{directory_to_save}. 
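
A matching hedged sketch for the renamed `download_koppen_geiger()` above (the path is hypothetical; argument names follow the signature shown in this hunk):

```
# Requests the 0.0083-degree "Present" (1980-2016) classification map;
# the zip file lands in "zip_files/", unzipped GeoTIFFs in "data_files/".
download_koppen_geiger(
  data_resolution = "0.0083",
  time_period = "Present",
  directory_to_save = "./data/koppen/",
  acknowledgement = TRUE,
  download = FALSE,
  unzip = TRUE,
  remove_zip = FALSE
)
```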
#' @export -download_modis_data <- function( +download_modis <- function( product = c( "MOD09GA", "MOD11A1", "MOD06_L2", "MCD19A2", "MOD13A2", "VNP46A2" @@ -2441,7 +2385,7 @@ download_modis_data <- function( # nolint start #' Download toxic release data #' @description -#' The \code{download_tri_data()} function accesses and downloads toxic release data from the [U.S. Environmental Protection Agency's (EPA) Toxic Release Inventory (TRI) Program](https://www.epa.gov/toxics-release-inventory-tri-program/find-understand-and-use-tri). +#' The \code{download_tri()} function accesses and downloads toxic release data from the [U.S. Environmental Protection Agency's (EPA) Toxic Release Inventory (TRI) Program](https://www.epa.gov/toxics-release-inventory-tri-program/find-understand-and-use-tri). # nolint end #' @param year_start integer(1). length of 4. Start year for downloading data. #' @param year_end integer(1). length of 4. End year for downloading data. @@ -2455,10 +2399,10 @@ download_modis_data <- function( #' @param remove_command logical(1). Remove (\code{TRUE}) or keep (\code{FALSE}) #' the text file containing download commands. #' @author Mariana Kassien, Insang Song -#' @returns NULL; Yearly comma-separated value (CSV) files will be stored in +#' @returns NULL; Comma-separated value (CSV) files will be stored in #' \code{directory_to_save}. #' @export -download_tri_data <- function( +download_tri <- function( year_start = 2018L, year_end = 2022L, directory_to_save = NULL, @@ -2471,7 +2415,6 @@ download_tri_data <- function( #### 2. directory setup download_setup_dir(directory_to_save) directory_to_save <- download_sanitize_path(directory_to_save) - #### 3. define measurement data paths url_download <- "https://data.epa.gov/efservice/downloads/tri/mv_tri_basic_download/" @@ -2531,7 +2474,7 @@ download_tri_data <- function( # nolint start #' Download road emissions data #' @description -#' The \code{download_nei_data()} function accesses and downloads road emissions data from the [U.S Environmental Protection Agency's (EPA) National Emissions Inventory (NEI)](https://www.epa.gov/air-emissions-inventories/national-emissions-inventory-nei). +#' The \code{download_nei()} function accesses and downloads road emissions data from the [U.S Environmental Protection Agency's (EPA) National Emissions Inventory (NEI)](https://www.epa.gov/air-emissions-inventories/national-emissions-inventory-nei). # nolint end #' @param epa_certificate_path character(1). Path to the certificate file #' for EPA DataCommons. Default is @@ -2540,7 +2483,9 @@ download_tri_data <- function( #' details. #' @param year_target Available years of NEI data. #' Default is \code{c(2017L, 2020L)}. -#' @param directory_to_save character(1). Directory to download files. +#' @param directory_to_save character(1). Directory to save data. Two +#' sub-directories will be created for the downloaded zip files ("/zip_files") +#' and the unzipped data files ("/data_files"). #' @param acknowledgement logical(1). By setting \code{TRUE} the #' user acknowledges that the data downloaded using this function may be very #' large and use lots of machine storage and memory. @@ -2562,10 +2507,10 @@ download_tri_data <- function( #' Currently we bundle the pre-downloaded crt and its PEM (which is accepted #' in wget command) file in ./inst/extdata. The instruction above is for #' certificate updates in the future. -#' @returns NULL; Yearly comma-separated value (CSV) files will be stored in -#' \code{directory_to_save}. 
+#' @returns NULL; Zip and/or data files will be downloaded and stored in
+#' respective sub-directories within \code{directory_to_save}.
 #' @export
-download_nei_data <- function(
+download_nei <- function(
   epa_certificate_path =
     system.file("extdata/cacert_gaftp_epa.pem",
                 package = "amadeus"),
@@ -2581,8 +2526,10 @@
   #### 1. check for data download acknowledgement
   download_permit(acknowledgement = acknowledgement)
   #### 2. directory setup
-  download_setup_dir(directory_to_save)
-  directory_to_save <- download_sanitize_path(directory_to_save)
+  directory_original <- download_sanitize_path(directory_to_save)
+  directories <- download_setup_dir(directory_original, zip = TRUE)
+  directory_to_download <- directories[1]
+  directory_to_save <- directories[2]

   #### 5. define download URL
   download_epa_certificate(
@@ -2603,7 +2550,7 @@
   download_names_file <- c("2017neiApr_onroad_byregions.zip",
                            "2020nei_onroad_byregion.zip")
-  download_names <- paste0(directory_to_save, download_names_file)
+  download_names <- paste0(directory_to_download, download_names_file)
   #### filter commands to non-existing files
   download_urls <- download_urls[
     which(
@@ -2622,7 +2569,7 @@
   #### 5. initiate "..._curl_commands.txt"
   commands_txt <- paste0(
-    directory_to_save,
+    directory_original,
     "NEI_AADT_",
     paste(year_target, collapse = "-"),
    "_",
@@ -2649,7 +2596,10 @@
   # as duplicate file names are across multiple zip files
   if (download) {
     if (unzip) {
-      dir_unzip <- sub(".zip", "", download_names)
+      dir_unzip <- paste0(
+        directory_to_save,
+        sub(".zip", "", download_names_file)
+      )
       for (fn in seq_along(dir_unzip)) {
         utils::unzip(zipfile = download_names[fn], exdir = dir_unzip[fn])
       }
@@ -2762,7 +2712,7 @@
 #' @seealso [list_stac_files]
 #' @export
 # nolint end
-download_olm_data <- function(
+download_olm <- function(
   product = NULL,
   format = "tif",
   directory_to_save = NULL,
@@ -2854,18 +2804,18 @@
 #' the text file containing download commands.
 #' @param unzip logical(1). Unzip the downloaded compressed files.
 #' Default is \code{FALSE}. Unzipping is not supported by this function because HUC data are distributed in 7z format.
-#' @returns None. Downloaded files will be stored in \code{directory_to_save}.
+#' @returns NULL. Downloaded files will be stored in \code{directory_to_save}.
 #' @author Insang Song
 #' @examples
 #' \dontrun{
-#' download_huc("Lower48", "Seamless", "~/data"
+#' download_huc("Lower48", "Seamless", "/data",
 #' acknowledgement = TRUE,
 #' download = TRUE,
 #' unzip = TRUE)
 #' }
 #' @export
 # @importFrom archive archive_extract
-download_huc_data <-
+download_huc <-
   function(
     region = c("Lower48", "Islands"),
     type = c("Seamless", "OceanCatchment"),
@@ -3001,7 +2951,7 @@
 #' \code{directory_to_save}.
#' @examples #' \dontrun{ -#' download_cropscape_data( +#' download_cropscape( #' 2020, "~/data", #' acknowledgement = TRUE, #' download = TRUE, @@ -3010,7 +2960,7 @@ download_huc_data <- #' } #' @importFrom archive archive_extract #' @export -download_cropscape_data <- function( +download_cropscape <- function( year = seq(1997, 2023), source = c("USDA", "GMU"), directory_to_save = NULL, @@ -3089,6 +3039,7 @@ download_cropscape_data <- function( # note that this part does not utilize download_unzip # as duplicate file names are across multiple zip files if (download) { + # nocov start if (unzip) { extension <- ifelse(source == "USDA", "\\.zip", "(\\.tar|\\.tar\\.gz)") dir_unzip <- gsub(extension, "", download_names) @@ -3096,12 +3047,12 @@ download_cropscape_data <- function( archive::archive_extract(download_names[fn], exdir = dir_unzip[fn]) } } + # nocov end } message("Requests were processed.\n") #### 10. remove download commands download_remove_command(commands_txt = commands_txt, remove = remove_command) - } # nolint end @@ -3142,11 +3093,11 @@ download_cropscape_data <- function( #' Remove (\code{TRUE}) or keep (\code{FALSE}) #' the text file containing download commands. #' @author Insang Song -#' @returns NULL; .bil (normals) or single grid files depending on the format choice. -#' \code{directory_to_save}. +#' @returns NULL; .bil (normals) or single grid files depending on the format +#' choice will be stored in \code{directory_to_save}. #' @examples #' \dontrun{ -#' download_prism_data( +#' download_prism( #' time = "202104", #' element = "ppt", #' data_type = "ts", @@ -3162,7 +3113,7 @@ download_cropscape_data <- function( #' * [PRISM Web Service Guide](https://prism.oregonstate.edu/documents/PRISM_downloads_web_service.pdf) #' @export # nolint end -download_prism_data <- function( +download_prism <- function( time, element = c("ppt", "tmin", "tmax", "tmean", "tdmean", "vpdmin", "vpdmax", @@ -3258,7 +3209,7 @@ download_prism_data <- function( # nolint start #' Download gridMET data #' @description -#' The \code{download_gridmet_data} function accesses and downloads gridded surface meteorological data from the [University of California Merced Climatology Lab's gridMET dataset](https://www.climatologylab.org/gridmet.html). +#' The \code{download_gridmet} function accesses and downloads gridded surface meteorological data from the [University of California Merced Climatology Lab's gridMET dataset](https://www.climatologylab.org/gridmet.html). #' @param variables character(1). Variable(s) name(s). See [gridMET Generate Wget File](https://www.climatologylab.org/wget-gridmet.html) #' for variable names and acronym codes. (Note: variable "Burning Index" has code "bi" and variable #' "Energy Release Component" has code "erc"). @@ -3278,11 +3229,11 @@ download_prism_data <- function( #' Remove (\code{TRUE}) or keep (\code{FALSE}) #' the text file containing download commands. #' @author Mitchell Manware -#' @return NULL; Yearly netCDF (.nc) files will be stored in a variable-specific +#' @return NULL; netCDF (.nc) files will be stored in a variable-specific #' folder within \code{directory_to_save}. 
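
For the gridMET downloader renamed below, a hedged usage sketch (the path is hypothetical; "pr" is the gridMET acronym code for precipitation per the variable list linked above):

```
# One netCDF file per requested year, stored in a variable-specific
# folder under directory_to_save.
download_gridmet(
  variables = "pr",
  year_start = 2022,
  year_end = 2022,
  directory_to_save = "./data/gridmet/",
  acknowledgement = TRUE,
  download = FALSE
)
```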
#' @export
 # nolint end
-download_gridmet_data <- function(
+download_gridmet <- function(
   variables = NULL,
   year_start = 2022,
   year_end = 2022,
@@ -3388,7 +3339,7 @@
 # nolint start
 #' Download TerraClimate data
 #' @description
-#' The \code{download_terraclimate_data} function accesses and downloads climate and water balance data from the [University of California Merced Climatology Lab's TerraClimate dataset](https://www.climatologylab.org/terraclimate.html).
+#' The \code{download_terraclimate} function accesses and downloads climate and water balance data from the [University of California Merced Climatology Lab's TerraClimate dataset](https://www.climatologylab.org/terraclimate.html).
 #' @param variables character(1). Variable(s) name(s). See [TerraClimate Direct Downloads](https://climate.northwestknowledge.net/TERRACLIMATE/index_directDownloads.php)
 #' for variable names and acronym codes.
 #' @param year_start integer(1). length of 4. Start of year range for
@@ -3407,11 +3358,11 @@
 #' Remove (\code{TRUE}) or keep (\code{FALSE})
 #' the text file containing download commands.
 #' @author Mitchell Manware, Insang Song
-#' @return NULL; Yearly netCDF (.nc) files will be stored in a variable-specific
+#' @return NULL; netCDF (.nc) files will be stored in a variable-specific
 #' folder within \code{directory_to_save}.
 #' @export
 # nolint end
-download_terraclimate_data <- function(
+download_terraclimate <- function(
   variables = NULL,
   year_start = 2022,
   year_end = 2022,
diff --git a/R/download_auxiliary.R b/R/download_auxiliary.R
index 10c5cc41..2aa019a8 100644
--- a/R/download_auxiliary.R
+++ b/R/download_auxiliary.R
@@ -4,16 +4,40 @@
 #' @description
 #' Create \code{directory} if it does not already exist.
 #' @param directory character(1) directory path
+#' @param zip logical(1). Should sub-directories be created for zip files and
+#' data files? If `TRUE`, a vector of sub-directory names will be returned.
 #' @description If directory does not exist, the directory
 #' will be created.
-#' @returns NULL
+#' @returns NULL; if `zip = TRUE`, a vector of directories for zip files and
+#' data files.
 #' @keywords internal
 #' @export
 download_setup_dir <-
-  function(directory) {
+  function(directory, zip = FALSE) {
     if (!dir.exists(directory)) {
       dir.create(directory, recursive = TRUE)
     }
+    if (zip) {
+      directory_zip <- download_sanitize_path(
+        paste0(
+          download_sanitize_path(directory),
+          "zip_files"
+        )
+      )
+      if (!dir.exists(directory_zip)) {
+        dir.create(directory_zip, recursive = TRUE)
+      }
+      directory_data <- download_sanitize_path(
+        paste0(
+          download_sanitize_path(directory),
+          "data_files"
+        )
+      )
+      if (!dir.exists(directory_data)) {
+        dir.create(directory_data, recursive = TRUE)
+      }
+      return(c(directory_zip, directory_data))
+    }
   }
@@ -165,8 +189,10 @@ download_unzip <-
 #' @param remove logical(1). Confirm removal. Default is FALSE.
 #' @param download_name character. Full zip file path
 #' @note
+#' !!! USE THE FUNCTION WITH CAUTION !!!
 #' If \code{remove = TRUE}, ensure that \code{unzip = TRUE}. Choosing to remove
 #' ".zip" files without unzipping will retain none of the downloaded data.
+#' In addition, the directory two levels above each zip file path (i.e., the
+#' parent directory that contains "zip_files") will also be removed.
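
The `zip = TRUE` branch added to `download_setup_dir()` above is what drives every two-directory layout in this patch; a hedged illustration (assuming `download_sanitize_path()` appends a trailing slash; the base path is hypothetical):

```
# Creates and returns both sub-directories, zip directory first.
dirs <- download_setup_dir("./data/koppen", zip = TRUE)
dirs[1]  # "./data/koppen/zip_files/"
dirs[2]  # "./data/koppen/data_files/"
```

Given the caution note above, calling `download_remove_zips(remove = TRUE, ...)` on a zip file inside `zip_files/` would then delete the directory two levels up, i.e. `./data/koppen/` itself.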
#' @returns NULL #' @keywords internal #' @export @@ -177,6 +203,9 @@ download_remove_zips <- if (remove) { cat(paste0("Removing download files...\n")) file.remove(download_name) + # oftentimes zipfiles are stored in zip_files under + # directory_to_save in download functions. + unlink(dirname(dirname(download_name)), recursive = TRUE) cat(paste0("Download files removed.\n")) } } @@ -294,7 +323,7 @@ generate_time_sequence <- #' Check HTTP status #' @description -#' Check if provided URL returns HTTP status 200. +#' Check if provided URL returns HTTP status 200 or 206. #' @param url Download URL to be checked. #' @param method httr method to obtain URL (`"HEAD"` or `"GET"`) #' @author Insang Song; Mitchell Manware @@ -307,15 +336,15 @@ check_url_status <- function( url, method = c("HEAD", "GET")) { method <- match.arg(method) - http_status_ok <- 200 + http_status_ok <- c(200, 206) if (method == "HEAD") { hd <- httr::HEAD(url) } else if (method == "GET") { hd <- httr::GET(url) } status <- hd$status_code - Sys.sleep(1.5) - return(status == http_status_ok) + Sys.sleep(1) + return(status %in% http_status_ok) } #' Import download commands @@ -350,7 +379,7 @@ extract_urls <- function( } urls <- sapply( strsplit( - commands, + trimws(commands), " " ), function(x, l) x[l], diff --git a/R/process.R b/R/process.R index 324df624..adce2480 100644 --- a/R/process.R +++ b/R/process.R @@ -14,7 +14,7 @@ #' - [`process_modis_swath`]: `"modis_swath"` #' - [`process_modis_merge`]: `"modis_merge"` #' - [`process_bluemarble`]: `"bluemarble"` -#' - [`process_koppen_geiger`]: `"koppen-geiger"`, `"koeppen-geiger"`, `"koppen"`, +#' - [`process_koppen_geiger`]: `"koppen-geiger"`, `"koeppen-geiger"`, `"koppen"` #' - [`process_ecoregion`]: `"ecoregion"`, `"ecoregions"` #' - [`process_nlcd`]: `"nlcd"` #' - [`process_tri`]: `"tri"` @@ -32,7 +32,7 @@ #' - [`process_huc`]: `"huc"` #' - [`process_cropscape`]: `"cropscape"`, `"cdl"` #' - [`process_prism`]: `"prism"` -#' - [`process_olm`]: `"olm"`, `"openlandmap` +#' - [`process_olm`]: `"olm"`, `"openlandmap"` #' @returns `SpatVector`, `SpatRaster`, `sf`, or `character` depending on #' covariate type and selections. #' @author Insang Song @@ -492,7 +492,7 @@ process_modis_warp <- function( path = NULL, cellsize = 0.1, - threshold = cellsize * 2, + threshold = cellsize * 4, crs = 4326, ... ) { @@ -502,14 +502,13 @@ process_modis_warp <- stars::st_warp( ras, crs = crs, + segments = 500, cellsize = cellsize, threshold = threshold ) return(rtd) } - - # nolint start #' Mosaic MODIS swaths #' @description This function will return a `SpatRaster` object with @@ -523,16 +522,20 @@ process_modis_warp <- #' @param path character. Full paths of hdf files. #' @param date character(1). Date to query. #' @param subdataset character. Subdatasets to process. +#' __Unlike other preprocessing functions, this argument should specify +#' the exact subdataset name.__ For example, when using MOD06_L2 product, +#' one may specify `c("Cloud_Fraction", "Cloud_Optical_Thickness")`, +#' etc. The subdataset names can be found in `terra::describe()` output. #' @param suffix character(1). Should be formatted `:{product}:`, #' e.g., `:mod06:` #' @param resolution numeric(1). Resolution of output raster. -#' Unit is degree. +#' Unit is degree (decimal degree in WGS84). #' @param ... For internal use. 
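
Since `check_url_status()` now accepts HTTP 206 alongside 200 (see the hunk above), a quick hedged illustration:

```
# HEAD is cheaper than GET; 206 (Partial Content) also counts as
# reachable, which matters for servers answering byte-range requests.
check_url_status(
  "https://www.ospo.noaa.gov/Products/land/hms.html",
  method = "HEAD"
)
#> TRUE when the server responds with status 200 or 206
```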
#' @seealso -#' * [`process_modis_warp`] +#' * [`process_modis_warp()`], [`stars::read_stars()`], [`stars::st_warp()`] #' * [GDAL HDF4 driver documentation](https://gdal.org/drivers/raster/hdf4.html) -#' * [`terra::describe`]: to list the full subdataset list with `sds = TRUE` -#' * [`terra::sprc`], [`terra::rast`] +#' * [`terra::describe()`]: to list the full subdataset list with `sds = TRUE` +#' * [`terra::sprc()`], [`terra::rast()`] #' @returns #' * a `SpatRaster` object (crs = `"EPSG:4326"`): if `path` is a single file with #' full specification of subdataset. @@ -540,8 +543,9 @@ process_modis_warp <- #' @author Insang Song #' @importFrom terra rast #' @importFrom terra crop +#' @importFrom terra ext #' @importFrom terra mosaic -#' @importFrom terra varnames +#' @importFrom stars st_mosaic #' @importFrom terra values #' @importFrom terra sprc #' @export @@ -561,51 +565,77 @@ process_modis_swath <- header <- "HDF4_EOS:EOS_SWATH:" ras_mod06 <- vector("list", length = length(subdataset)) datejul <- strftime(date, format = "%Y%j") + ## FIXME: this part may result in underperformance. + ## Find a way to optimize this part. paths_today <- grep(sprintf("A%s", datejul), path, value = TRUE) # if two or more paths are put in, # these are read into a list then mosaicked - if (length(path) > 1) { - for (element in seq_along(subdataset)) { - target_text <- - sprintf("%s%s%s%s", header, paths_today, suffix, subdataset[element]) - # rectified stars objects to SpatRaster - mod06_element <- split(target_text, target_text) |> - lapply(process_modis_warp, cellsize = resolution) |> - lapply(terra::rast) - # Remove all NA layers to avoid erroneous values - mod06_element_nas <- - sapply( - mod06_element, - function(x) { - all(is.na(terra::values(x))) - } - ) - mod06_element <- mod06_element[!mod06_element_nas] + for (element in seq_along(subdataset)) { + target_text <- + sprintf("%s%s%s%s", header, paths_today, suffix, subdataset[element]) + # rectified stars objects to SpatRaster + mod06_element <- split(target_text, target_text) |> + lapply(process_modis_warp, cellsize = resolution) + # Remove all NA layers to avoid erroneous values + mod06_element_nas <- + sapply( + mod06_element, + function(x) { + xvals <- x[[subdataset[element]]] + all(is.na(xvals)) || all(is.nan(xvals)) + } + ) + mod06_element <- + mod06_element[!mod06_element_nas & !is.null(mod06_element_nas)] + + # prepare a fail-safe alternative return + # It will be used again later. 
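+      # (editor's note) The placeholder raster built below spans roughly the
+      # conterminous US (lon -128..-64, lat 20..52) in EPSG:4326; its four
+      # corner cells are set to 0 so the raster is not all-NA, which keeps
+      # the later crop and mosaic steps well-defined when a swath is empty.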
+ alt <- terra::rast( + xmin = -128, + xmax = -64, + ymin = 20, + ymax = 52, + resolution = resolution + ) + # initialize values with NA + alt[] <- NA + alt_dim <- dim(alt) + alt[1, 1] <- 0 + alt[1, alt_dim[2]] <- 0 + alt[alt_dim[1], 1] <- 0 + alt[alt_dim[1], alt_dim[2]] <- 0 + terra::crs(alt) <- "EPSG:4326" + + if (is.null(mod06_element) || length(mod06_element) == 0) { + message("All layers are NA or NaN.") + mod06_element_mosaic <- terra::deepcopy(alt) + } else { # mosaick the warped SpatRasters into one - mod06_element_mosaic <- Reduce(f = terra::mosaic, x = mod06_element) - ras_mod06[[element]] <- mod06_element_mosaic + mod06_element_mosaic <- + do.call(stars::st_mosaic, mod06_element) |> + terra::rast() # assigning variable name - names(ras_mod06)[element] <- subdataset[element] + mod06_element_mosaic <- + terra::crop(mod06_element_mosaic, terra::ext(alt)) } - # SpatRasterCollection can accommodate multiple SpatRasters - # with different extents (most flexible kind) - mod06_sprc <- terra::sprc(ras_mod06) - # post-hoc: stack multiple layers with different extent - # into one SpatRaster - # 1. mosaic all layers into one - mod06_mosaic <- terra::mosaic(mod06_sprc, fun = "first") - # 2. Assign NAs to prepare "etching" - terra::values(mod06_mosaic) <- NA - # 3. Looping main "etching"; each element is put first - mod06_etched <- - sapply(mod06_sprc, terra::mosaic, y = mod06_mosaic, fun = "first") - # 4. stack - mod06_return <- do.call(c, mod06_etched) - } else { - mod06_return <- - terra::rast(process_modis_warp(path, cellsize = resolution)) + names(mod06_element_mosaic) <- subdataset[element] + ras_mod06[[element]] <- mod06_element_mosaic } + # SpatRasterCollection can accommodate multiple SpatRasters + # with different extents (most flexible kind) + mod06_sprc <- terra::sprc(ras_mod06) + # post-hoc: stack multiple layers with different extent + # into one SpatRaster + # 1. mosaic all layers into one + mod06_mosaic <- terra::mosaic(mod06_sprc, fun = "median") + # 2. Assign NAs to prepare "etching"; NA will result in NaNs + terra::values(mod06_mosaic) <- NA + # 3. Looping main "etching"; each element is put first + mod06_etched <- + sapply(mod06_sprc, terra::mosaic, y = mod06_mosaic, fun = "first") + # 4. stack + mod06_return <- do.call(c, mod06_etched) return(mod06_return) } @@ -631,7 +661,18 @@ process_koppen_geiger <- year = NULL, ... ) { + # import data kg_rast <- terra::rast(path) + # identify time period + period <- strsplit( + names(kg_rast), + "_" + )[[1]][4] + if (period == "present") { + terra::metags(kg_rast) <- c(year = "1980 - 2016") + } else { + terra::metags(kg_rast) <- c(year = "2071 - 2100") + } return(kg_rast) } @@ -642,6 +683,8 @@ process_koppen_geiger <- #' returning a single `SpatRaster` object. #' @param path character giving nlcd data path #' @param year numeric giving the year of NLCD data used +#' @param extent numeric(4) or SpatExtent giving the extent of the raster +#' if `NULL` (default), the entire raster is loaded #' @param ... Placeholders. #' @description Reads NLCD file of selected `year`. #' @returns a `SpatRaster` object @@ -654,6 +697,7 @@ process_nlcd <- function( path = NULL, year = 2021, + extent = NULL, ... 
) { # check inputs @@ -670,13 +714,13 @@ process_nlcd <- nlcd_file <- list.files( path, - pattern = paste0("nlcd_", year, "_.*.tif$"), + pattern = paste0("nlcd_", year, "_.*.(tif|img)$"), full.names = TRUE ) if (length(nlcd_file) == 0) { stop("NLCD data not available for this year.") } - nlcd <- terra::rast(nlcd_file) + nlcd <- terra::rast(nlcd_file, win = extent) terra::metags(nlcd) <- c(year = year) return(nlcd) } @@ -688,17 +732,41 @@ process_nlcd <- #' data, returning a `SpatVector` object. #' @param path character(1). Path to Ecoregion Shapefiles #' @param ... Placeholders. +#' @note The function will fix Tukey's bridge in Portland, ME. +#' This fix will ensure that the EPA air quality monitoring sites +#' will be located within the ecoregion. #' @author Insang Song #' @returns a `SpatVector` object #' @importFrom terra vect +#' @importFrom sf st_read st_crs st_as_sfc st_transform st_intersects st_union +#' @importFrom data.table year #' @export process_ecoregion <- function( path = NULL, ... ) { - ecoreg <- terra::vect(path) + ecoreg <- sf::st_read(path) + # fix Tukey's bridge in Portland, ME + # nolint start + poly_tukey <- + "POLYGON ((-70.258 43.68, -70.2555 43.68, -70.255 43.6733, -70.2576 43.6732, -70.258 43.68))" + poly_tukey <- sf::st_as_sfc(poly_tukey, crs = "EPSG:4326") + poly_tukey <- sf::st_transform(poly_tukey, sf::st_crs(ecoreg)) + + # nolint end ecoreg <- ecoreg[, grepl("^(L2_KEY|L3_KEY)", names(ecoreg))] + ecoreg_edit_idx <- sf::st_intersects(ecoreg, poly_tukey, sparse = FALSE) + ecoreg_edit_idx <- vapply(ecoreg_edit_idx, function(x) any(x), logical(1)) + if (!all(ecoreg_edit_idx == 0)) { + ecoreg_else <- ecoreg[!ecoreg_edit_idx, ] + ecoreg_edit <- sf::st_union(ecoreg[ecoreg_edit_idx, ], poly_tukey) + ecoreg <- rbind(ecoreg_else, ecoreg_edit) + } + ecoreg$time <- paste0( + "1997 - ", data.table::year(Sys.time()) + ) + ecoreg <- terra::vect(ecoreg) return(ecoreg) } @@ -807,7 +875,7 @@ process_tri <- function( # nolint start #' Process road emissions data #' @description -#' The \code{process_tri()} function imports and cleans raw road emissions data, +#' The \code{process_nei()} function imports and cleans raw road emissions data, #' returning a single `SpatVector` object. #' @param path character(1). Directory with NEI csv files. #' @param county `SpatVector`/`sf`. County boundaries. @@ -855,10 +923,19 @@ process_nei <- function( stop("year should be one of 2017 or 2020.\n") } # Concatenate NEI csv files - csvs_nei <- list.files(path = path, pattern = "*.csv$", full.names = TRUE) + csvs_nei <- + list.files( + path = path, + pattern = "*.csv$", + recursive = TRUE, + full.names = TRUE + ) + csvs_nei <- grep(year, csvs_nei, value = TRUE) + if (is.null(csvs_nei) || length(csvs_nei) == 0) { + stop("No files found for the year. 
The file names should include the year")
+  }
   csvs_nei <- lapply(csvs_nei, data.table::fread)
   csvs_nei <- data.table::rbindlist(csvs_nei)
-
   # column name readjustment
   target_nm <- c("fips code", "total emissions", "emissions uom")
   # not grep-ping at once for flexibility
@@ -883,14 +960,14 @@ process_nei <- function(
       TRF_NEINP_0_00000 = sum(emissions_total_ton, na.rm = TRUE)
     ),
     by = geoid]
-  csvs_nei$nei_year <- year
+  csvs_nei$time <- as.integer(year)

   # read county vector
   cnty_geoid_guess <- grep("GEOID", names(county))
   names(county)[cnty_geoid_guess] <- "geoid"
   county$geoid <- sprintf("%05d", as.integer(county$geoid))
-  cnty_vect <- merge(county, csvs_nei, by = "geoid")
-  cnty_vect <- cnty_vect[, c("geoid", "nei_year", "TRF_NEINP_0_00000")]
+  cnty_vect <- merge(county, as.data.frame(csvs_nei), by = "geoid")
+  cnty_vect <- cnty_vect[, c("geoid", "time", "TRF_NEINP_0_00000")]
   return(cnty_vect)
 }
@@ -899,32 +976,47 @@
 #' Process unique U.S. EPA AQS sites
 #' @description
 #' The \code{process_aqs()} function cleans and imports raw air quality
-#' monitoring sites, returning a single `SpatVector` or sf object.
+#' monitoring sites, returning a single `SpatVector` or `sf` object.
+#' `date` is used to filter the raw data read from csv files.
+#' Filtered rows are then processed according to the `mode` argument.
+#' Some sites report multiple measurements per day with and without
+#' [exceptional events](https://www.epa.gov/sites/default/files/2016-10/documents/exceptional_events.pdf);
+#' the internal procedure of this function keeps "Included" rows when there
+#' are multiple event types per site-time.
 #' @param path character(1). Directory path to daily measurement data.
 #' @param date character(2). Start and end date.
-#' Should be in `"YYYY-MM-DD"` format and sorted. If `NULL`,
-#' only unique locations are returned.
-#' @param return_format character(1). `"terra"` or `"sf"`.
+#' Should be in `"YYYY-MM-DD"` format and sorted.
+#' @param mode character(1). One of "full" (all dates * all locations),
+#' "sparse" (date-location pairs with available data), or
+#' "location" (unique locations).
+#' @param data_field character(1). Data field to extract.
+#' @param return_format character(1). `"terra"` or `"sf"` or `"data.table"`.
 #' @param ... Placeholders.
-#' @returns a `SpatVector` or sf object depending on the `return_format`
+#' @seealso
+#' * [`download_aqs()`]
+#' * [EPA, n.d., _AQS Parameter Codes_](
+#' https://aqs.epa.gov/aqsweb/documents/codetables/parameters.csv)
+#' @returns a `SpatVector`, `sf`, or `data.table` object depending on
+#' `return_format`
 #' @importFrom data.table as.data.table
 #' @importFrom utils read.csv
-#' @importFrom terra vect
-#' @importFrom terra project
+#' @importFrom terra vect project
 #' @importFrom sf st_as_sf
-#' @importFrom dplyr group_by
-#' @importFrom dplyr ungroup
-#' @importFrom dplyr filter
-#' @note `date = NULL` will return a massive data.table
-#' object. Please choose proper `date` values.
+#' @importFrom dplyr group_by ungroup filter mutate select distinct
+#' @note Choose `date` and `mode` values with caution.
+#' The function may return a massive data.table, resulting in
+#' a long processing time or even a crash.
 #' @export
 process_aqs <-
   function(
     path = NULL,
     date = c("2018-01-01", "2022-12-31"),
-    return_format = "terra",
+    mode = c("full", "sparse", "location"),
+    data_field = "Arithmetic.Mean",
+    return_format = c("terra", "sf", "data.table"),
     ...
) { + mode <- match.arg(mode) + return_format <- match.arg(return_format) if (!is.null(date)) { date <- try(as.Date(date)) if (inherits(date, "try-error")) { @@ -933,8 +1025,9 @@ process_aqs <- if (length(date) != 2) { stop("date should be a character vector of length 2.") } + } else { + stop("date should be defined.") } - if (length(path) == 1 && dir.exists(path)) { path <- list.files( path = path, @@ -953,22 +1046,54 @@ process_aqs <- ## get unique sites sites$site_id <- sprintf("%02d%03d%04d%05d", - sites$State.Code, - sites$County.Code, - sites$Site.Num, - sites$Parameter.Code) + as.integer(sites$State.Code), + as.integer(sites$County.Code), + as.integer(sites$Site.Num), + as.integer(sites$Parameter.Code)) site_id <- NULL Datum <- NULL POC <- NULL + Date.Local <- NULL + Sample.Duration <- NULL + + date_start <- as.Date(date[1]) + date_end <- as.Date(date[2]) + date_sequence <- seq(date_start, date_end, "day") + date_sequence <- as.character(date_sequence) # select relevant fields only sites <- sites |> dplyr::as_tibble() |> + dplyr::filter(as.character(Date.Local) %in% date_sequence) |> + dplyr::filter(startsWith(Sample.Duration, "24")) |> dplyr::group_by(site_id) |> dplyr::filter(POC == min(POC)) |> + dplyr::mutate(time = Date.Local) |> dplyr::ungroup() - sites_v <- unique(sites[, c("site_id", "Longitude", "Latitude", "Datum")]) + col_sel <- c("site_id", "Longitude", "Latitude", "Datum") + if (mode != "sparse") { + sites_v <- unique(sites[, col_sel]) + } else { + col_sel <- append(col_sel, "Event.Type") + col_sel <- append(col_sel, "time") + col_sel <- append(col_sel, data_field) + sites_v <- sites |> + dplyr::select(dplyr::all_of(col_sel)) |> + dplyr::distinct() + # excluding site-time with multiple event types + # sites_vdup will be "subtracted" from the original sites_v + sites_vdup <- sites_v |> + dplyr::group_by(site_id, time) |> + dplyr::filter(dplyr::n() > 1) |> + dplyr::filter(Event.Type == "Excluded") |> + dplyr::ungroup() + sites_v <- + dplyr::anti_join( + sites_v, sites_vdup, + by = c("site_id", "time", "Event.Type") + ) + } names(sites_v)[2:3] <- c("lon", "lat") sites_v <- data.table::as.data.table(sites_v) @@ -977,7 +1102,7 @@ process_aqs <- # NAD83 to WGS84 sites_v_nad <- - sites_v[Datum == "NAD83"] + sites_v[sites_v$Datum == "NAD83", ] sites_v_nad <- terra::vect( sites_v_nad, @@ -986,15 +1111,14 @@ process_aqs <- ) sites_v_nad <- terra::project(sites_v_nad, "EPSG:4326") # postprocessing: combine WGS84 and new WGS84 records - sites_v_nad <- sites_v_nad[, seq(1, 3)] sites_v_nad <- as.data.frame(sites_v_nad) - sites_v_wgs <- sites_v[Datum == "WGS84"][, -4] - final_sites <- rbind(sites_v_wgs, sites_v_nad) + sites_v_wgs <- sites_v[sites_v$Datum == "WGS84"] + final_sites <- data.table::rbindlist( + list(sites_v_wgs, sites_v_nad), fill = TRUE) + final_sites <- + final_sites[, grep("Datum", names(final_sites), invert = TRUE), with = FALSE] - if (!is.null(date)) { - date_start <- as.Date(date[1]) - date_end <- as.Date(date[2]) - date_sequence <- seq(date_start, date_end, "day") + if (mode == "full") { final_sites <- split(date_sequence, date_sequence) |> lapply(function(x) { @@ -1002,7 +1126,10 @@ process_aqs <- fs_time$time <- x return(fs_time) }) - final_sites <- Reduce(rbind, final_sites) + final_sites <- data.table::rbindlist(final_sites, fill = TRUE) + } + if (mode == "sparse") { + final_sites <- unique(final_sites) } final_sites <- @@ -1021,7 +1148,8 @@ process_aqs <- dim = "XY", coords = c("lon", "lat"), crs = "EPSG:4326" - ) + ), + data.table = final_sites ) 
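+    # (editor's note) At this point `final_sites` holds all date-location
+    # combinations (mode = "full"), observed date-location pairs
+    # (mode = "sparse"), or unique locations (mode = "location"), coerced
+    # to the class requested via `return_format`.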
return(final_sites)
@@ -1084,6 +1212,8 @@ process_sedac_population <- function(
         split2[1],
         "...\n"
       ))
+      #### year
+      terra::metags(data) <- c(year = split2[1])
     }
     return(data)
   }
@@ -1096,9 +1226,11 @@ process_sedac_population <- function(
 #' returning a single `SpatVector` object.
 #' @param path character(1). Path to geodatabase or shapefiles.
 #' @param ... Placeholders.
-#' @note U.S. context.
+#' @note U.S. context. The returned `SpatVector` object contains a
+#' `$description` column to represent the temporal range covered by the
+#' dataset.
 #' @author Insang Song
-#' @returns a `SpatVector` boject
+#' @returns a `SpatVector` object
 #' @importFrom terra vect
 #' @export
 # nolint end
@@ -1112,6 +1244,8 @@ process_sedac_groads <- function(
   }
   #### import data
   data <- terra::vect(path)
+  #### time period
+  data$description <- "1980 - 2010"
   return(data)
 }
@@ -1157,7 +1291,7 @@ process_hms <- function(
   #### identify file paths
   paths <- list.files(
     path,
-    pattern = "hms_smoke",
+    pattern = "hms_smoke*.*.shp$",
     full.names = TRUE
   )
   paths <- paths[grep(
@@ -1170,6 +1304,12 @@
     date[2],
     sub_hyphen = TRUE
   )
+  #### dates of interest with hyphen for return in 0 polygon case
+  dates_no_polygon <- generate_date_sequence(
+    date[1],
+    date[2],
+    sub_hyphen = FALSE
+  )
   #### subset file paths to only dates of interest
   data_paths <- unique(
     grep(
@@ -1190,7 +1330,9 @@
     "EPSG:4326"
   )
   #### subset to density of interest
-  data_density <- data_date_p[data_date_p$Density == variable]
+  data_density <- data_date_p[
+    tolower(data_date_p$Density) == tolower(variable)
+  ]
   #### absent polygons (ie. December 31, 2018)
   if (nrow(data_density) == 0) {
     cat(paste0(
@@ -1245,6 +1387,11 @@ process_hms <- function(
     data_aggregate <- data_aggregate[
       seq_len(nrow(data_aggregate)), c("Density", "Date")
     ]
+    #### apply date format
+    data_aggregate$Date <- as.Date(
+      data_aggregate$Date,
+      format = "%Y%m%d"
+    )
     #### merge with other data
     data_return <- rbind(data_return, data_aggregate)
   }
@@ -1265,7 +1412,8 @@ process_hms <- function(
       ),
       ". Returning vector of dates.\n"
     ))
-    return(c(variable, dates_of_interest))
+    no_polygon_return <- c(variable, as.character(dates_no_polygon))
+    return(no_polygon_return)
   } else if (nrow(data_return) > 0) {
     cat(paste0(
       "Returning daily ",
@@ -1294,12 +1442,17 @@
 #' @param variable vector(1). Vector containing the GMTED statistic first and
 #' the resolution second. (Example: variable = c("Breakline Emphasis",
 #' "7.5 arc-seconds")).
+#' * Statistic options: "Breakline Emphasis", "Systematic Subsample",
+#' "Median Statistic", "Minimum Statistic", "Mean Statistic",
+#' "Maximum Statistic", "Standard Deviation Statistic"
+#' * Resolution options: "30 arc-seconds", "15 arc-seconds", "7.5 arc-seconds"
 #' @param path character(1). Directory with downloaded GMTED "*_grd"
 #' folder containing .adf files.
 #' @param ... Placeholders.
 #' @author Mitchell Manware
 #' @note
-#' `SpatRaster` layer name indicates selected variable and resolution.
+#' `SpatRaster` layer name indicates the selected variable, resolution, and
+#' year of release (2010).
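
A hedged example of `process_gmted()` using the statistic/resolution pair documented above (the path is hypothetical and should point at the directory holding the downloaded `*_grd` folder of .adf files):

```
gmted <- process_gmted(
  variable = c("Breakline Emphasis", "7.5 arc-seconds"),
  path = "./data/gmted/"
)
names(gmted)          # layer name now carries the "_2010" release-year suffix
terra::metags(gmted)  # year = 2010
```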
#' @return a `SpatRaster` object #' @importFrom terra rast #' @importFrom terra varnames @@ -1343,8 +1496,10 @@ process_gmted <- function( #### identify file path paths <- list.files( path, - full.names = TRUE + full.names = TRUE, + recursive = TRUE ) + #### select only the folder containing data data_paths <- unique( grep( @@ -1358,7 +1513,7 @@ process_gmted <- function( value = TRUE ) ) - data_path <- data_paths[endsWith(data_paths, "_grd")] + data_path <- data_paths[grep("(_grd$|w001001.adf)", data_paths)] #### import data data <- terra::rast(data_path) #### layer name @@ -1368,7 +1523,8 @@ process_gmted <- function( "_grd", "", names(data) - ) + ), + "_2010" ) #### varnames terra::varnames(data) <- paste0( @@ -1378,6 +1534,9 @@ process_gmted <- function( resolution, ")" ) + #### year + terra::metags(data) <- + c(year = 2010L) #### set coordinate reference system return(data) } @@ -1418,12 +1577,14 @@ process_narr <- function( data_paths <- list.files( path, pattern = variable, + recursive = TRUE, full.names = TRUE ) - data_paths <- data_paths[grep( - ".nc", - data_paths - )] + data_paths <- grep( + sprintf("%s*.*.nc", variable), + data_paths, + value = TRUE + ) #### define date sequence date_sequence <- generate_date_sequence( date[1], @@ -2400,7 +2561,7 @@ process_terraclimate <- function( #' @param layer_name character(1). Layer name in the `path` #' @param huc_level character(1). Field name of HUC level #' @param huc_header character(1). The upper level HUC code header to extract -#' lower level HUCs. +#' lower level HUCs. #' @param ... Arguments passed to `nhdplusTools::get_huc()` #' @returns a `SpatVector` object #' @seealso [`nhdplusTools::get_huc`] @@ -2442,17 +2603,18 @@ process_huc <- huc_header = NULL, ... ) { - if (!file.exists(path) && !dir.exists(path)) { + # exclude the coverage due to write permission related to memoization + #nocov start + if (missing(path) || (!file.exists(path) && !dir.exists(path))) { hucpoly <- try( rlang::inject(nhdplusTools::get_huc(!!!list(...))) ) if (inherits(hucpoly, "try-error")) { - stop( - "HUC data was not found." - ) + stop("HUC data was not found.") } hucpoly <- terra::vect(hucpoly) } + #nocov end if (file.exists(path) || dir.exists(path)) { if (!is.null(huc_header)) { querybase <- @@ -2587,7 +2749,7 @@ process_prism <- #' Process OpenLandMap data #' @param path character giving OpenLandMap data path #' @param ... Placeholders. -#' @returns SpatRaster +#' @returns a `SpatRaster` object #' @author Insang Song #' @importFrom terra rast #' @export diff --git a/R/process_auxiliary.R b/R/process_auxiliary.R index da571122..851dd70e 100644 --- a/R/process_auxiliary.R +++ b/R/process_auxiliary.R @@ -311,7 +311,8 @@ process_locs_radius <- } else if (radius > 0) { sites_buffer <- terra::buffer( locs, - radius + radius, + quadsegs = 180L ) return(sites_buffer) } @@ -319,7 +320,8 @@ process_locs_radius <- #' Process locations as `SpatVector` #' @description -#' Convert locations from class \code{data.frame} or \code{data.table} to +#' Detect `SpatVector` object, or convert locations from class \code{sf}, +#' \code{data.frame} or \code{data.table} to #' `SpatVector` object, project to coordinate reference system, and apply #' circular buffer. #' @param locs data.frame(1). 
Data frame containing columns for unique @@ -336,37 +338,47 @@ process_locs_radius <- #' @export process_locs_vector <- function( - locs, - crs, - radius) { - #### sites as data frame - if ("data.table" %in% class(locs)) { - sites_df <- data.frame(locs) - } else if ("data.frame" %in% class(locs) && - !("data.table" %in% class(locs))) { - sites_df <- locs - } else if (!("data.table" %in% class(locs)) && - !("data.frame" %in% class(locs))) { + locs, + crs, + radius + ) { + #### detect SpatVector + if (methods::is(locs, "SpatVector")) { + cat( + paste0( + "Detected `SpatVector` (", + terra::geomtype(locs), + ") extraction locations...\n" + ) + ) + sites_v <- locs + #### detect sf object + } else if (methods::is(locs, "sf")) { + cat("Detected `sf` extraction locations...\n") + sites_v <- terra::vect(locs) + ### detect data.frame object + } else if (methods::is(locs, "data.frame")) { + cat("Detected `data.frame` extraction locations...\n") + #### columns + if (any(!(c("lon", "lat") %in% colnames(locs)))) { + stop(paste0( + "`locs` is missing 'lon', 'lat', or both.\n" + )) + } + sites_v <- terra::vect( + data.frame(locs), + geom = c("lon", "lat"), + crs = "EPSG:4326", + keepgeom = TRUE + ) + } else { stop( paste0( - "Detected a ", - class(locs)[1], - " object. Sites must be class data.frame or data.table.\n" + "`locs` is not a `SpatVector`, `sf`, or `data.frame` object.\n" ) ) } - #### columns - if (any(!(c("lon", "lat") %in% colnames(locs)))) { - stop(paste0( - "Sites data is missing 'lon', 'lat', or both.\n" - )) - } - #### as SpatVector - sites_v <- terra::vect( - sites_df, - geom = c("lon", "lat"), - crs = "EPSG:4326" - ) + ##### project to desired coordinate reference system sites_p <- terra::project( sites_v, crs diff --git a/README.md b/README.md index 813d4d36..43e5694c 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,4 @@ -# **A** **M**achine for **D**ata, **E**nvironments, and **U**ser **S**etup for common environmental and climate health datasets - +# amadeus [![R-CMD-check](https://github.com/NIEHS/amadeus/actions/workflows/check-standard.yaml/badge.svg)](https://github.com/NIEHS/amadeus/actions/workflows/check-standard.yaml) [![cov](https://NIEHS.github.io/amadeus/badges/coverage.svg)](https://github.com/NIEHS/amadeus/actions) @@ -7,137 +6,153 @@ [![pkgdown](https://github.com/NIEHS/amadeus/actions/workflows/pkgdown.yaml/badge.svg)](https://github.com/NIEHS/amadeus/actions/workflows/pkgdown.yaml) [![Project Status: WIP – Initial development is in progress, but there has not yet been a stable, usable release suitable for the public.](https://www.repostatus.org/badges/latest/wip.svg)](https://www.repostatus.org/#wip) -`amadeus` is an R package developed to improve and expedite users' access to large, publicly available geospatial datasets. The functions in `amadeus` allow users to download and import cleaned geospatial data directly in R, useful for automated run scripts, analysis pipelines, and reproducible science in general. +`amadeus` is **a** **m**ech**a**nism for **d**ata, **e**nvironments, and **u**ser **s**etup for common environmental and climate health datasets in R. `amadeus` has been developed to improve access to and utility with large scale, publicly available environmental data in R. + +## Installation + +`amadeus` is not yet available from CRAN, but it can be installed with the `devtools`, `remotes`, or `pak` packages. 
+ +``` +devtools::install_github("NIEHS/amadeus") +``` + +``` +remotes::install_github("NIEHS/amadeus") +``` + +``` +pak::pak("NIEHS/amadeus") +``` + +## Contribution + +To add or edit functionality for new data sources or datasets, open a [Pull request](https://github.com/NIEHS/amadeus/pulls) into the main branch with a detailed description of the proposed changes. Pull requests must pass all status checks, and then will be approved or rejected by `amadeus`'s authors. + +Utilize [Issues](https://github.com/NIEHS/amadeus/issues) to notify the authors of bugs, questions, or recommendations. Identify each issue with the appropriate label to help ensure a timely response. + +
+ +
## Download

-`download_data()` accesses and downloads raw geospatial data from a variety of open source data repositories. The function is a wrapper that calls source-specific download functions, each of which account for the source's unique combination of URL, file naming conventions, and data types. Download functions cover the following sources:
+`download_data` accesses and downloads raw geospatial data from a variety of open source data repositories. The function is a wrapper that calls source-specific download functions, each of which accounts for the source's unique combination of URL, file naming conventions, and data types. Download functions cover the following sources:

 | Source | Data Type | Genre |
 | :--- | :--- | :--- |
-| [US EPA Air Data Pre-Generated Data Files](https://aqs.epa.gov/aqsweb/airdata/download_files.html) | CSV | Air Pollution |
-| [US EPA Ecoregions](https://www.epa.gov/eco-research/ecoregion) | Shapefile | Climate Regions |
-| [NASA Goddard Earth Observing System Composition Forcasting (GEOS-CF)](https://gmao.gsfc.nasa.gov/GEOS_systems/) | netCDF | Atmosphere, Meteorology |
-| [USGS Global Multi-resolution Terrain Elevation Data (GMTED2010)](https://www.usgs.gov/coastal-changes-and-impacts/gmted2010) | ESRI ASCII Grid | Elevation |
+| [Climatology Lab TerraClimate](https://www.climatologylab.org/terraclimate.html) | netCDF | Climate, Water |
+| [Climatology Lab GridMet](https://www.climatologylab.org/gridmet.html) | netCDF | Meteorology |
 | [Köppen-Geiger Climate Classification (Beck et al., 2018)](https://www.nature.com/articles/sdata2018214) | GeoTIFF | Climate Classification |
-| [NASA Modern-Era Retrospective analysis for Research and Applications, Version 2 (MERRA-2)](https://www.nature.com/articles/sdata2018214) | netCDF | Atmosphere, Meteorology |
-| [NASA Moderate Resolution Imaging Spectroradiometer (MODIS)](https://modis.gsfc.nasa.gov/data/) | HDF | Atmosphere, Meteorology, Land Use, Satellite |
-| [NOAA NCEP North American Regional Reanalysis (NARR)](https://psl.noaa.gov/data/gridded/data.narr.html) | netCDF | Atmosphere, Meteorology |
 | [MRLC Consortium National Land Cover Database (NLCD)](https://www.mrlc.gov/data) | GeoTIFF | Land Use |
-| [NOAA Hazard Mapping System Fire and Smoke Product](https://www.ospo.noaa.gov/Products/land/hms.html#0) | Shapefile, KML | Wildfire Smoke |
-| [NASA SEDAC Global Roads Open Access Data Set](https://sedac.ciesin.columbia.edu/data/set/groads-global-roads-open-access-v1/data-download) | Shapefile, Geodatabase | Roadways |
+| [NASA Moderate Resolution Imaging Spectroradiometer (MODIS)](https://modis.gsfc.nasa.gov/data/) | HDF | Atmosphere, Meteorology, Land Use, Satellite |
+| [NASA Modern-Era Retrospective analysis for Research and Applications, Version 2 (MERRA-2)](https://www.nature.com/articles/sdata2018214) | netCDF | Atmosphere, Meteorology |
 | [NASA SEDAC UN WPP-Adjusted Population Density](https://sedac.ciesin.columbia.edu/data/set/gpw-v4-population-density-adjusted-to-2015-unwpp-country-totals-rev11) | GeoTIFF, netCDF | Population |
-| [Climatology Lab TerraClimate](https://www.climatologylab.org/terraclimate.html) | netCDF | Climate, Water |
-| [Climatology Lab GridMet](https://www.climatologylab.org/gridmet.html) | netCDF | Meteorology |
+| [NASA SEDAC Global Roads Open Access Data Set](https://sedac.ciesin.columbia.edu/data/set/groads-global-roads-open-access-v1/data-download) | Shapefile, Geodatabase | Roadways |
+| [NASA Goddard Earth Observing System Composition Forecasting 
(GEOS-CF)](https://gmao.gsfc.nasa.gov/GEOS_systems/) | netCDF | Atmosphere, Meteorology |
+| [NOAA Hazard Mapping System Fire and Smoke Product](https://www.ospo.noaa.gov/Products/land/hms.html#0) | Shapefile, KML | Wildfire Smoke |
+| [NOAA NCEP North American Regional Reanalysis (NARR)](https://psl.noaa.gov/data/gridded/data.narr.html) | netCDF | Atmosphere, Meteorology |
+| [US EPA Air Data Pre-Generated Data Files](https://aqs.epa.gov/aqsweb/airdata/download_files.html) | CSV | Air Pollution |
+| [US EPA Ecoregions](https://www.epa.gov/eco-research/ecoregion) | Shapefile | Climate Regions |
+| [USGS Global Multi-resolution Terrain Elevation Data (GMTED2010)](https://www.usgs.gov/coastal-changes-and-impacts/gmted2010) | ESRI ASCII Grid | Elevation |
+

-See the `download_functions` vignette for a detailed description of source-specific download functions.
+See the "download_data and NASA EarthData Account" vignette for a detailed description of source-specific download functions.

-Example use of `download_data()` using NOAA NCEP North American Regional Reanalysis's (NARR) "weasd" (Daily Accumulated Snow at Surface) variable.
+Example use of `download_data` using NOAA NCEP North American Regional Reanalysis's (NARR) "weasd" (Daily Accumulated Snow at Surface) variable.

 ```
+> directory <- "/ EXAMPLE / FILE / PATH /"
 > download_data(
 +   dataset_name = "narr_monolevel",
 +   year_start = 2022,
 +   year_end = 2022,
 +   variable = "weasd",
-+   directory_to_save = directory_to_save,
-+   data_download_acknowledgement = TRUE,
++   directory_to_save = directory,
++   acknowledgement = TRUE,
 +   download = TRUE
 + )
 Downloading requested files...
 Requested files have been downloaded.
-> list.files(paste0(directory_to_save, "weasd/"))
+> list.files(paste0(directory, "weasd"))
 [1] "weasd.2022.nc"
 ```

 ## Process

-`process_covariates()` imports and cleans raw geospatial data (downloaded with `download_data()`), and returns a single `SpatRaster` or `SpatVector` into the user's R environment. `process_covariates()` "cleans" the data by defining interpretable layer names, ensuring a coordinate reference system is present, and managing `time` data (if applicable).
+`process_covariates` imports and cleans raw geospatial data (downloaded with `download_data`), and returns a single `SpatRaster` or `SpatVector` into the user's R environment. `process_covariates` "cleans" the data by defining interpretable layer names, ensuring a coordinate reference system is present, and managing `time` data (if applicable).

-To avoid errors when using `process_covariates()`, **do not edit the raw downloaded data files**. Passing user-generated or edited data into `process_covariates()` may result in errors as the underlying functions are adapted to each sources' raw data file type.
+To avoid errors when using `process_covariates`, **do not edit the raw downloaded data files**. Passing user-generated or edited data into `process_covariates` may result in errors as the underlying functions are adapted to each source's raw data file type.

-Example use of `process_covariates()` using the downloaded "weasd" data.
+Example use of `process_covariates` using the downloaded "weasd" data.

 ```
 > weasd <- process_covariates(
 +   covariate = "narr",
 +   date = c("2022-01-01", "2022-01-05"),
 +   variable = "weasd",
-+   path = path
++   path = paste0(directory, "weasd")
 + )
-Cleaning weasd data for year 2022...
+Cleaning weasd data for January, 2022...
+Detected monolevel data...
 Returning daily weasd data from 2022-01-01 to 2022-01-05.
> weasd
-class       : SpatRaster 
+class       : SpatRaster
 dimensions  : 277, 349, 5  (nrow, ncol, nlyr)
 resolution  : 32462.99, 32463  (x, y)
 extent      : -16231.49, 11313351, -16231.5, 8976020  (xmin, xmax, ymin, ymax)
-coord. ref. : +proj=lcc +lat_0=50 +lon_0=-107 +lat_1=50 +lat_2=50 +x_0=5632642.22547 +y_0=4612545.65137 +datum=WGS84 +units=m +no_defs 
-source      : weasd.2022.nc:weasd 
-varname     : weasd (Daily Accumulated Snow at Surface) 
-names       : weasd_20220101, weasd_20220102, weasd_20220103, weasd_20220104, weasd_20220105 
-unit        :         kg/m^2,         kg/m^2,         kg/m^2,         kg/m^2,         kg/m^2 
-time        : 2022-01-01 to 2022-01-05 UTC 
+coord. ref. : +proj=lcc +lat_0=50 +lon_0=-107 +lat_1=50 +lat_2=50 +x_0=5632642.22547 +y_0=4612545.65137 +datum=WGS84 +units=m +no_defs
+source      : weasd.2022.nc:weasd
+varname     : weasd (Daily Accumulated Snow at Surface)
+names       : weasd_20220101, weasd_20220102, weasd_20220103, weasd_20220104, weasd_20220105
+unit        :         kg/m^2,         kg/m^2,         kg/m^2,         kg/m^2,         kg/m^2
+time        : 2022-01-01 to 2022-01-05 UTC
 ```

 ## Calculate Covariates

-`calc_covariates()` stems from the `beethoven` package, and the *air pollution model's (citation)* need for various types of data extracted at precise locations. `calc_covariates()`, therefore, extracts data from the "cleaned" `SpatRaster` or `SpatVector` object at user defined locations. Users can choose to buffer the locations. The function returns a `data.frame` with data extracted at all locations for each layer or row in the `SpatRaster` or `SpatVector` object, respectively.
+`calc_covariates` stems from the `beethoven` package's need for various types of data extracted at precise locations. `calc_covariates`, therefore, extracts data from the "cleaned" `SpatRaster` or `SpatVector` object at user-defined locations. Users can choose to buffer the locations. The function returns a `data.frame` with data extracted at all locations for each layer or row in the `SpatRaster` or `SpatVector` object, respectively.

-Example of `calc_covariates()` using processed "weasd" data.
+Example of `calc_covariates` using processed "weasd" data.

 ```
+> locs <- data.frame(lon = -78.8277, lat = 35.95013)
+> locs$id <- "0001"
 > weasd_covar <- calc_covariates(
 +   covariate = "narr",
 +   from = weasd,
 +   locs = locs,
-+   locs_id = "site_id",
-+   radius = 0
++   locs_id = "id",
++   radius = 0,
++   geom = FALSE
 + )
-Converting data.table to data.frame...
-Projecting data to desired coordinate reference system...
-Utilizing 0 meter buffer for covariate calculations.
-Calculating daily weasd covariates at monolevel for date 2022-01-01...
-Calculating daily weasd covariates at monolevel for date 2022-01-02...
-Calculating daily weasd covariates at monolevel for date 2022-01-03...
-Calculating daily weasd covariates at monolevel for date 2022-01-04...
-Calculating daily weasd covariates at monolevel for date 2022-01-05...
-Returning weasd covariates.
+Detected `data.frame` extraction locations...
+Calculating weasd covariates for 2022-01-01...
+Calculating weasd covariates for 2022-01-02...
+Calculating weasd covariates for 2022-01-03...
+Calculating weasd covariates for 2022-01-04...
+Calculating weasd covariates for 2022-01-05...
+Returning extracted covariates.
> weasd_covar - site_id date level weasd_0 -1 37183001488101 2022-01-01 monolevel 0.000000000 -2 37183002188101 2022-01-01 monolevel 0.000000000 -3 37063001588101 2022-01-01 monolevel 0.000000000 -4 37183001488101 2022-01-02 monolevel 0.000000000 -5 37183002188101 2022-01-02 monolevel 0.000000000 -6 37063001588101 2022-01-02 monolevel 0.000000000 -7 37183001488101 2022-01-03 monolevel 0.000000000 -8 37183002188101 2022-01-03 monolevel 0.000000000 -9 37063001588101 2022-01-03 monolevel 0.000000000 -10 37183001488101 2022-01-04 monolevel 0.000000000 -11 37183002188101 2022-01-04 monolevel 0.000000000 -12 37063001588101 2022-01-04 monolevel 0.000000000 -13 37183001488101 2022-01-05 monolevel 0.003906250 -14 37183002188101 2022-01-05 monolevel 0.001953125 -15 37063001588101 2022-01-05 monolevel 0.001953125 + id time weasd_0 +1 0001 2022-01-01 0.000000000 +2 0001 2022-01-02 0.000000000 +3 0001 2022-01-03 0.000000000 +4 0001 2022-01-04 0.000000000 +5 0001 2022-01-05 0.001953125 ``` -## Other sources -- Below is a list of other data sources that can be accessed via R packages for climate and weather datasets. - -| Source | Link | R package | -| :----- | :--- | :-------- | -| Monitoring Trends in Burn Severity (MTBS) | https://www.mtbs.gov/ | | -| Daymet | https://daac.ornl.gov/cgi-bin/dataset_lister.pl?p=32 | [`daymetr`](https://cran.r-project.org/web/packages/daymetr/index.html) | -| Gridmet | https://www.climatologylab.org/gridmet.html | [`climateR`](https://github.com/mikejohnson51/climateR?tab=readme-ov-file) | -| NEX-GDDP-CMIP6 | | [`RClimChange`*](https://github.com/hllauca/RClimChange/) | -| ECMWF (e.g., ERA5) | https://www.ecmwf.int/en/forecasts/dataset/ecmwf-reanalysis-v5 | [`ecmwfr`](https://cran.r-project.org/web/packages/ecmwfr/index.html) | -| Copernicus/Sentinel | https://sentinels.copernicus.eu/web/sentinel/home | [`sen2r`**](https://github.com/ranghetti/sen2r) | -| USGS and EPA Hydrology and Water Quality Data | | [`dataRetrieval`](https://cran.r-project.org/web/packages/dataRetrieval/index.html) | -| NASA and USGS Satellite Products | | [`luna`](https://github.com/rspatial/luna) | -| NOAA Operational Model Archive | [https://nomads.ncep.noaa.gov] | [`rNOMADS`](https://cran.r-project.org/web/packages/rNOMADS/) | - -* Updated longer than two years before. -** Archived; no longer maintained. - -## References +## Additional Resources + +The following R packages can also be used to access climate and weather data in R, but each differs from `amadeus` in the data sources covered or type of functionality provided. 
+ +| Package | Source | +| :--- | :----- | +| [`dataRetrieval`](https://cran.r-project.org/web/packages/dataRetrieval/index.html) | [USGS Hydrological Data](https://www.usgs.gov/mission-areas/water-resources/data) and [EPA Water Quality Data](https://www.epa.gov/waterdata/water-quality-data) | +| [`daymetr`](https://cran.r-project.org/web/packages/daymetr/index.html) | [Daymet](https://daac.ornl.gov/cgi-bin/dataset_lister.pl?p=32) | +| [`ecmwfr`](https://cran.r-project.org/web/packages/ecmwfr/index.html) | [ECMWF Reanalysis v5 (ERA5)](https://www.ecmwf.int/en/forecasts/dataset/ecmwf-reanalysis-v5) | +| [`RClimChange`[^1]](https://github.com/hllauca/RClimChange/) | [NASA Earth Exchange Global Daily Downscaled Projections (NEX-GDDP-CMIP6)](https://www.nccs.nasa.gov/services/data-collections/land-based-products/nex-gddp-cmip6) | +| [`rNOMADS`](https://cran.r-project.org/web/packages/rNOMADS/) | [NOAA Operational Model Archive and Distribution System](https://nomads.ncep.noaa.gov/) | +| [`sen2r`[^2]](https://github.com/ranghetti/sen2r) | [Sentinel-2](https://sentinels.copernicus.eu/web/sentinel/missions/sentinel-2) | + +[^1]: Last updated more than two years ago. +[^2]: Archived; no longer maintained. diff --git a/inst/extdata/nlcd_classes.csv b/inst/extdata/nlcd_classes.csv index 4e2f088e..4fd90305 100644 --- a/inst/extdata/nlcd_classes.csv +++ b/inst/extdata/nlcd_classes.csv @@ -1,17 +1,18 @@ "","value","class","names","col" -"1",0,"TUNCL","Unclassified","white" +"1",0,"TUNCL","Unclassified","#ffffff00" "2",11,"TWATR","Open Water","#476ba1" -"3",21,"TDVOS","Developed, Open Space","#decaca" -"4",22,"TDVLO","Developed, Low Intensity","#d99482" -"5",23,"TDVMI","Developed, Medium Intensity","#ee0000" -"6",24,"TDVHI","Developed, High Intensity","#ab0000" -"7",31,"TBARN","Barren Land","#b3aea3" -"8",41,"TDFOR","Deciduous Forest","#68ab63" -"9",42,"TEFOR","Evergreen Forest","#1c6330" -"10",43,"TMFOR","Mixed Forest","#b5ca8f" -"11",52,"TSHRB","Shrub/Scrub","#ccba7d" -"12",71,"THERB","Herbaceous","#e3e3c2" -"13",81,"TPAST","Hay/Pasture","#dcd93d" -"14",82,"TPLNT","Cultivated Crops","#ab7028" -"15",90,"TWDWT","Woody Wetlands","#bad9eb" -"16",95,"THWEM","Emergent Herbaceous Wetlands","#70a3ba" +"3",12,"TSNOW","Perennial Snow/Ice","#F5F5F5" +"4",21,"TDVOS","Developed, Open Space","#decaca" +"5",22,"TDVLO","Developed, Low Intensity","#d99482" +"6",23,"TDVMI","Developed, Medium Intensity","#ee0000" +"7",24,"TDVHI","Developed, High Intensity","#ab0000" +"8",31,"TBARN","Barren Land","#b3aea3" +"9",41,"TDFOR","Deciduous Forest","#68ab63" +"10",42,"TEFOR","Evergreen Forest","#1c6330" +"11",43,"TMFOR","Mixed Forest","#b5ca8f" +"12",52,"TSHRB","Shrub/Scrub","#ccba7d" +"13",71,"THERB","Herbaceous","#e3e3c2" +"14",81,"TPAST","Hay/Pasture","#dcd93d" +"15",82,"TPLNT","Cultivated Crops","#ab7028" +"16",90,"TWDWT","Woody Wetlands","#bad9eb" +"17",95,"THWEM","Emergent Herbaceous Wetlands","#70a3ba" diff --git a/man/calc_check_time.Rd b/man/calc_check_time.Rd new file mode 100644 index 00000000..9099defb --- /dev/null +++ b/man/calc_check_time.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/calculate_covariates_auxiliary.R +\name{calc_check_time} +\alias{calc_check_time} +\title{Check time values} +\usage{ +calc_check_time(covar, POSIXt = TRUE) +} +\arguments{ +\item{covar}{data.frame(1). Calculated covariates \code{data.frame}.} + +\item{POSIXt}{logical(1). Should the time values in \code{covar} be of class +\code{POSIXt}? 
If \code{FALSE}, the time values will be checked for integer class +(year and year-month).} +} +\description{ +Check the time values within calculated covariates \code{data.frame} +} +\keyword{internal} diff --git a/man/calc_covariates.Rd b/man/calc_covariates.Rd index 622763c6..189b8251 100644 --- a/man/calc_covariates.Rd +++ b/man/calc_covariates.Rd @@ -47,7 +47,7 @@ SpatRaster or SpatVector objects before passing to \seealso{ \itemize{ \item \code{\link{calc_modis_par}}: \code{"modis"}, \code{"MODIS"} -\item \code{\link{calc_koppen_geiger}}: \code{"koppen-geiger"}, \code{"koeppen-geiger"}, \code{"koppen"}, +\item \code{\link{calc_koppen_geiger}}: \code{"koppen-geiger"}, \code{"koeppen-geiger"}, \code{"koppen"} \item \code{\link{calc_ecoregion}}: \code{"ecoregion"}, \code{"ecoregions"} \item \item \code{\link{calc_hms}}: \code{"hms"}, \code{"noaa"}, \code{"smoke"} diff --git a/man/calc_ecoregion.Rd b/man/calc_ecoregion.Rd index 1ee1d969..21d14855 100644 --- a/man/calc_ecoregion.Rd +++ b/man/calc_ecoregion.Rd @@ -4,7 +4,7 @@ \alias{calc_ecoregion} \title{Calculate ecoregions covariates} \usage{ -calc_ecoregion(from = NULL, locs, locs_id = "site_id", ...) +calc_ecoregion(from = NULL, locs, locs_id = "site_id", geom = FALSE, ...) } \arguments{ \item{from}{SpatVector(1). Output of \code{\link{process_ecoregion}}.} @@ -14,6 +14,13 @@ a unique identifier field named \code{locs_id}} \item{locs_id}{character(1). Name of unique identifier.} +\item{geom}{logical(1). Should the geometry of \code{locs} be returned in the +\code{data.frame}? Default is \code{FALSE}. If \code{geom = TRUE} and \code{locs} contain +polygon geometries, the \verb{$geometry} column in the returned data frame may +make the \code{data.frame} difficult to read due to long geometry strings. The +coordinate reference system of the \verb{$geometry} is the coordinate +reference system of \code{from}.} + \item{...}{Placeholders.} } \value{ diff --git a/man/calc_geos.Rd b/man/calc_geos.Rd index c64bb16d..804fc856 100644 --- a/man/calc_geos.Rd +++ b/man/calc_geos.Rd @@ -4,7 +4,15 @@ \alias{calc_geos} \title{Calculate atmospheric composition covariates} \usage{ -calc_geos(from, locs, locs_id = NULL, radius = 0, fun = "mean", ...) +calc_geos( + from, + locs, + locs_id = NULL, + radius = 0, + fun = "mean", + geom = FALSE, + ... +) } \arguments{ \item{from}{SpatRaster(1). Output of \code{process_geos()}.} @@ -20,7 +28,14 @@ containing identifier for each unique coordinate location.} \item{fun}{character(1). Function used to summarize multiple raster cells within sites location buffer (Default = \code{mean}).} -\item{...}{Placeholders} +\item{geom}{logical(1). Should the geometry of \code{locs} be returned in the +\code{data.frame}? Default is \code{FALSE}. If \code{geom = TRUE} and \code{locs} contain +polygon geometries, the \verb{$geometry} column in the returned data frame may +make the \code{data.frame} difficult to read due to long geometry strings. The +coordinate reference system of the \verb{$geometry} is the coordinate +reference system of \code{from}.} + +\item{...}{Placeholders.} } \value{ a data.frame object diff --git a/man/calc_gmted.Rd b/man/calc_gmted.Rd index 83246d0f..58c19e32 100644 --- a/man/calc_gmted.Rd +++ b/man/calc_gmted.Rd @@ -4,7 +4,15 @@ \alias{calc_gmted} \title{Calculate elevation covariates} \usage{ -calc_gmted(from, locs, locs_id = NULL, radius = 0, fun = "mean", ...) +calc_gmted( + from, + locs, + locs_id = NULL, + radius = 0, + fun = "mean", + geom = FALSE, + ... 
+) } \arguments{ \item{from}{SpatRaster(1). Output from \code{process_gmted()}.} @@ -20,6 +28,13 @@ containing identifier for each unique coordinate location.} \item{fun}{character(1). Function used to summarize multiple raster cells within sites location buffer (Default = \code{mean}).} +\item{geom}{logical(1). Should the geometry of \code{locs} be returned in the +\code{data.frame}? Default is \code{FALSE}. If \code{geom = TRUE} and \code{locs} contain +polygon geometries, the \verb{$geometry} column in the returned data frame may +make the \code{data.frame} difficult to read due to long geometry strings. The +coordinate reference system of the \verb{$geometry} is the coordinate +reference system of \code{from}.} + \item{...}{Placeholders} } \value{ @@ -27,10 +42,10 @@ a data.frame object } \description{ Extract elevation values at point locations. Returns a \code{data.frame} -object containing \code{locs_id} and elevation variable. Elevation variable -column name reflects the elevation statistic, spatial resolution of -\code{from}, and circular buffer radius (ie. Breakline Emphasis at 7.5 -arc-second resolution with 0 meter buffer: breakline_emphasis_r75_0). +object containing \code{locs_id}, year of release, and elevation variable. +Elevation variable column name reflects the elevation statistic, spatial +resolution of \code{from}, and circular buffer radius (ie. Breakline Emphasis +at 7.5 arc-second resolution with 0 meter buffer: breakline_emphasis_r75_0). } \seealso{ \code{\link[=process_gmted]{process_gmted()}} diff --git a/man/calc_gridmet.Rd b/man/calc_gridmet.Rd index eaf9219c..2a28b10a 100644 --- a/man/calc_gridmet.Rd +++ b/man/calc_gridmet.Rd @@ -4,7 +4,15 @@ \alias{calc_gridmet} \title{Calculate gridMET covariates} \usage{ -calc_gridmet(from, locs, locs_id = NULL, radius = 0, fun = "mean") +calc_gridmet( + from, + locs, + locs_id = NULL, + radius = 0, + fun = "mean", + geom = FALSE, + ... +) } \arguments{ \item{from}{SpatRaster(1). Output from \code{process_gridmet()}.} @@ -19,6 +27,15 @@ containing identifier for each unique coordinate location.} \item{fun}{character(1). Function used to summarize multiple raster cells within sites location buffer (Default = \code{mean}).} + +\item{geom}{logical(1). Should the geometry of \code{locs} be returned in the +\code{data.frame}? Default is \code{FALSE}. If \code{geom = TRUE} and \code{locs} contain +polygon geometries, the \verb{$geometry} column in the returned data frame may +make the \code{data.frame} difficult to read due to long geometry strings. The +coordinate reference system of the \verb{$geometry} is the coordinate +reference system of \code{from}.} + +\item{...}{Placeholders.} } \value{ a data.frame object diff --git a/man/calc_koppen_geiger.Rd b/man/calc_koppen_geiger.Rd index 9c68e241..23b08083 100644 --- a/man/calc_koppen_geiger.Rd +++ b/man/calc_koppen_geiger.Rd @@ -4,7 +4,13 @@ \alias{calc_koppen_geiger} \title{Calculate climate classification covariates} \usage{ -calc_koppen_geiger(from = NULL, locs = NULL, locs_id = "site_id", ...) +calc_koppen_geiger( + from = NULL, + locs = NULL, + locs_id = "site_id", + geom = FALSE, + ... +) } \arguments{ \item{from}{SpatVector(1). Output of \code{process_koppen_geiger()}.} @@ -14,6 +20,13 @@ a unique identifier field named \code{locs_id}} \item{locs_id}{character(1). Name of unique identifier.} +\item{geom}{logical(1). Should the geometry of \code{locs} be returned in the +\code{data.frame}? Default is \code{FALSE}. 
If \code{geom = TRUE} and \code{locs} contain +polygon geometries, the \verb{$geometry} column in the returned data frame may +make the \code{data.frame} difficult to read due to long geometry strings. The +coordinate reference system of the \verb{$geometry} is the coordinate +reference system of \code{from}.} + \item{...}{Placeholders.} } \value{ @@ -25,6 +38,12 @@ Extract climate classification values at point locations. Returns a binary (0 = point not in climate region; 1 = point in climate region) variables for each climate classification region. } +\note{ +The returned \code{data.frame} object contains a +\verb{$description} column to represent the temporal range covered by the +dataset. For more information, see +\url{https://www.nature.com/articles/sdata2018214}. +} \seealso{ \code{\link{process_koppen_geiger}} } diff --git a/man/calc_lagged.Rd b/man/calc_lagged.Rd index ed05fbfa..b067859b 100644 --- a/man/calc_lagged.Rd +++ b/man/calc_lagged.Rd @@ -4,7 +4,7 @@ \alias{calc_lagged} \title{Calculate temporally lagged covariates} \usage{ -calc_lagged(from, date, lag, locs_id, time_id) +calc_lagged(from, date, lag, locs_id, time_id = "time") } \arguments{ \item{from}{data.frame(1). A \code{data.frame} containing calculated covariates @@ -31,6 +31,8 @@ In order to calculate temporally lagged covariates, \code{from} must contain at least the number of lag days before the desired start date. For example, if \verb{date = c("2024-01-01", "2024-01-31)} and \code{lag = 1}, \code{from} must contain data starting at 2023-12-31. +If \code{from} contains geometry features, \code{calc_lagged} will return a column +with geometry features of the same name. \code{calc_lagged()} assumes that all columns other than \code{time_id}, \code{locs_id}, and fixed columns of "lat" and "lon", follow the genre, variable, lag, buffer radius format adopted in \code{calc_setcolumns()}. diff --git a/man/calc_merra2.Rd b/man/calc_merra2.Rd index 50e871ed..9a8e86ff 100644 --- a/man/calc_merra2.Rd +++ b/man/calc_merra2.Rd @@ -4,7 +4,15 @@ \alias{calc_merra2} \title{Calculate meteorological and atmospheric covariates} \usage{ -calc_merra2(from, locs, locs_id = NULL, radius = 0, fun = "mean", ...) +calc_merra2( + from, + locs, + locs_id = NULL, + radius = 0, + fun = "mean", + geom = FALSE, + ... +) } \arguments{ \item{from}{SpatRaster(1). Output of \code{process_merra2()}.} @@ -20,6 +28,13 @@ containing identifier for each unique coordinate location.} \item{fun}{character(1). Function used to summarize multiple raster cells within sites location buffer (Default = \code{mean}).} +\item{geom}{logical(1). Should the geometry of \code{locs} be returned in the +\code{data.frame}? Default is \code{FALSE}. If \code{geom = TRUE} and \code{locs} contain +polygon geometries, the \verb{$geometry} column in the returned data frame may +make the \code{data.frame} difficult to read due to long geometry strings. The +coordinate reference system of the \verb{$geometry} is the coordinate +reference system of \code{from}.} + \item{...}{Placeholders} } \value{ diff --git a/man/calc_modis_daily.Rd b/man/calc_modis_daily.Rd index b4357d5f..d92e1532 100644 --- a/man/calc_modis_daily.Rd +++ b/man/calc_modis_daily.Rd @@ -12,7 +12,7 @@ calc_modis_daily( date = NULL, name_extracted = NULL, fun_summary = "mean", - max_cells = 1e+08, + max_cells = 3e+07, ... 
 )
 }
@@ -55,6 +55,13 @@
 Please note that this function does not provide a function to filter
 swaths or tiles, so it is strongly recommended to check and pre-filter
 the file names at users' discretion.
 }
+\seealso{
+\itemize{
+\item Preprocessing: \code{\link[=process_modis_merge]{process_modis_merge()}}, \code{\link[=process_modis_swath]{process_modis_swath()}},
+\code{\link[=process_bluemarble]{process_bluemarble()}}
+\item Parallelization: \code{\link[=calc_modis_par]{calc_modis_par()}}
+}
+}
 \author{
 Insang Song
 }
diff --git a/man/calc_modis_par.Rd b/man/calc_modis_par.Rd
index f17fc22f..1cf02d19 100644
--- a/man/calc_modis_par.Rd
+++ b/man/calc_modis_par.Rd
@@ -16,7 +16,7 @@ calc_modis_par(
   nthreads = floor(length(parallelly::availableWorkers())/2),
   package_list_add = NULL,
   export_list_add = NULL,
-  max_cells = 1e+08,
+  max_cells = 3e+07,
   ...
 )
 }
@@ -40,7 +40,8 @@ The calculated covariate names will have a form of
 e.g., 'MOD_NDVIF_0_50000' where 50 km radius circular buffer was used
 to calculate mean NDVI value.}
 
-\item{subdataset}{Index or search pattern of subdataset.}
+\item{subdataset}{Indices, names, or search patterns for subdatasets.
+See the notes for detailed usage of this argument.}
 
 \item{fun_summary}{character or function. Function to summarize
 extracted raster values.}
@@ -63,6 +64,14 @@ See \code{\link[exactextractr:exact_extract]{exactextractr::exact_extract}} for
 
 \item{...}{Arguments passed to \code{preprocess}.}
 }
+\value{
+A data.frame with an attribute:
+\itemize{
+\item \code{attr(., "dates_dropped")}: Dates with insufficient tiles.
+Note that these are dates with an insufficient number of tiles,
+not dates with no available tiles.
+}
+}
 \description{
 \code{calc_modis_par} essentially runs \code{\link{calc_modis_daily}} function
 in each thread (subprocess). Based on daily resolution, each day's workload
@@ -84,26 +93,41 @@ Common arguments in \code{preprocess} functions such as \code{date} and \code{pa
 automatically detected and passed to the function. Please note that
 \code{locs} here and \code{path} in \code{preprocess} functions are assumed to have a
 standard naming convention of raw files from NASA.
+The argument \code{subdataset} should be given in a format appropriate to
+the \code{preprocess} function:
+\itemize{
+\item \code{process_modis_merge()}: Regular expression pattern.
+e.g., \code{"^LST_"}
+\item \code{process_modis_swath()}: Subdataset names.
+e.g., \code{c("Cloud_Fraction_Day", "Cloud_Fraction_Night")}
+\item \code{process_bluemarble()}: Subdataset number.
+e.g., for VNP46A2 product, 3L.
+}
+Dates with less than 80 percent of the expected number of tiles,
+which is determined by the mode of the number of tiles, are removed.
+Users will be informed of these dates, which are also stored in the
+\code{"dates_dropped"} attribute of the result \code{data.frame}.
 }
 \seealso{
 See details for setting parallelization:
 \itemize{
-\item \code{\link[foreach:foreach]{foreach::foreach}}
-\item \code{\link[parallelly:makeClusterPSOCK]{parallelly::makeClusterPSOCK}}
-\item \code{\link[parallelly:availableCores]{parallelly::availableCores}}
-\item \code{\link[doParallel:registerDoParallel]{doParallel::registerDoParallel}}
+\item \code{\link[future:plan]{future::plan()}}
+\item \code{\link[future.apply:future_lapply]{future.apply::future_lapply()}}
+\item \code{\link[parallelly:makeClusterPSOCK]{parallelly::makeClusterPSOCK()}}
+\item \code{\link[parallelly:availableCores]{parallelly::availableCores()}}
 }
 This function leverages the calculation of
 single-day MODIS covariates:
 \itemize{
-\item \code{\link{calc_modis_daily}}
+\item \code{\link[=calc_modis_daily]{calc_modis_daily()}}
 }
-Also, for preprocessing, see:
+Also, for preprocessing, please refer to:
 \itemize{
-\item \code{\link{process_modis_merge}}
-\item \code{\link{process_modis_swath}}
-\item \code{\link{process_bluemarble}}
+\item \code{\link[=process_modis_merge]{process_modis_merge()}}
+\item \code{\link[=process_modis_swath]{process_modis_swath()}}
+\item \code{\link[=process_bluemarble]{process_bluemarble()}}
 }
 }
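As a quick illustration of the three `subdataset` formats documented above, the calls below sketch one invocation per `preprocess` function. This is a sketch only: the file vectors and `locs` object are hypothetical, and other arguments (e.g., `name_covariates`) are omitted.

```
# hypothetical inputs; subdataset formats follow the note in calc_modis_par()
calc_modis_par(from = mod11_files, locs = locs, locs_id = "id",
               preprocess = process_modis_merge,
               subdataset = "^LST_")            # regular expression pattern
calc_modis_par(from = mod06_files, locs = locs, locs_id = "id",
               preprocess = process_modis_swath,
               subdataset = c("Cloud_Fraction_Day", "Cloud_Fraction_Night"))
calc_modis_par(from = vnp46_files, locs = locs, locs_id = "id",
               preprocess = process_bluemarble,
               subdataset = 3L)                 # subdataset number (VNP46A2)
```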
diff --git a/man/calc_narr.Rd b/man/calc_narr.Rd
index c00f832c..ae6701b6 100644
--- a/man/calc_narr.Rd
+++ b/man/calc_narr.Rd
@@ -4,7 +4,15 @@
 \alias{calc_narr}
 \title{Calculate meteorological covariates}
 \usage{
-calc_narr(from, locs, locs_id = NULL, radius = 0, fun = "mean", ...)
+calc_narr(
+  from,
+  locs,
+  locs_id = NULL,
+  radius = 0,
+  fun = "mean",
+  geom = FALSE,
+  ...
+)
 }
 \arguments{
 \item{from}{SpatRaster(1). Output of \code{process_narr()}.}
@@ -20,6 +28,13 @@ containing identifier for each unique coordinate location.}
 \item{fun}{character(1). Function used to summarize multiple raster cells
 within sites location buffer (Default = \code{mean}).}
 
+\item{geom}{logical(1). Should the geometry of \code{locs} be returned in the
+\code{data.frame}? Default is \code{FALSE}. If \code{geom = TRUE} and \code{locs} contain
+polygon geometries, the \verb{$geometry} column in the returned data frame may
+make the \code{data.frame} difficult to read due to long geometry strings. The
+coordinate reference system of the \verb{$geometry} is the coordinate
+reference system of \code{from}.}
+
 \item{...}{Placeholders}
 }
 \value{
diff --git a/man/calc_nlcd.Rd b/man/calc_nlcd.Rd
index 97874474..237a6bc3 100644
--- a/man/calc_nlcd.Rd
+++ b/man/calc_nlcd.Rd
@@ -8,8 +8,11 @@ calc_nlcd(
   from,
   locs,
   locs_id = "site_id",
+  mode = c("exact", "terra"),
   radius = 1000,
-  max_cells = 1e+08,
+  max_cells = 5e+07,
+  geom = FALSE,
+  nthreads = 1L,
   ...
 )
 }
@@ -20,14 +23,28 @@
 \item{locs_id}{character(1). Unique identifier of locations}
 
+\item{mode}{character(1). One of \code{"exact"}
+(using \code{\link[exactextractr:exact_extract]{exactextractr::exact_extract()}})
+or \code{"terra"} (using \code{\link[terra:freq]{terra::freq()}}).}
+
 \item{radius}{numeric (non-negative) giving the radius of buffer around
 points}
 
 \item{max_cells}{integer(1). Maximum number of cells to be read at once.
-Higher values will expedite processing, but will increase memory usage.
-Maximum possible value is \code{2^31 - 1}.
+Higher values may expedite processing, but will increase memory usage.
+Maximum possible value is \code{2^31 - 1}. Only valid when
+\code{mode = "exact"}.
 See \code{\link[exactextractr:exact_extract]{exactextractr::exact_extract}} for
 details.}
 
+\item{geom}{logical(1). Should the geometry of \code{locs} be returned in the
+\code{data.frame}? Default is \code{FALSE}. If \code{geom = TRUE} and \code{locs} contain
+polygon geometries, the \verb{$geometry} column in the returned data frame may
+make the \code{data.frame} difficult to read due to long geometry strings. The
+coordinate reference system of the \verb{$geometry} is the coordinate
+reference system of \code{from}.}
+
+\item{nthreads}{integer(1). Number of threads to be used.}
+
 \item{...}{Placeholders.}
 }
 \value{
@@ -38,6 +55,15 @@ Compute ratio of land cover class in circle buffers around points.
 Returns a \code{data.frame} object containing \code{locs_id}, longitude, latitude,
 time (year), and computed ratio for each land cover class.
 }
+\note{
+NLCD is available in the U.S. only. Users should be aware of
+the spatial extent of the data. The results differ depending on
+the \code{mode} argument. The \code{"terra"} mode is less memory-intensive
+but less accurate, because it counts the number of cells
+intersecting with the buffer. The \code{"exact"} mode may be more accurate
+but uses more memory, as it accounts for partial overlap
+with the buffer.
+}
 \seealso{
 \code{\link{process_nlcd}}
 }
diff --git a/man/calc_prepare_locs.Rd b/man/calc_prepare_locs.Rd
index ead49266..427e4da3 100644
--- a/man/calc_prepare_locs.Rd
+++ b/man/calc_prepare_locs.Rd
@@ -4,7 +4,7 @@
 \alias{calc_prepare_locs}
 \title{Prepare extraction locations}
 \usage{
-calc_prepare_locs(from, locs, locs_id, radius)
+calc_prepare_locs(from, locs, locs_id, radius, geom = FALSE)
 }
 \arguments{
 \item{from}{SpatRaster(1) or SpatVector(1). Output from
@@ -19,6 +19,9 @@ Passed from \code{calc_\*()}.}
 
 \item{radius}{integer(1). Circular buffer distance around site locations.
 (Default = 0). Passed from \code{calc_\*()}.}
+
+\item{geom}{logical(1). Should the geometry of \code{locs} be returned in the
+\code{data.frame}? Default is \code{FALSE}.}
 }
 \value{
 A \code{list} containing \code{SpatVector} and \code{data.frame} objects
diff --git a/man/calc_sedac_groads.Rd b/man/calc_sedac_groads.Rd
index 9286602d..2d23b4f2 100644
--- a/man/calc_sedac_groads.Rd
+++ b/man/calc_sedac_groads.Rd
@@ -9,7 +9,8 @@ calc_sedac_groads(
   locs = NULL,
   locs_id = NULL,
   radius = 1000,
-  fun = sum,
+  fun = "sum",
+  geom = FALSE,
   ...
 )
 }
@@ -27,6 +28,13 @@ containing identifier for each unique coordinate location.}
 
 \item{fun}{function(1). Function used to summarize the length of roads
 within sites location buffer (Default is \code{sum}).}
 
+\item{geom}{logical(1). Should the geometry of \code{locs} be returned in the
+\code{data.frame}? Default is \code{FALSE}. If \code{geom = TRUE} and \code{locs} contain
+polygon geometries, the \verb{$geometry} column in the returned data frame may
+make the \code{data.frame} difficult to read due to long geometry strings. The
+coordinate reference system of the \verb{$geometry} is the coordinate
+reference system of \code{from}.}
+
 \item{...}{Placeholders.}
 }
 \value{
@@ -40,7 +48,9 @@ the total length from the area of the buffer. \code{terra::linearUnits()}
 is used to convert the unit of length to meters.
 }
 \note{
-Unit is km / sq km.
+Unit is km / sq km. The returned \code{data.frame} object contains a
+\verb{$time} column to represent the temporal range covered by the
+dataset. For more information, see
+\url{https://sedac.ciesin.columbia.edu/data/set/groads-global-roads-open-access-v1/metadata}.
 }
 \seealso{
 \code{\link{process_sedac_groads}}
 }
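Given the character `fun` default (`"sum"`) and the km / sq km unit noted above, a call might look like the following. This is a minimal sketch under stated assumptions: `groads` stands in for `process_sedac_groads()` output and `locs` for a point-location `data.frame` with an `"id"` column.

```
# sketch: total road length density (km / sq km) within a 1 km buffer
groads_covar <- calc_sedac_groads(
  from = groads,      # hypothetical: output of process_sedac_groads()
  locs = locs,
  locs_id = "id",
  radius = 1000,
  fun = "sum",
  geom = FALSE
)
```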
diff --git a/man/calc_sedac_population.Rd b/man/calc_sedac_population.Rd
index 801b25d2..764b42a3 100644
--- a/man/calc_sedac_population.Rd
+++ b/man/calc_sedac_population.Rd
@@ -10,6 +10,7 @@ calc_sedac_population(
   locs_id = NULL,
   radius = 0,
   fun = "mean",
+  geom = FALSE,
   ...
 )
 }
@@ -27,6 +28,13 @@ containing identifier for each unique coordinate location.}
 
 \item{fun}{character(1). Function used to summarize multiple raster cells
 within sites location buffer (Default = \code{mean}).}
 
+\item{geom}{logical(1). Should the geometry of \code{locs} be returned in the
+\code{data.frame}? Default is \code{FALSE}. If \code{geom = TRUE} and \code{locs} contain
+polygon geometries, the \verb{$geometry} column in the returned data frame may
+make the \code{data.frame} difficult to read due to long geometry strings. The
+coordinate reference system of the \verb{$geometry} is the coordinate
+reference system of \code{from}.}
+
 \item{...}{Placeholders}
 }
 \value{
diff --git a/man/calc_terraclimate.Rd b/man/calc_terraclimate.Rd
index 720e5e8c..4f97714d 100644
--- a/man/calc_terraclimate.Rd
+++ b/man/calc_terraclimate.Rd
@@ -4,7 +4,15 @@
 \alias{calc_terraclimate}
 \title{Calculate TerraClimate covariates}
 \usage{
-calc_terraclimate(from, locs, locs_id = NULL, radius = 0, fun = "mean")
+calc_terraclimate(
+  from = NULL,
+  locs = NULL,
+  locs_id = NULL,
+  radius = 0,
+  fun = "mean",
+  geom = FALSE,
+  ...
+)
 }
 \arguments{
 \item{from}{SpatRaster(1). Output from \code{process_terraclimate()}.}
@@ -19,6 +27,15 @@ containing identifier for each unique coordinate location.}
 
 \item{fun}{character(1). Function used to summarize multiple raster cells
 within sites location buffer (Default = \code{mean}).}
+
+\item{geom}{logical(1). Should the geometry of \code{locs} be returned in the
+\code{data.frame}? Default is \code{FALSE}. If \code{geom = TRUE} and \code{locs} contain
+polygon geometries, the \verb{$geometry} column in the returned data frame may
+make the \code{data.frame} difficult to read due to long geometry strings. The
+coordinate reference system of the \verb{$geometry} is the coordinate
+reference system of \code{from}.}
+
+\item{...}{Placeholders.}
 }
 \value{
 a data.frame object
diff --git a/man/calc_time.Rd b/man/calc_time.Rd
index 247395cd..43ebb99a 100644
--- a/man/calc_time.Rd
+++ b/man/calc_time.Rd
@@ -10,8 +10,9 @@ calc_time(time, format)
 \item{time}{Time value}
 
 \item{format}{Type of time to return in the \verb{$time} column. Can be
-"timeless" (ie. GMTED data), "date" (ie. NARR data), "hour", (ie. GEOS data),
-"year" (ie. SEDAC population data), or "yearmonth" (ie. TerraClimate data).}
+"timeless" (ie. Ecoregions data), "date" (ie. NARR data), "hour"
+(ie. GEOS data), "year" (ie. SEDAC population data), or "yearmonth"
+(ie. TerraClimate data).}
 }
 \value{
 a \code{Date}, \code{POSIXt}, or \code{integer} object based on \verb{format =}
diff --git a/man/calc_worker.Rd b/man/calc_worker.Rd
index ea4a1150..6749c869 100644
--- a/man/calc_worker.Rd
+++ b/man/calc_worker.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/calculate_covariates_auxiliary.R
 \name{calc_worker}
 \alias{calc_worker}
-\title{Peform covariate extraction}
+\title{Perform covariate extraction}
 \usage{
 calc_worker(
   dataset,
@@ -14,7 +14,9 @@
   time,
   time_type = c("date", "hour", "year", "yearmonth", "timeless"),
   radius,
-  level = NULL
+  level = NULL,
+  max_cells = 1e+08,
+  ...
 )
 }
 \arguments{
@@ -44,6 +46,13 @@ value(s).}
 
 \item{level}{integer.
Position within the layer name containing the vertical pressure level value (if applicable). Default = \code{NULL}.} + +\item{max_cells}{integer(1). Maximum number of cells to be read at once. +Higher values will expedite processing, but will increase memory usage. +Maximum possible value is \code{2^31 - 1}. +See \code{\link[exactextractr:exact_extract]{exactextractr::exact_extract}} for details.} + +\item{...}{Placeholders.} } \value{ a \code{data.frame} object diff --git a/man/check_url_status.Rd b/man/check_url_status.Rd index 94c64901..23cd3577 100644 --- a/man/check_url_status.Rd +++ b/man/check_url_status.Rd @@ -15,7 +15,7 @@ check_url_status(url, method = c("HEAD", "GET")) logical object } \description{ -Check if provided URL returns HTTP status 200. +Check if provided URL returns HTTP status 200 or 206. } \author{ Insang Song; Mitchell Manware diff --git a/man/download_aqs_data.Rd b/man/download_aqs.Rd similarity index 73% rename from man/download_aqs_data.Rd rename to man/download_aqs.Rd index 620b50d0..22500b67 100644 --- a/man/download_aqs_data.Rd +++ b/man/download_aqs.Rd @@ -1,16 +1,15 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/download.R -\name{download_aqs_data} -\alias{download_aqs_data} +\name{download_aqs} +\alias{download_aqs} \title{Download air quality data} \usage{ -download_aqs_data( +download_aqs( parameter_code = 88101, resolution_temporal = "daily", year_start = 2018, year_end = 2022, url_aqs_download = "https://aqs.epa.gov/aqsweb/airdata/", - directory_to_download = NULL, directory_to_save = NULL, acknowledgement = FALSE, download = FALSE, @@ -37,11 +36,9 @@ End year for downloading data.} \item{url_aqs_download}{character(1). URL to the AQS pre-generated datasets.} -\item{directory_to_download}{character(1). -Directory to download zip files from AQS data mart.} - -\item{directory_to_save}{character(1). -Directory to decompress zip files.} +\item{directory_to_save}{character(1). Directory to save data. Two +sub-directories will be created for the downloaded zip files ("/zip_files") +and the unzipped data files ("/data_files").} \item{acknowledgement}{logical(1). By setting \code{TRUE} the user acknowledges that the data downloaded using this function may be very @@ -61,12 +58,11 @@ the text file containing download commands. Default is FALSE.} Default \code{FALSE}.} } \value{ -NULL; Separate comma-separated value (CSV) files of -monitors and the daily representative values -will be stored in \code{directory_to_save}. +NULL; Zip and/or data files will be downloaded and stored in +\code{directory_to_save}. } \description{ -The \code{download_aqs_data()} function accesses and downloads Air Quality System (AQS) data from the \href{https://aqs.epa.gov/aqsweb/airdata/download_files.html}{U.S. Environmental Protection Agency's (EPA) Pre-Generated Data Files}. +The \code{download_aqs()} function accesses and downloads Air Quality System (AQS) data from the \href{https://aqs.epa.gov/aqsweb/airdata/download_files.html}{U.S. Environmental Protection Agency's (EPA) Pre-Generated Data Files}. 
} \author{ Mariana Kassien, Insang Song, Mitchell Manware diff --git a/man/download_cropscape_data.Rd b/man/download_cropscape.Rd similarity index 94% rename from man/download_cropscape_data.Rd rename to man/download_cropscape.Rd index 01956576..82b6e0fa 100644 --- a/man/download_cropscape_data.Rd +++ b/man/download_cropscape.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/download.R -\name{download_cropscape_data} -\alias{download_cropscape_data} +\name{download_cropscape} +\alias{download_cropscape} \title{Download CropScape data} \usage{ -download_cropscape_data( +download_cropscape( year = seq(1997, 2023), source = c("USDA", "GMU"), directory_to_save = NULL, @@ -55,7 +55,7 @@ JSON files should be found at STAC catalog of OpenLandMap } \examples{ \dontrun{ -download_cropscape_data( +download_cropscape( 2020, "~/data", acknowledgement = TRUE, download = TRUE, diff --git a/man/download_data.Rd b/man/download_data.Rd index 021c1e58..d7da4c3e 100644 --- a/man/download_data.Rd +++ b/man/download_data.Rd @@ -5,11 +5,11 @@ \title{Download raw data wrapper function} \usage{ download_data( - dataset_name = c("aqs", "ecoregion", "geos", "gmted", "koppen", "koppengeiger", - "merra2", "merra", "narr_monolevel", "modis", "narr_p_levels", "nlcd", "noaa", - "sedac_groads", "sedac_population", "groads", "population", "plevels", "p_levels", - "monolevel", "hms", "smoke", "tri", "nei", "gridmet", "terraclimate", "huc", - "cropscape", "cdl", "prism", "olm", "openlandmap"), + dataset_name = c("aqs", "ecoregion", "ecoregions", "geos", "gmted", "koppen", + "koppengeiger", "merra2", "merra", "narr_monolevel", "modis", "narr_p_levels", + "nlcd", "noaa", "sedac_groads", "sedac_population", "groads", "population", + "plevels", "p_levels", "monolevel", "hms", "smoke", "tri", "nei", "gridmet", + "terraclimate", "huc", "cropscape", "cdl", "prism", "olm", "openlandmap"), directory_to_save = NULL, acknowledgement = FALSE, ... 
@@ -32,30 +32,32 @@ The \code{download_data()} function accesses and downloads atmospheric, meteorol } \note{ \itemize{ -\item All download function names are in \code{download_*_data} formats +\item All download function names are in \code{download_*} formats } } \seealso{ For details of each download function per dataset, Please refer to: \itemize{ -\item \link{download_aqs_data}: "aqs", "AQS" -\item \link{download_ecoregion_data}: "ecoregion" -\item \link{download_geos_data}: "geos" -\item \link{download_gmted_data}: "gmted", "GMTED" -\item \link{download_koppen_geiger_data}: "koppen", "koppengeiger" -\item \link{download_merra2_data}: "merra2", "merra", "MERRA", "MERRA2" -\item \link{download_narr_monolevel_data}: "narr_monolevel", "monolevel" -\item \link{download_narr_p_levels_data}: "narr_p_levels", "p_levels", "plevels" -\item \link{download_nlcd_data}: "nlcd", "NLCD" -\item \link{download_hms_data}: "noaa", "smoke", "hms" -\item \link{download_sedac_groads_data}: "sedac_groads", "groads" -\item \link{download_sedac_population_data}: "sedac_population", "population" -\item \link{download_modis_data}: "modis", "MODIS" -\item \link{download_tri_data}: "tri", "TRI" -\item \link{download_nei_data}: "nei", "NEI" -\item \link{download_gridmet_data}: "gridMET", "gridmet" -\item \link{download_terraclimate_data}: "TerraClimate", "terraclimate" +\item \code{\link{download_aqs}}: \code{"aqs"}, \code{"AQS"} +\item \code{\link{download_ecoregion}}: \code{"ecoregions"}, \code{"ecoregion"} +\item \code{\link{download_geos}}: \code{"geos"} +\item \code{\link{download_gmted}}: \code{"gmted"}, \code{"GMTED"} +\item \code{\link{download_koppen_geiger}}: \code{"koppen"}, \code{"koppengeiger"} +\item \code{\link{download_merra2}}: "merra2", \code{"merra"}, \code{"MERRA"}, \code{"MERRA2"} +\item \code{\link{download_narr_monolevel}}: \code{"narr_monolevel"}, \code{"monolevel"} +\item \code{\link{download_narr_p_levels}}: \code{"narr_p_levels"}, \code{"p_levels"}, +\code{"plevels"} +\item \code{\link{download_nlcd}}: \code{"nlcd"}, \code{"NLCD"} +\item \code{\link{download_hms}}: \code{"noaa"}, \code{"smoke"}, \code{"hms"} +\item \code{\link{download_sedac_groads}}: \code{"sedac_groads"}, \code{"groads"} +\item \code{\link{download_sedac_population}}: \code{"sedac_population"}, +\code{"population"} +\item \code{\link{download_modis}}: \code{"modis"}, \code{"MODIS"} +\item \code{\link{download_tri}}: \code{"tri"}, \code{"TRI"} +\item \code{\link{download_nei}}: \code{"nei"}, \code{"NEI"} +\item \code{\link{download_gridmet}}: \code{"gridMET"}, \code{"gridmet"} +\item \code{\link{download_terraclimate}}: \code{"TerraClimate"}, \code{"terraclimate"} } } \author{ diff --git a/man/download_ecoregion_data.Rd b/man/download_ecoregion.Rd similarity index 69% rename from man/download_ecoregion_data.Rd rename to man/download_ecoregion.Rd index 53071a3a..bd7d71d0 100644 --- a/man/download_ecoregion_data.Rd +++ b/man/download_ecoregion.Rd @@ -1,14 +1,13 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/download.R -\name{download_ecoregion_data} -\alias{download_ecoregion_data} +\name{download_ecoregion} +\alias{download_ecoregion} \title{Download ecoregion data} \usage{ -download_ecoregion_data( +download_ecoregion( epa_certificate_path = system.file("extdata/cacert_gaftp_epa.pem", package = "amadeus"), certificate_url = "http://cacerts.digicert.com/DigiCertGlobalG2TLSRSASHA2562020CA1-1.crt", - directory_to_download = NULL, directory_to_save = NULL, acknowledgement = FALSE, download = 
 FALSE,
@@ -25,10 +24,9 @@ for EPA DataCommons. Default is
 \item{certificate_url}{character(1). URL to certificate file. See notes for
 details.}
 
-\item{directory_to_download}{character(1). Directory to download zip file
-of Ecoregion level 3 shapefiles}
-
-\item{directory_to_save}{character(1). Directory to decompress zip files.}
+\item{directory_to_save}{character(1). Directory to save data. Two
+sub-directories will be created for the downloaded zip files ("/zip_files")
+and the unzipped data files ("/data_files").}
 
 \item{acknowledgement}{logical(1). By setting \code{TRUE} the
 user acknowledges that the data downloaded using this function may be very
@@ -44,14 +42,15 @@ the text file containing download commands.}
 
 \item{unzip}{logical(1). Unzip zip files. Default \code{TRUE}.}
 
-\item{remove_zip}{logical(1). Remove zip file from directory_to_download.
-Default \code{FALSE}.}
+\item{remove_zip}{logical(1). Remove zip file from
+\code{directory_to_save}. Default \code{FALSE}.}
 }
 \value{
-NULL;
+NULL; Zip and/or data files will be downloaded and stored in
+\code{directory_to_save}.
 }
 \description{
-The \code{download_ecoregion_data()} function accesses and downloads United States Ecoregions data from the \href{https://www.epa.gov/eco-research/ecoregions}{U.S. Environmental Protection Agency's (EPA) Ecorgions}. Level 3 data, where all pieces of information in the higher levels are included, are downloaded.
+The \code{download_ecoregion()} function accesses and downloads United States Ecoregions data from the \href{https://www.epa.gov/eco-research/ecoregions}{U.S. Environmental Protection Agency's (EPA) Ecoregions}. Level 3 data, where all pieces of information in the higher levels are included, are downloaded.
 }
 \note{
 For EPA Data Commons certificate errors, follow the steps below:
diff --git a/man/download_geos_data.Rd b/man/download_geos.Rd
similarity index 80%
rename from man/download_geos_data.Rd
rename to man/download_geos.Rd
index 20277c5d..766cfc67 100644
--- a/man/download_geos_data.Rd
+++ b/man/download_geos.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/download.R
-\name{download_geos_data}
-\alias{download_geos_data}
+\name{download_geos}
+\alias{download_geos}
 \title{Download atmospheric composition data}
 \usage{
-download_geos_data(
+download_geos(
   collection = c("aqc_tavg_1hr_g1440x721_v1", "chm_tavg_1hr_g1440x721_v1",
     "met_tavg_1hr_g1440x721_x1", "xgc_tavg_1hr_g1440x721_x1", "chm_inst_1hr_g1440x721_p23",
     "met_inst_1hr_g1440x721_p23"),
@@ -25,7 +25,9 @@ data. Format YYYY-MM-DD (ex. September 1, 2023 = \code{"2023-09-01"}).}
 
 \item{date_end}{character(1). length of 10. End date for downloading data.
 Format YYYY-MM-DD (ex. September 1, 2023 = \code{"2023-09-01"}).}
 
-\item{directory_to_save}{character(1). Directory to save data.}
+\item{directory_to_save}{character(1). Directory to save data.
+Sub-directories will be created within \code{directory_to_save} for each
+GEOS-CF collection.}
 
 \item{acknowledgement}{logical(1). By setting \code{TRUE} the
 user acknowledges that the data downloaded using this function may be very
@@ -40,11 +42,11 @@
 Remove (\code{TRUE}) or keep (\code{FALSE})
 the text file containing download commands.}
 }
 \value{
-NULL; Hourly netCDF (.nc4) files will be stored in
-\code{directory_to_save}.
+NULL; netCDF (.nc4) files will be stored in a
+collection-specific folder within \code{directory_to_save}.
} \description{ -The \code{download_geos_data()} function accesses and downloads various +The \code{download_geos()} function accesses and downloads various atmospheric composition collections from \href{https://gmao.gsfc.nasa.gov/GEOS_systems/}{NASA's Global Earth Observing System (GEOS) model}. } \author{ diff --git a/man/download_gmted_data.Rd b/man/download_gmted.Rd similarity index 76% rename from man/download_gmted_data.Rd rename to man/download_gmted.Rd index abc7e55d..ce6440e6 100644 --- a/man/download_gmted_data.Rd +++ b/man/download_gmted.Rd @@ -1,15 +1,14 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/download.R -\name{download_gmted_data} -\alias{download_gmted_data} +\name{download_gmted} +\alias{download_gmted} \title{Download elevation data} \usage{ -download_gmted_data( +download_gmted( statistic = c("Breakline Emphasis", "Systematic Subsample", "Median Statistic", "Minimum Statistic", "Mean Statistic", "Maximum Statistic", "Standard Deviation Statistic"), resolution = c("7.5 arc-seconds", "15 arc-seconds", "30 arc-seconds"), - directory_to_download = NULL, directory_to_save = NULL, acknowledgement = FALSE, download = FALSE, @@ -25,10 +24,9 @@ download_gmted_data( \item{resolution}{character(1). Available resolutions include \code{"7.5 arc-seconds"}, \code{"15 arc-seconds"}, and \code{"30 arc-seconds"}.} -\item{directory_to_download}{character(1). Directory to download zip files -from Global Multi-resolution Terrain Elevation Data (GMTED2010).} - -\item{directory_to_save}{character(1). Directory to decompress zip files.} +\item{directory_to_save}{character(1). Directory to save data. Two +sub-directories will be created for the downloaded zip files ("/zip_files") +and the unzipped data files ("/data_files").} \item{acknowledgement}{logical(1). By setting \code{TRUE} the user acknowledges that the data downloaded using this function may be very @@ -48,12 +46,11 @@ the text file containing download commands. Default is FALSE.} Default is \code{FALSE}.} } \value{ -NULL; Statistic and resolution-specific zip files will be stored in -\code{directory_to_download}, and directories containing raw ASCII Grid data -will be stored in \code{directory_to_save}. +NULL; Zip and/or data files will be downloaded and stored in +\code{directory_to_save}. } \description{ -The \code{download_gmted_data()} function accesses and downloads Global +The \code{download_gmted()} function accesses and downloads Global Multi-resolution Terrain Elevation Data (GMTED2010) from \href{https://www.usgs.gov/coastal-changes-and-impacts/gmted2010}{U.S. Geological Survey and National Geospatial-Intelligence Agency}. 
} diff --git a/man/download_gridmet_data.Rd b/man/download_gridmet.Rd similarity index 79% rename from man/download_gridmet_data.Rd rename to man/download_gridmet.Rd index 2c2fe91e..4ef5c8cc 100644 --- a/man/download_gridmet_data.Rd +++ b/man/download_gridmet.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/download.R -\name{download_gridmet_data} -\alias{download_gridmet_data} +\name{download_gridmet} +\alias{download_gridmet} \title{Download gridMET data} \usage{ -download_gridmet_data( +download_gridmet( variables = NULL, year_start = 2022, year_end = 2022, @@ -41,11 +41,11 @@ Remove (\code{TRUE}) or keep (\code{FALSE}) the text file containing download commands.} } \value{ -NULL; Yearly netCDF (.nc) files will be stored in a variable-specific +NULL; netCDF (.nc) files will be stored in a variable-specific folder within \code{directory_to_save}. } \description{ -The \code{download_gridmet_data} function accesses and downloads gridded surface meteorological data from the \href{https://www.climatologylab.org/gridmet.html}{University of California Merced Climatology Lab's gridMET dataset}. +The \code{download_gridmet} function accesses and downloads gridded surface meteorological data from the \href{https://www.climatologylab.org/gridmet.html}{University of California Merced Climatology Lab's gridMET dataset}. } \author{ Mitchell Manware diff --git a/man/download_hms_data.Rd b/man/download_hms.Rd similarity index 74% rename from man/download_hms_data.Rd rename to man/download_hms.Rd index 02ef3a36..91c8930c 100644 --- a/man/download_hms_data.Rd +++ b/man/download_hms.Rd @@ -1,14 +1,13 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/download.R -\name{download_hms_data} -\alias{download_hms_data} +\name{download_hms} +\alias{download_hms} \title{Download wildfire smoke data} \usage{ -download_hms_data( +download_hms( data_format = "Shapefile", date_start = "2023-09-01", date_end = "2023-09-01", - directory_to_download = NULL, directory_to_save = NULL, acknowledgement = FALSE, download = FALSE, @@ -26,12 +25,11 @@ data. Format YYYY-MM-DD (ex. September 1, 2023 is \code{"2023-09-01"}).} \item{date_end}{character(1). length of 10. End date for downloading data. Format YYYY-MM-DD (ex. September 10, 2023 is \code{"2023-09-10"}).} -\item{directory_to_download}{character(1). Directory to download zip files -from NOAA Hazard Mapping System Fire and Smoke Product. (Ignored if -\code{data_format = "KML"}.)} - -\item{directory_to_save}{character(1). Directory to save unzipped shapefiles -and KML files.} +\item{directory_to_save}{character(1). Directory to save data. If +\code{data_format = "Shapefile"}, two sub-directories will be created for the +downloaded zip files ("/zip_files") and the unzipped shapefiles +("/data_files"). If \code{data_format = "KML"}, a single sub-directory +("/data_files") will be created.} \item{acknowledgement}{logical(1). By setting \code{TRUE} the @@ -54,12 +52,11 @@ directory_to_download. Default is \code{FALSE}. (Ignored if \code{data_format = "KML"}.)} } \value{ -NULL; Zip file will be stored in \code{directory_to_download}, and -Shapefiles (.shp) or KML files (.kml) will be stored in -\code{directory_to_save}. +NULL; Zip and/or data files will be downloaded and stored in +respective sub-directories within \code{directory_to_save}. 
 }
 \description{
-The \code{download_hms_data()} function accesses and downloads
+The \code{download_hms()} function accesses and downloads
 wildfire smoke plume coverage data from \href{https://www.ospo.noaa.gov/Products/land/hms.html#0}{NOAA's Hazard Mapping System Fire and Smoke Product}.
 }
 \author{
diff --git a/man/download_huc_data.Rd b/man/download_huc.Rd
similarity index 91%
rename from man/download_huc_data.Rd
rename to man/download_huc.Rd
index ff2477d0..ec3c2e7e 100644
--- a/man/download_huc_data.Rd
+++ b/man/download_huc.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/download.R
-\name{download_huc_data}
-\alias{download_huc_data}
+\name{download_huc}
+\alias{download_huc}
 \title{Download National Hydrography Dataset (NHD) data}
 \usage{
-download_huc_data(
+download_huc(
   region = c("Lower48", "Islands"),
   type = c("Seamless", "OceanCatchment"),
   directory_to_save = NULL,
@@ -38,7 +38,7 @@ the text file containing download commands.}
 Default is \code{FALSE}. Not working for this function since HUC data is in 7z format.}
 
 \value{
-None. Downloaded files will be stored in \code{directory_to_save}.
+NULL. Downloaded files will be stored in \code{directory_to_save}.
 }
 \description{
 NHDPlus data provides the most comprehensive and high-resolution
@@ -53,7 +53,7 @@ please visit \href{https://www.epa.gov/waterdata/get-nhdplus-national-hydrograph
 }
 \examples{
 \dontrun{
-download_huc("Lower48", "Seamless", "~/data"
+download_huc("Lower48", "Seamless", "/data",
   acknowledgement = TRUE,
   download = TRUE,
   unzip = TRUE)
diff --git a/man/download_koppen_geiger_data.Rd b/man/download_koppen_geiger.Rd
similarity index 75%
rename from man/download_koppen_geiger_data.Rd
rename to man/download_koppen_geiger.Rd
index f38ad4be..926cd4f5 100644
--- a/man/download_koppen_geiger_data.Rd
+++ b/man/download_koppen_geiger.Rd
@@ -1,13 +1,12 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/download.R
-\name{download_koppen_geiger_data}
-\alias{download_koppen_geiger_data}
+\name{download_koppen_geiger}
+\alias{download_koppen_geiger}
 \title{Download climate classification data}
 \usage{
-download_koppen_geiger_data(
+download_koppen_geiger(
   data_resolution = c("0.0083", "0.083", "0.5"),
   time_period = c("Present", "Future"),
-  directory_to_download = NULL,
   directory_to_save = NULL,
   acknowledgement = FALSE,
   download = FALSE,
@@ -25,11 +24,9 @@ degrees (approx. 1 km), \code{"0.083"} degrees (approx. 10 km), and
 and \code{"Future"} (2071-2100). ("Future" classifications are based on
 scenario RCP8.5).}
 
-\item{directory_to_download}{character(1). Directory to download zip files
-from Present and future Köppen-Geiger climate classification maps at 1-km
-resolution.}
-
-\item{directory_to_save}{character(1). Directory to decompress zip files.}
+\item{directory_to_save}{character(1). Directory to save data. Two
+sub-directories will be created for the downloaded zip files ("/zip_files")
+and the unzipped data files ("/data_files").}
 
 \item{acknowledgement}{logical(1). By setting \code{TRUE} the
 user acknowledges that the data downloaded using this function may be very
@@ -49,11 +46,11 @@
 the text file containing download commands.}
 
 Default is \code{FALSE}.}
 }
 \value{
-NULL; Zip file will be stored in \code{directory_to_download}, and
-selected GeoTIFF (.tif) files will be stored in \code{directory_to_save}.
+NULL; Zip and/or data files will be downloaded and stored in
+respective sub-directories within \code{directory_to_save}.
} \description{ -The \code{download_koppen_geiger_data()} function accesses and downloads +The \code{download_koppen_geiger()} function accesses and downloads climate classification data from the \emph{Present and future Köppen-Geiger climate classification maps at 1-km resolution}(\href{https://www.nature.com/articles/sdata2018214}{link for article}; \href{https://figshare.com/articles/dataset/Present_and_future_K_ppen-Geiger_climate_classification_maps_at_1-km_resolution/6396959/2}{link for data}). diff --git a/man/download_merra2_data.Rd b/man/download_merra2.Rd similarity index 90% rename from man/download_merra2_data.Rd rename to man/download_merra2.Rd index 1225703b..070dc701 100644 --- a/man/download_merra2_data.Rd +++ b/man/download_merra2.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/download.R -\name{download_merra2_data} -\alias{download_merra2_data} +\name{download_merra2} +\alias{download_merra2} \title{Download meteorological and atmospheric data} \usage{ -download_merra2_data( +download_merra2( collection = c("inst1_2d_asm_Nx", "inst1_2d_int_Nx", "inst1_2d_lfo_Nx", "inst3_3d_asm_Np", "inst3_3d_aer_Nv", "inst3_3d_asm_Nv", "inst3_3d_chm_Nv", "inst3_3d_gas_Nv", "inst3_2d_gas_Nx", "inst6_3d_ana_Np", "inst6_3d_ana_Nv", @@ -48,11 +48,11 @@ Remove (\code{TRUE}) or keep (\code{FALSE}) the text file containing download commands.} } \value{ -NULL; Daily netCDF (.nc4) files will be stored in -\code{directory_to_save}. +NULL; netCDF (.nc4) files will be stored in a +collection-specific folder within \code{directory_to_save}. } \description{ -The \code{download_merra2_data()} function accesses and downloads various +The \code{download_merra2()} function accesses and downloads various meteorological and atmospheric collections from \href{https://gmao.gsfc.nasa.gov/reanalysis/MERRA-2/}{NASA's Modern-Era Retrospective analysis for Research and Applications, Version 2 (MERRA-2) model}. } \author{ diff --git a/man/download_modis_data.Rd b/man/download_modis.Rd similarity index 95% rename from man/download_modis_data.Rd rename to man/download_modis.Rd index cb46d19c..85e56764 100644 --- a/man/download_modis_data.Rd +++ b/man/download_modis.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/download.R -\name{download_modis_data} -\alias{download_modis_data} +\name{download_modis} +\alias{download_modis} \title{Download MODIS product files} \usage{ -download_modis_data( +download_modis( product = c("MOD09GA", "MOD11A1", "MOD06_L2", "MCD19A2", "MOD13A2", "VNP46A2"), version = "61", horizontal_tiles = c(7, 13), @@ -56,7 +56,7 @@ large and use lots of machine storage and memory.} the text file containing download commands.} } \value{ -NULL; Raw HDF (.hdf) files will be stored in +NULL; HDF (.hdf) files will be stored in \code{directory_to_save}. 
} \description{ diff --git a/man/download_narr_monolevel_data.Rd b/man/download_narr_monolevel.Rd similarity index 67% rename from man/download_narr_monolevel_data.Rd rename to man/download_narr_monolevel.Rd index f52d9bac..7d4814f8 100644 --- a/man/download_narr_monolevel_data.Rd +++ b/man/download_narr_monolevel.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/download.R -\name{download_narr_monolevel_data} -\alias{download_narr_monolevel_data} +\name{download_narr_monolevel} +\alias{download_narr_monolevel} \title{Download meteorological data (monolevel)} \usage{ -download_narr_monolevel_data( +download_narr_monolevel( variables = NULL, year_start = 2022, year_end = 2022, @@ -40,11 +40,11 @@ Remove (\code{TRUE}) or keep (\code{FALSE}) the text file containing download commands.} } \value{ -NULL; Yearly netCDF (.nc) files will be stored in a variable-specific +NULL; netCDF (.nc) files will be stored in a variable-specific folder within \code{directory_to_save}. } \description{ -The \code{download_narr_monolevel_data} function accesses and downloads monolevel meteorological data from \href{https://psl.noaa.gov/data/gridded/data.narr.html}{NOAA's North American Regional Reanalysis (NARR) model}. "Monolevel" variables contain a single value for the entire atmospheric column (ie. Variable: Convective cloud cover; Level: Entire atmosphere considered as a single layer), or represent a specific altitude associated with the variable (ie. Variable: Air temperature; Level: 2 m). +The \code{download_narr_monolevel} function accesses and downloads monolevel meteorological data from \href{https://psl.noaa.gov/data/gridded/data.narr.html}{NOAA's North American Regional Reanalysis (NARR) model}. "Monolevel" variables contain a single value for the entire atmospheric column (ie. Variable: Convective cloud cover; Level: Entire atmosphere considered as a single layer), or represent a specific altitude associated with the variable (ie. Variable: Air temperature; Level: 2 m). } \author{ Mitchell Manware, Insang Song diff --git a/man/download_narr_p_levels_data.Rd b/man/download_narr_p_levels.Rd similarity index 71% rename from man/download_narr_p_levels_data.Rd rename to man/download_narr_p_levels.Rd index ac87c519..ecb28eda 100644 --- a/man/download_narr_p_levels_data.Rd +++ b/man/download_narr_p_levels.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/download.R -\name{download_narr_p_levels_data} -\alias{download_narr_p_levels_data} +\name{download_narr_p_levels} +\alias{download_narr_p_levels} \title{Download meteorological data (pressure levels)} \usage{ -download_narr_p_levels_data( +download_narr_p_levels( variables = NULL, year_start = 2022, year_end = 2022, @@ -40,11 +40,11 @@ Remove (\code{TRUE}) or keep (\code{FALSE}) the text file containing download commands.} } \value{ -NULL; Monthly netCDF (.nc) files will be stored in +NULL; netCDF (.nc) files will be stored in \code{directory_to_save}. } \description{ -The \code{download_narr_p_levels_data} function accesses and downloads pressure levels meteorological data from \href{https://psl.noaa.gov/data/gridded/data.narr.html}{NOAA's North American Regional Reanalysis (NARR) model}. "Pressure levels" variables contain variable values at 29 atmospheric levels, ranging from 1000 hPa to 100 hPa. All pressure levels data will be downloaded for each variable. 
+The \code{download_narr_p_levels} function accesses and downloads pressure levels meteorological data from \href{https://psl.noaa.gov/data/gridded/data.narr.html}{NOAA's North American Regional Reanalysis (NARR) model}. "Pressure levels" variables contain variable values at 29 atmospheric levels, ranging from 1000 hPa to 100 hPa. All pressure levels data will be downloaded for each variable. } \author{ Mitchell Manware, Insang Song diff --git a/man/download_nei_data.Rd b/man/download_nei.Rd similarity index 75% rename from man/download_nei_data.Rd rename to man/download_nei.Rd index 4b76ad06..5b842b01 100644 --- a/man/download_nei_data.Rd +++ b/man/download_nei.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/download.R -\name{download_nei_data} -\alias{download_nei_data} +\name{download_nei} +\alias{download_nei} \title{Download road emissions data} \usage{ -download_nei_data( +download_nei( epa_certificate_path = system.file("extdata/cacert_gaftp_epa.pem", package = "amadeus"), certificate_url = "http://cacerts.digicert.com/DigiCertGlobalG2TLSRSASHA2562020CA1-1.crt", @@ -27,7 +27,9 @@ details.} \item{year_target}{Available years of NEI data. Default is \code{c(2017L, 2020L)}.} -\item{directory_to_save}{character(1). Directory to download files.} +\item{directory_to_save}{character(1). Directory to save data. Two +sub-directories will be created for the downloaded zip files ("/zip_files") +and the unzipped data files ("/data_files").} \item{acknowledgement}{logical(1). By setting \code{TRUE} the user acknowledges that the data downloaded using this function may be very @@ -45,11 +47,11 @@ the text file containing download commands.} Default is \code{FALSE}.} } \value{ -NULL; Yearly comma-separated value (CSV) files will be stored in -\code{directory_to_save}. +NULL; Zip and/or data files will be downloaded and stored in +respective sub-directories within \code{directory_to_save}. } \description{ -The \code{download_nei_data()} function accesses and downloads road emissions data from the \href{https://www.epa.gov/air-emissions-inventories/national-emissions-inventory-nei}{U.S Environmental Protection Agency's (EPA) National Emissions Inventory (NEI)}. +The \code{download_nei()} function accesses and downloads road emissions data from the \href{https://www.epa.gov/air-emissions-inventories/national-emissions-inventory-nei}{U.S Environmental Protection Agency's (EPA) National Emissions Inventory (NEI)}. } \note{ For EPA Data Commons certificate errors, follow the steps below: diff --git a/man/download_nlcd_data.Rd b/man/download_nlcd.Rd similarity index 75% rename from man/download_nlcd_data.Rd rename to man/download_nlcd.Rd index ac1ca866..521b1ad0 100644 --- a/man/download_nlcd_data.Rd +++ b/man/download_nlcd.Rd @@ -1,13 +1,12 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/download.R -\name{download_nlcd_data} -\alias{download_nlcd_data} +\name{download_nlcd} +\alias{download_nlcd} \title{Download land cover data} \usage{ -download_nlcd_data( +download_nlcd( collection = "Coterminous United States", year = 2021, - directory_to_download = NULL, directory_to_save = NULL, acknowledgement = FALSE, download = FALSE, @@ -24,10 +23,9 @@ include \code{2001}, \code{2004}, \code{2006}, \code{2008}, \code{2011}, \code{2 \code{2019}, and \code{2021}. Available years for Alaska include \code{2001}, \code{2011}, and \code{2016}.} -\item{directory_to_download}{character(1). 
Directory to download zip files -from National Land Cover Database Science Research Products.} - -\item{directory_to_save}{character(1). Directory to decompress zip files.} +\item{directory_to_save}{character(1). Directory to save data. Two +sub-directories will be created for the downloaded zip files ("/zip_files") +and the unzipped shapefiles ("/data_files").} \item{acknowledgement}{logical(1). By setting \code{TRUE} the user acknowledges that the data downloaded using this function may be very @@ -47,11 +45,11 @@ the text file containing download commands.} Default is \code{FALSE}.} } \value{ -NULL; Zip file will be stored in \code{directory_to_download}, and -selected GeoTIFF (.tif) files will be stored in \code{directory_to_save}. +NULL; Zip and/or data files will be downloaded and stored in +respective sub-directories within \code{directory_to_save}. } \description{ -The \code{download_nlcd_data()} function accesses and downloads +The \code{download_nlcd()} function accesses and downloads land cover data from the \href{https://www.mrlc.gov/data}{Multi-Resolution Land Characteristics (MRLC) Consortium's National Land Cover Database (NLCD) products data base}. } diff --git a/man/download_olm_data.Rd b/man/download_olm.Rd similarity index 98% rename from man/download_olm_data.Rd rename to man/download_olm.Rd index 2c65cc74..e02d965d 100644 --- a/man/download_olm_data.Rd +++ b/man/download_olm.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/download.R -\name{download_olm_data} -\alias{download_olm_data} +\name{download_olm} +\alias{download_olm} \title{Download OpenLandMap data} \usage{ -download_olm_data( +download_olm( product = NULL, format = "tif", directory_to_save = NULL, diff --git a/man/download_prism_data.Rd b/man/download_prism.Rd similarity index 95% rename from man/download_prism_data.Rd rename to man/download_prism.Rd index a59a3639..3dadecf5 100644 --- a/man/download_prism_data.Rd +++ b/man/download_prism.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/download.R -\name{download_prism_data} -\alias{download_prism_data} +\name{download_prism} +\alias{download_prism} \title{Download PRISM data} \usage{ -download_prism_data( +download_prism( time, element = c("ppt", "tmin", "tmax", "tmean", "tdmean", "vpdmin", "vpdmax", "solslope", "soltotal", "solclear", "soltrans"), @@ -63,8 +63,8 @@ Remove (\code{TRUE}) or keep (\code{FALSE}) the text file containing download commands.} } \value{ -NULL; .bil (normals) or single grid files depending on the format choice. -\code{directory_to_save}. +NULL; .bil (normals) or single grid files depending on the format +choice will be stored in \code{directory_to_save}. } \description{ Accesses and downloads Oregon State University's @@ -72,7 +72,7 @@ PRISM data from the PRISM Climate Group Web Service } \examples{ \dontrun{ -download_prism_data( +download_prism( time = "202104", element = "ppt", data_type = "ts", diff --git a/man/download_remove_zips.Rd b/man/download_remove_zips.Rd index a28d5902..72d26dbc 100644 --- a/man/download_remove_zips.Rd +++ b/man/download_remove_zips.Rd @@ -15,7 +15,9 @@ download_remove_zips(remove = FALSE, download_name) Remove downloaded ".zip" files. } \note{ +!!! USE THE FUNCTION WITH CAUTION !!! If \code{remove = TRUE}, ensure that \code{unzip = TRUE}. Choosing to remove ".zip" files without unzipping will retain none of the downloaded data. +Setting \code{remove = TRUE} removes all files in the directory two levels above the downloaded zip files.
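The caution above implies an order of operations; a minimal sketch using the download_nlcd() signature shown earlier (the save path is hypothetical) is to decompress before removing, so the data survive the zip cleanup:

# Sketch: decompress into "/data_files" before removing "/zip_files".
download_data(
  dataset_name = "nlcd",
  year = 2021,
  collection = "Coterminous United States",
  directory_to_save = "./data/nlcd",  # hypothetical path
  acknowledgement = TRUE,
  download = TRUE,
  unzip = TRUE,       # decompress first
  remove_zip = TRUE   # then removing the zip files is safe
)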
} \keyword{internal} diff --git a/man/download_sedac_groads_data.Rd b/man/download_sedac_groads.Rd similarity index 75% rename from man/download_sedac_groads_data.Rd rename to man/download_sedac_groads.Rd index c070b0bf..cda88773 100644 --- a/man/download_sedac_groads_data.Rd +++ b/man/download_sedac_groads.Rd @@ -1,14 +1,13 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/download.R -\name{download_sedac_groads_data} -\alias{download_sedac_groads_data} +\name{download_sedac_groads} +\alias{download_sedac_groads} \title{Download roads data} \usage{ -download_sedac_groads_data( +download_sedac_groads( data_region = c("Americas", "Global", "Africa", "Asia", "Europe", "Oceania East", "Oceania West"), data_format = c("Shapefile", "Geodatabase"), - directory_to_download = NULL, directory_to_save = NULL, acknowledgement = FALSE, download = FALSE, @@ -24,10 +23,9 @@ download_sedac_groads_data( \item{data_format}{character(1). Data can be downloaded as \code{"Shapefile"} or \code{"Geodatabase"}. (Only \code{"Geodatabase"} available for \code{"Global"} region).} -\item{directory_to_download}{character(1). Directory to download zip files -from NASA Global Roads Open Access Data Set.} - -\item{directory_to_save}{character(1). Directory to decompress zip files.} +\item{directory_to_save}{character(1). Directory to save data. Two +sub-directories will be created for the downloaded zip files ("/zip_files") +and the unzipped shapefiles ("/data_files").} \item{acknowledgement}{logical(1). By setting \code{TRUE} the user acknowledges that the data downloaded using this function may be very @@ -47,12 +45,11 @@ the text file containing download commands.} Default is \code{FALSE}.} } \value{ -NULL; Zip file will be stored in \code{directory_to_download}, and -selected Shapefile (.shp) or Geodatabase (.gdb) files will be stored in -\code{directory_to_save}. +NULL; Zip and/or data files will be downloaded and stored in +respective sub-directories within \code{directory_to_save}. } \description{ -The \code{download_sedac_groads_data()} function accesses and downloads +The \code{download_sedac_groads()} function accesses and downloads roads data from \href{https://sedac.ciesin.columbia.edu/data/set/groads-global-roads-open-access-v1/data-download}{NASA's Global Roads Open Access Data Set (gROADS), v1 (1980-2010)}. } \author{ diff --git a/man/download_sedac_population_data.Rd b/man/download_sedac_population.Rd similarity index 76% rename from man/download_sedac_population_data.Rd rename to man/download_sedac_population.Rd index 18b28ae6..73840a15 100644 --- a/man/download_sedac_population_data.Rd +++ b/man/download_sedac_population.Rd @@ -1,14 +1,13 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/download.R -\name{download_sedac_population_data} -\alias{download_sedac_population_data} +\name{download_sedac_population} +\alias{download_sedac_population} \title{Download population density data} \usage{ -download_sedac_population_data( +download_sedac_population( data_resolution = "60 minute", data_format = c("GeoTIFF", "ASCII", "netCDF"), year = "2020", - directory_to_download = NULL, directory_to_save = NULL, acknowledgement = FALSE, download = FALSE, @@ -28,10 +27,9 @@ download_sedac_population_data( \item{year}{character(1). Available years are \code{2000}, \code{2005}, \code{2010}, \code{2015}, and \code{2020}, or \code{"all"} for all years.} -\item{directory_to_download}{character(1). 
Directory to download zip files -from NASA UN WPP-Adjusted Population Density, v4.11.} - -\item{directory_to_save}{character(1). Directory to decompress zip files.} +\item{directory_to_save}{character(1). Directory to save data. Two +sub-directories will be created for the downloaded zip files ("/zip_files") +and the unzipped shapefiles ("/data_files").} \item{acknowledgement}{logical(1). By setting \code{TRUE} the user acknowledges that the data downloaded using this function may be very @@ -51,11 +49,11 @@ the text file containing download commands.} Default is \code{FALSE}.} } \value{ -NULL; Zip file will be stored in \code{directory_to_download}, and -selected GeoTIFF (.tif) files will be stored in \code{directory_to_save}. +NULL; Zip and/or data files will be downloaded and stored in +respective sub-directories within \code{directory_to_save}. } \description{ -The \code{download_sedac_population_data()} function accesses and downloads +The \code{download_sedac_population()} function accesses and downloads population density data from \href{https://sedac.ciesin.columbia.edu/data/set/gpw-v4-population-density-adjusted-to-2015-unwpp-country-totals-rev11}{NASA's UN WPP-Adjusted Population Density, v4.11}. } \author{ diff --git a/man/download_setup_dir.Rd b/man/download_setup_dir.Rd index 9a3f833d..6f0c8653 100644 --- a/man/download_setup_dir.Rd +++ b/man/download_setup_dir.Rd @@ -4,10 +4,17 @@ \alias{download_setup_dir} \title{Setup directory} \usage{ -download_setup_dir(directory) +download_setup_dir(directory, zip = FALSE) } \arguments{ \item{directory}{character(1) directory path} + +\item{zip}{logical(1). Should sub-directories be created for zip files and +data files? If \code{TRUE}, a vector of sub-directory names will be returned.} +} +\value{ +NULL; if \code{zip = TRUE} a vector of directories for zip files and +data files } \description{ Create \code{directory} if it does not already exist. diff --git a/man/download_terraclimate_data.Rd b/man/download_terraclimate.Rd similarity index 77% rename from man/download_terraclimate_data.Rd rename to man/download_terraclimate.Rd index 0b523ce0..1f1f35de 100644 --- a/man/download_terraclimate_data.Rd +++ b/man/download_terraclimate.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/download.R -\name{download_terraclimate_data} -\alias{download_terraclimate_data} +\name{download_terraclimate} +\alias{download_terraclimate} \title{Download TerraClimate data} \usage{ -download_terraclimate_data( +download_terraclimate( variables = NULL, year_start = 2022, year_end = 2022, @@ -40,11 +40,11 @@ Remove (\code{TRUE}) or keep (\code{FALSE}) the text file containing download commands.} } \value{ -NULL; Yearly netCDF (.nc) files will be stored in a variable-specific +NULL; netCDF (.nc) files will be stored in a variable-specific folder within \code{directory_to_save}. } \description{ -The \code{download_terraclimate_data} function accesses and downloads climate and water balance data from the \href{https://www.climatologylab.org/terraclimate.html}{University of California Merced Climatology Lab's TerraClimate dataset}. +The \code{download_terraclimate} function accesses and downloads climate and water balance data from the \href{https://www.climatologylab.org/terraclimate.html}{University of California Merced Climatology Lab's TerraClimate dataset}.
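A short sketch of the new zip argument documented above (the directory is hypothetical; the sub-directory names follow the "/zip_files" and "/data_files" convention used by the download functions, and the return shape is as stated in the \value section):

# Sketch: create the directory tree used by the download helpers.
dirs <- download_setup_dir("./data/raw", zip = TRUE)  # documented to return the sub-directory paths
download_setup_dir("./data/raw")                      # zip = FALSE: create the directory only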
} \author{ Mitchell Manware, Insang Song diff --git a/man/download_tri_data.Rd b/man/download_tri.Rd similarity index 73% rename from man/download_tri_data.Rd rename to man/download_tri.Rd index b5f31132..56ea0435 100644 --- a/man/download_tri_data.Rd +++ b/man/download_tri.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/download.R -\name{download_tri_data} -\alias{download_tri_data} +\name{download_tri} +\alias{download_tri} \title{Download toxic release data} \usage{ -download_tri_data( +download_tri( year_start = 2018L, year_end = 2022L, directory_to_save = NULL, @@ -32,11 +32,11 @@ will download all of the requested data files.} the text file containing download commands.} } \value{ -NULL; Yearly comma-separated value (CSV) files will be stored in +NULL; Comma-separated value (CSV) files will be stored in \code{directory_to_save}. } \description{ -The \code{download_tri_data()} function accesses and downloads toxic release data from the \href{https://www.epa.gov/toxics-release-inventory-tri-program/find-understand-and-use-tri}{U.S. Environmental Protection Agency's (EPA) Toxic Release Inventory (TRI) Program}. +The \code{download_tri()} function accesses and downloads toxic release data from the \href{https://www.epa.gov/toxics-release-inventory-tri-program/find-understand-and-use-tri}{U.S. Environmental Protection Agency's (EPA) Toxic Release Inventory (TRI) Program}. } \author{ Mariana Kassien, Insang Song diff --git a/man/process_aqs.Rd b/man/process_aqs.Rd index 0495909c..f8c7fa29 100644 --- a/man/process_aqs.Rd +++ b/man/process_aqs.Rd @@ -7,7 +7,9 @@ process_aqs( path = NULL, date = c("2018-01-01", "2022-12-31"), - return_format = "terra", + mode = c("full", "sparse", "location"), + data_field = "Arithmetic.Mean", + return_format = c("terra", "sf", "data.table"), ... ) } @@ -15,21 +17,39 @@ process_aqs( \item{path}{character(1). Directory path to daily measurement data.} \item{date}{character(2). Start and end date. -Should be in \code{"YYYY-MM-DD"} format and sorted. If \code{NULL}, -only unique locations are returned.} +Should be in \code{"YYYY-MM-DD"} format and sorted.} -\item{return_format}{character(1). \code{"terra"} or \code{"sf"}.} +\item{mode}{character(1). One of "full" (all dates * all locations) +or "sparse" (date-location pairs with available data) or +"location" (unique locations).} + +\item{data_field}{character(1). Data field to extract.} + +\item{return_format}{character(1). \code{"terra"} or \code{"sf"} or \code{"data.table"}.} \item{...}{Placeholders.} } \value{ -a \code{SpatVector} or sf object depending on the \code{return_format} +a SpatVector, sf, or data.table object depending on the \code{return_format} } \description{ The \code{process_aqs()} function cleans and imports raw air quality monitoring sites, returning a single \code{SpatVector} or sf object. +\code{date} is used to filter the raw data read from csv files. +Filtered rows are then processed according to the \code{mode} argument. +Some sites report multiple measurements per day with and without +\href{https://www.epa.gov/sites/default/files/2016-10/documents/exceptional_events.pdf}{exceptional events}; +the internal procedure of this function keeps "Included" if there +are multiple event types per site-time. } \note{ -\code{date = NULL} will return a massive data.table -object. Please choose proper \code{date} values. +Choose \code{date} and \code{mode} values with caution.
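Given the expanded signature, a sparse extraction can be sketched as follows (the path is hypothetical; the mode, data_field, and return_format values come from the options documented above):

# Sketch: date-location pairs with available data, returned as a data.table.
aqs_sparse <- process_aqs(
  path = "./data/aqs",                   # hypothetical directory of daily CSVs
  date = c("2022-01-01", "2022-01-31"),
  mode = "sparse",                       # only site-dates with observations
  data_field = "Arithmetic.Mean",
  return_format = "data.table"
)
# mode = "location" would return unique sites only; mode = "full" crosses
# all dates with all locations and can grow very large.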
+The function may return a massive data.table, resulting in +a long processing time or even a crash. +} +\seealso{ +\itemize{ +\item \code{\link[=download_aqs]{download_aqs()}} +\item \href{https://aqs.epa.gov/aqsweb/documents/codetables/parameters.csv}{EPA, n.d., \emph{AQS Parameter Codes}} +} } diff --git a/man/process_covariates.Rd b/man/process_covariates.Rd index e475eefd..20e34bb1 100644 --- a/man/process_covariates.Rd +++ b/man/process_covariates.Rd @@ -38,7 +38,7 @@ data files before passing to \code{process_covariates}}. \item \item \item -\item \code{\link{process_koppen_geiger}}: \code{"koppen-geiger"}, \code{"koeppen-geiger"}, \code{"koppen"}, +\item \code{\link{process_koppen_geiger}}: \code{"koppen-geiger"}, \code{"koeppen-geiger"}, \code{"koppen"} \item \code{\link{process_ecoregion}}: \code{"ecoregion"}, \code{"ecoregions"} \item \item @@ -56,7 +56,7 @@ data files before passing to \code{process_covariates}}. \item \item \code{\link{process_cropscape}}: \code{"cropscape"}, \code{"cdl"} \item -\item \code{\link{process_olm}}: \code{"olm"}, \verb{"openlandmap} +\item \code{\link{process_olm}}: \code{"olm"}, \code{"openlandmap"} } } \author{ diff --git a/man/process_ecoregion.Rd b/man/process_ecoregion.Rd index b49d1c79..dc74c786 100644 --- a/man/process_ecoregion.Rd +++ b/man/process_ecoregion.Rd @@ -18,6 +18,11 @@ a \code{SpatVector} object The \code{\link{process_ecoregion}} function imports and cleans raw ecoregion data, returning a \code{SpatVector} object. } +\note{ +The function will fix Tukey's bridge in Portland, ME. +This fix will ensure that the EPA air quality monitoring sites +will be located within the ecoregion. +} \author{ Insang Song } diff --git a/man/process_gmted.Rd b/man/process_gmted.Rd index 4069bdd0..c26eeab0 100644 --- a/man/process_gmted.Rd +++ b/man/process_gmted.Rd @@ -9,7 +9,13 @@ process_gmted(variable = NULL, path = NULL, ...) \arguments{ \item{variable}{vector(1). Vector containing the GMTED statistic first and the resolution second. (Example: variable = c("Breakline Emphasis", -"7.5 arc-seconds")).} +"7.5 arc-seconds")). +\itemize{ +\item Statistic options: "Breakline Emphasis", "Systematic Subsample", +"Median Statistic", "Minimum Statistic", "Mean Statistic", +"Maximum Statistic", "Standard Deviation Statistic" +\item Resolution options: "30 arc-seconds", "15 arc-seconds", "7.5 arc-seconds" +}} \item{path}{character(1). Directory with downloaded GMTED "*_grd" folder containing .adf files.} @@ -24,7 +30,8 @@ The \code{process_gmted()} function imports and cleans raw elevation data, returning a single \code{SpatRaster} object. } \note{ -\code{SpatRaster} layer name indicates selected variable and resolution. +\code{SpatRaster} layer name indicates selected variable and resolution, and year +of release (2010). } \author{ Mitchell Manware diff --git a/man/process_locs_vector.Rd b/man/process_locs_vector.Rd index c1291d88..e80697dd 100644 --- a/man/process_locs_vector.Rd +++ b/man/process_locs_vector.Rd @@ -20,7 +20,8 @@ be named "lat" and "lon", respectively.} a \code{SpatVector} object } \description{ -Convert locations from class \code{data.frame} or \code{data.table} to +Detect \code{SpatVector} object, or convert locations from class \code{sf}, +\code{data.frame} or \code{data.table} to \code{SpatVector} object, project to coordinate reference system, and apply circular buffer. 
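Combining the GMTED statistic and resolution options listed above, a minimal import can be sketched as follows (the path is hypothetical; the "be75_grd" folder name mirrors the package tests):

# Sketch: import "Breakline Emphasis" at 7.5 arc-seconds from a downloaded
# "*_grd" folder; the result is a single SpatRaster.
gmted <- process_gmted(
  variable = c("Breakline Emphasis", "7.5 arc-seconds"),
  path = "./data/gmted/be75_grd"  # hypothetical path
)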
} diff --git a/man/process_modis_swath.Rd b/man/process_modis_swath.Rd index 0ab6b99f..a04244c6 100644 --- a/man/process_modis_swath.Rd +++ b/man/process_modis_swath.Rd @@ -18,13 +18,17 @@ process_modis_swath( \item{date}{character(1). Date to query.} -\item{subdataset}{character. Subdatasets to process.} +\item{subdataset}{character. Subdatasets to process. +\strong{Unlike other preprocessing functions, this argument should specify +the exact subdataset name.} For example, when using MOD06_L2 product, +one may specify \code{c("Cloud_Fraction", "Cloud_Optical_Thickness")}, +etc. The subdataset names can be found in \code{terra::describe()} output.} \item{suffix}{character(1). Should be formatted \verb{:\{product\}:}, e.g., \verb{:mod06:}} \item{resolution}{numeric(1). Resolution of output raster. -Unit is degree.} +Unit is degree (decimal degree in WGS84).} \item{...}{For internal use.} } @@ -47,10 +51,10 @@ the full path to the hdf file. } \seealso{ \itemize{ -\item \code{\link{process_modis_warp}} +\item \code{\link[=process_modis_warp]{process_modis_warp()}}, \code{\link[stars:read_stars]{stars::read_stars()}}, \code{\link[stars:st_warp]{stars::st_warp()}} \item \href{https://gdal.org/drivers/raster/hdf4.html}{GDAL HDF4 driver documentation} -\item \code{\link[terra:describe]{terra::describe}}: to list the full subdataset list with \code{sds = TRUE} -\item \code{\link[terra:sprc]{terra::sprc}}, \code{\link[terra:rast]{terra::rast}} +\item \code{\link[terra:describe]{terra::describe()}}: to list the full subdataset list with \code{sds = TRUE} +\item \code{\link[terra:sprc]{terra::sprc()}}, \code{\link[terra:rast]{terra::rast()}} } } \author{ diff --git a/man/process_modis_warp.Rd b/man/process_modis_warp.Rd index b707504b..abf290d5 100644 --- a/man/process_modis_warp.Rd +++ b/man/process_modis_warp.Rd @@ -7,7 +7,7 @@ process_modis_warp( path = NULL, cellsize = 0.1, - threshold = cellsize * 2, + threshold = cellsize * 4, crs = 4326, ... ) diff --git a/man/process_nei.Rd b/man/process_nei.Rd index 0feb1936..cfc6a522 100644 --- a/man/process_nei.Rd +++ b/man/process_nei.Rd @@ -20,7 +20,7 @@ is accepted.} a \code{SpatVector} object } \description{ -The \code{process_tri()} function imports and cleans raw road emissions data, +The \code{process_nei()} function imports and cleans raw road emissions data, returning a single \code{SpatVector} object. NEI data comprises multiple csv files where emissions of diff --git a/man/process_nlcd.Rd b/man/process_nlcd.Rd index c8d67341..472012f4 100644 --- a/man/process_nlcd.Rd +++ b/man/process_nlcd.Rd @@ -4,13 +4,16 @@ \alias{process_nlcd} \title{Process land cover data} \usage{ -process_nlcd(path = NULL, year = 2021, ...) +process_nlcd(path = NULL, year = 2021, extent = NULL, ...) } \arguments{ \item{path}{character giving nlcd data path} \item{year}{numeric giving the year of NLCD data used} +\item{extent}{numeric(4) or SpatExtent giving the extent of the raster +if \code{NULL} (default), the entire raster is loaded} + \item{...}{Placeholders.} } \value{ diff --git a/man/process_olm.Rd b/man/process_olm.Rd index 17ebfa68..73980c52 100644 --- a/man/process_olm.Rd +++ b/man/process_olm.Rd @@ -12,7 +12,7 @@ process_olm(path = NULL, ...) 
\item{...}{Placeholders.} } \value{ -SpatRaster +a \code{SpatRaster} object } \description{ Process OpenLandMap data diff --git a/man/process_sedac_groads.Rd b/man/process_sedac_groads.Rd index 2ed4ec8d..38c03e8d 100644 --- a/man/process_sedac_groads.Rd +++ b/man/process_sedac_groads.Rd @@ -12,14 +12,16 @@ process_sedac_groads(path = NULL, ...) \item{...}{Placeholders.} } \value{ -a \code{SpatVector} boject +a \code{SpatVector} object } \description{ The \code{process_sedac_groads()} function imports and cleans raw road data, returning a single \code{SpatVector} object. } \note{ -U.S. context. +U.S. context. The returned \code{SpatVector} object contains a +\verb{$description} column to represent the temporal range covered by the +dataset. For more information, see \url{https://sedac.ciesin.columbia.edu/data/set/groads-global-roads-open-access-v1/metadata}. } \author{ Insang Song diff --git a/tests/testdata/nei/onroadnc17.csv b/tests/testdata/nei/onroadnc2017.csv similarity index 100% rename from tests/testdata/nei/onroadnc17.csv rename to tests/testdata/nei/onroadnc2017.csv diff --git a/tests/testdata/us_eco_l3_state_boundaries_2024-02-07_wget_command.txt b/tests/testdata/us_eco_l3_state_boundaries_2024-02-07_wget_command.txt deleted file mode 100644 index 88706648..00000000 --- a/tests/testdata/us_eco_l3_state_boundaries_2024-02-07_wget_command.txt +++ /dev/null @@ -1 +0,0 @@ -wget --ca-certificate=/tmp/Rtmp5cEQ3l/cacert_gaftp_epa.pem https://gaftp.epa.gov/EPADataCommons/ORD/Ecoregions/us/us_eco_l3_state_boundaries.zip -O tests/testthat/../testdata//us_eco_l3_state_boundaries.zip diff --git a/tests/testdata/us_eco_l3_state_boundaries_2024-02-08_wget_command.txt b/tests/testdata/us_eco_l3_state_boundaries_2024-02-08_wget_command.txt deleted file mode 100644 index 41b8e70e..00000000 --- a/tests/testdata/us_eco_l3_state_boundaries_2024-02-08_wget_command.txt +++ /dev/null @@ -1 +0,0 @@ -wget --ca-certificate=/tmp/RtmpeRLmFs/cacert_gaftp_epa.pem https://gaftp.epa.gov/EPADataCommons/ORD/Ecoregions/us/us_eco_l3_state_boundaries.zip -O tests/testthat/../testdata//us_eco_l3_state_boundaries.zip diff --git a/tests/testthat/test-calculate_covariates.R b/tests/testthat/test-calculate_covariates.R index ddadaea6..e4050a0e 100644 --- a/tests/testthat/test-calculate_covariates.R +++ b/tests/testthat/test-calculate_covariates.R @@ -1,5 +1,5 @@ ## test for calculating covariates - +## 1. Koppen-Geiger #### testthat::test_that("calc_koppen_geiger works well", { withr::local_package("terra") withr::local_package("sf") @@ -34,12 +34,23 @@ testthat::test_that("calc_koppen_geiger works well", { ) # the result is a data frame testthat::expect_s3_class(kg_res, "data.frame") - # ncol is equal to 6 - testthat::expect_equal(ncol(kg_res), 6) + # ncol is equal to 7 + testthat::expect_equal(ncol(kg_res), 7) # should have only one climate zone - testthat::expect_equal(sum(unlist(kg_res[, -1])), 1) + testthat::expect_equal(sum(unlist(kg_res[, c(-1, -2)])), 1) + # with included geometry + testthat::expect_no_error( + kg_geom <- calc_koppen_geiger( + from = kgras, + locs = sf::st_as_sf(site_faux), + geom = TRUE + ) + ) + testthat::expect_equal(ncol(kg_geom), 8) + testthat::expect_true("geometry" %in% names(kg_geom)) }) +## 2. 
Temporal Dummies #### testthat::test_that("calc_dummies works well", { site_faux <- @@ -47,7 +58,7 @@ testthat::test_that("calc_dummies works well", { site_id = "37031000188101", lon = -78.90, lat = 35.97, - time = "2022-01-01" + time = as.POSIXlt("2022-01-01") ) testthat::expect_no_error( @@ -97,6 +108,7 @@ testthat::test_that("calc_temporal_dummies errors.", { ) }) +## 3. Ecoregions #### testthat::test_that("calc_ecoregion works well", { withr::local_package("terra") withr::local_package("sf") @@ -141,25 +153,40 @@ testthat::test_that("calc_ecoregion works well", { # the result is a data frame testthat::expect_s3_class(ecor_res, "data.frame") # ncol is equal to 2 + 5 + 2 + 1 + 1 - testthat::expect_equal(ncol(ecor_res), 3L) + testthat::expect_equal(ncol(ecor_res), 4L) # should have each of the indicator groups dum_cn <- grep("DUM_", colnames(ecor_res)) testthat::expect_equal( sum(unlist(ecor_res[, dum_cn])), 2L ) -}) + testthat::expect_no_error( + ecor_geom <- calc_ecoregion( + from = erras, + locs = site_faux, + locs_id = "site_id", + geom = TRUE + ) + ) + testthat::expect_equal( + ncol(ecor_geom), 5 + ) + testthat::expect_true( + "geometry" %in% names(ecor_geom) + ) +}) +## 4. MODIS-VIIRS #### testthat::test_that("calc_modis works well.", { withr::local_package("sf") withr::local_package("terra") withr::local_package("stars") withr::local_package("lwgeom") - withr::local_package("foreach") - withr::local_package("doParallel") withr::local_options( - list(sf_use_s2 = FALSE, - foreachDoparLocal = TRUE) + list( + sf_use_s2 = FALSE, + future.resolve.recursive = 2L + ) ) site_faux <- @@ -173,7 +200,7 @@ testthat::test_that("calc_modis works well.", { terra::vect( site_faux, geom = c("lon", "lat"), - keepgeom = TRUE, + keepgeom = FALSE, crs = "EPSG:4326") # case 1: standard mod11a1 @@ -331,6 +358,13 @@ testthat::test_that("calc_modis works well.", { locs = sf::st_as_sf(site_faux) ) ) + testthat::expect_error( + calc_modis_daily( + from = terra::rast(nrow = 3, ncol = 3, vals = 1:9, names = "a"), + date = "2021-08-15", + locs = array(1:12, dim = c(2, 2, 3)) + ) + ) site_faux0 <- site_faux names(site_faux0)[2] <- "date" testthat::expect_error( @@ -341,7 +375,7 @@ testthat::test_that("calc_modis works well.", { ) ) site_faux2 <- site_faux - site_faux2[, 4] <- NULL + #site_faux2[, 4] <- NULL path_mcd19 <- testthat::test_path( @@ -388,7 +422,7 @@ testthat::test_that("calc_modis works well.", { preprocess = process_bluemarble, name_covariates = c("MOD_NITLT_0_", "MOD_K1_"), subdataset = 3L, - nthreads = 1, + nthreads = 2, tile_df = process_bluemarble_corners(c(9, 10), c(5, 5)) ) ) @@ -404,19 +438,25 @@ testthat::test_that("calc_modis works well.", { ) ) testthat::expect_s3_class(flushed, "data.frame") - testthat::expect_true(any(unlist(flushed) == -99999)) + testthat::expect_true(unlist(flushed[, 2]) == -99999) }) - +## 5. 
NLCD #### testthat::test_that("Check calc_nlcd works", { withr::local_package("terra") withr::local_package("exactextractr") + withr::local_package("sf") + withr::local_package("future") + withr::local_package("future.apply") + withr::local_options( + list(sf_use_s2 = FALSE, future.resolve.recursive = 2L) + ) - point_us1 <- cbind(lon = -114.7, lat = 38.9, dem = 40) - point_us2 <- cbind(lon = -114, lat = 39, dem = 15) - point_ak <- cbind(lon = -155.997, lat = 69.3884, dem = 100) # alaska - point_fr <- cbind(lon = 2.957, lat = 43.976, dem = 15) # france + point_us1 <- cbind(lon = -114.7, lat = 38.9, site_id = 1) + point_us2 <- cbind(lon = -114, lat = 39, site_id = 2) + point_ak <- cbind(lon = -155.997, lat = 69.3884, site_id = 3) # alaska + point_fr <- cbind(lon = 2.957, lat = 43.976, site_id = 4) # france eg_data <- rbind(point_us1, point_us2, point_ak, point_fr) |> as.data.frame() |> terra::vect(crs = "EPSG:4326") @@ -439,6 +479,12 @@ testthat::test_that("Check calc_nlcd works", { radius = "1000"), "radius is not a numeric." ) + testthat::expect_error( + calc_nlcd(locs = eg_data, + from = nlcdras, + mode = "whatnot", + radius = 1000) + ) # -- buf_radius has likely value testthat::expect_error( calc_nlcd(locs = eg_data, @@ -446,6 +492,37 @@ testthat::test_that("Check calc_nlcd works", { radius = -3), "radius has not a likely value." ) + + # -- two modes work properly + testthat::expect_no_error( + calc_nlcd(locs = sf::st_as_sf(eg_data), + from = nlcdras, + mode = "exact", + radius = 1000) + ) + testthat::expect_no_error( + calc_nlcd(locs = eg_data, + from = nlcdras, + mode = "terra", + radius = 300) + ) + # -- multicore mode works properly + testthat::expect_no_error( + calc_nlcd(locs = eg_data, + from = nlcdras, + mode = "exact", + radius = 1000, + nthreads = 2L) + ) + testthat::expect_no_error( + calc_nlcd(locs = eg_data, + from = nlcdras, + mode = "terra", + radius = 1000, + nthreads = 2L) + ) + + # -- year is numeric testthat::expect_error( process_nlcd(path = path_testdata, year = "2021"), @@ -462,11 +539,10 @@ testthat::test_that("Check calc_nlcd works", { year = 1789), "NLCD data not available for this year." ) - # -- data_vect is a SpatVector testthat::expect_error( calc_nlcd(locs = 12, - from = nlcdras), - "locs is not a terra::SpatVector." + locs_id = "site_id", + from = nlcdras) ) testthat::expect_error( calc_nlcd(locs = eg_data, @@ -492,39 +568,58 @@ testthat::test_that("Check calc_nlcd works", { testthat::expect_no_error( calc_nlcd( locs = eg_data, + locs_id = "site_id", from = nlcdras, radius = buf_radius ) ) output <- calc_nlcd( locs = eg_data, + locs_id = "site_id", radius = buf_radius, from = nlcdras ) - # -- returns a SpatVector - testthat::expect_equal(class(output)[1], "SpatVector") - # -- crs is the same than input - testthat::expect_true(terra::same.crs(eg_data, output)) - # -- out-of-mainland-US points removed (France and Alaska) - testthat::expect_equal(nrow(output), 2) - # -- initial names are still in the output SpatVector + # -- returns a data.frame + testthat::expect_equal(class(output)[1], "data.frame") + # nrow(output) == nrow(input) + testthat::expect_equal(nrow(output), 4) + # -- initial names are still in the output data.frame testthat::expect_true(all(names(eg_data) %in% names(output))) # -- check the value of some of the points in the US + # the value has changed. What affected this behavior? 
testthat::expect_equal( - output$LDU_TEFOR_0_03000[1], 0.7940682, tolerance = 1e-7 + output$LDU_TEFOR_0_03000[1], 0.8119843, tolerance = 1e-7 ) testthat::expect_equal( - output$LDU_TSHRB_0_03000[2], 0.9987249, tolerance = 1e-7 + output$LDU_TSHRB_0_03000[2], 0.9630467, tolerance = 1e-7 ) # -- class fraction rows should sum to 1 testthat::expect_equal( - rowSums(as.data.frame(output[, 2:(ncol(output) - 1)])), + unname(rowSums(output[1:2, 3:(ncol(output))])), rep(1, 2), tolerance = 1e-7 ) + # without geometry will have 15 columns + testthat::expect_equal( + ncol(output), 15 + ) + output_geom <- calc_nlcd( + locs = eg_data, + locs_id = "site_id", + radius = buf_radius, + from = nlcdras, + geom = TRUE + ) + # with geometry will have 16 columns + testthat::expect_equal( + ncol(output_geom), 16 + ) + testthat::expect_true( + "geometry" %in% names(output_geom) + ) }) - +## 6. NEI #### testthat::test_that("NEI calculation", { withr::local_package("terra") withr::local_package("sf") @@ -555,17 +650,19 @@ testthat::test_that("NEI calculation", { ) ) # inspecting calculated results - testthat::expect_s4_class(neiras, "SpatVector") + testthat::expect_true(inherits(neiras, "SpatVector")) + testthat::expect_true(nrow(neiras) == 3) # sf case testthat::expect_no_error( - process_nei( + neires <- process_nei( path = neipath, county = sf::st_as_sf(nc), year = 2017 ) ) - + testthat::expect_true(inherits(neires, "SpatVector")) + testthat::expect_true(nrow(neires) == 3) # error cases testthat::expect_error( @@ -581,7 +678,7 @@ testthat::test_that("NEI calculation", { # calc_nei ncp <- data.frame(lon = -78.8277, lat = 35.95013) ncp$site_id <- "3799900018810101" - ncp$time <- 2018 + ncp$time <- 2018L ncp <- terra::vect(ncp, keepgeom = TRUE, crs = "EPSG:4326") nc <- terra::project(nc, "EPSG:4326") @@ -604,7 +701,7 @@ testthat::test_that("NEI calculation", { }) - +## 7. TRI #### testthat::test_that("TRI calculation", { withr::local_package("terra") withr::local_package("sf") @@ -615,11 +712,11 @@ testthat::test_that("TRI calculation", { ncp <- data.frame(lon = -78.8277, lat = 35.95013) ncp$site_id <- "3799900018810101" - ncp$time <- 2018 + ncp$time <- 2018L ncpt <- terra::vect(ncp, geom = c("lon", "lat"), keepgeom = TRUE, crs = "EPSG:4326") - ncpt$time <- c(2018) + ncpt$time <- 2018L path_tri <- testthat::test_path("..", "testdata", "tri") testthat::expect_no_error( @@ -666,7 +763,7 @@ testthat::test_that("TRI calculation", { ) }) - +## 8. SEDC #### testthat::test_that("calc_sedc tests", { withr::local_package("terra") withr::local_package("sf") @@ -677,7 +774,7 @@ testthat::test_that("calc_sedc tests", { ncp <- data.frame(lon = -78.8277, lat = 35.95013) ncp$site_id <- "3799900018810101" - ncp$time <- 2018 + ncp$time <- 2018L ncpt <- terra::vect(ncp, geom = c("lon", "lat"), keepgeom = TRUE, crs = "EPSG:4326") @@ -726,6 +823,7 @@ testthat::test_that("calc_sedc tests", { }) +## 9.
HMS #### testthat::test_that("calc_hms returns expected.", { withr::local_package("terra") densities <- c( @@ -787,10 +885,6 @@ testthat::test_that("calc_hms returns expected.", { expect_true( all(unique(hms_covariate[, 3]) %in% c(0, 1)) ) - # expect $time is class Date - expect_true( - "Date" %in% class(hms_covariate$time) - ) } } }) @@ -809,6 +903,9 @@ testthat::test_that("calc_hms returns expected with missing polygons.", { expect_true( is.function(calc_hms) ) + hms_dir <- testthat::test_path( + "..", "testdata", "hms" + ) for (d in seq_along(densities)) { density <- densities[d] for (r in seq_along(radii)) { @@ -816,11 +913,7 @@ testthat::test_that("calc_hms returns expected with missing polygons.", { process_hms( date = c("2022-06-10", "2022-06-13"), variable = density, - path = testthat::test_path( - "..", - "testdata", - "hms" - ) + path = hms_dir ) hms_covariate <- calc_hms( @@ -856,14 +949,11 @@ testthat::test_that("calc_hms returns expected with missing polygons.", { expect_true( all(unique(hms_covariate[, 3]) %in% c(0, 1)) ) - # expect $time is class Date - expect_true( - "Date" %in% class(hms_covariate$time) - ) } } }) +## 10. GMTED #### testthat::test_that("calc_gmted returns expected.", { withr::local_package("terra") statistics <- c( @@ -891,20 +981,7 @@ testthat::test_that("calc_gmted returns expected.", { testthat::test_path( "..", "testdata", - "gmted", - paste0( - process_gmted_codes( - statistic, - statistic = TRUE, - invert = FALSE - ), - process_gmted_codes( - resolution, - resolution = TRUE, - invert = FALSE - ), - "_grd" - ) + "gmted" ) ) gmted_covariate <- @@ -928,17 +1005,40 @@ testthat::test_that("calc_gmted returns expected.", { ) # expect 2 columns expect_true( - ncol(gmted_covariate) == 2 + ncol(gmted_covariate) == 3 ) # expect numeric value expect_true( - class(gmted_covariate[, 2]) == "numeric" + class(gmted_covariate[, 3]) == "numeric" ) } } } + testthat::expect_no_error( + gmted <- process_gmted( + variable = c("Breakline Emphasis", "7.5 arc-seconds"), + testthat::test_path( + "..", "testdata", "gmted", "be75_grd" + ) + ) + ) + testthat::expect_no_error( + gmted_geom <- calc_gmted( + gmted, + ncp, + "site_id", + geom = TRUE + ) + ) + testthat::expect_equal( + ncol(gmted_geom), 4 + ) + testthat::expect_true( + "geometry" %in% names(gmted_geom) + ) }) +## 11. NARR #### testthat::test_that("calc_narr returns expected.", { withr::local_package("terra") variables <- c( @@ -1007,12 +1107,13 @@ testthat::test_that("calc_narr returns expected.", { } # expect $time is class Date expect_true( - "Date" %in% class(narr_covariate$time) + "POSIXct" %in% class(narr_covariate$time) ) } } }) +## 11. GEOS-CF #### testthat::test_that("calc_geos returns as expected.", { withr::local_package("terra") withr::local_package("data.table") @@ -1077,6 +1178,7 @@ testthat::test_that("calc_geos returns as expected.", { } }) +## 12. SEDAC: Population #### testthat::test_that("calc_sedac_population returns as expected.", { withr::local_package("terra") withr::local_package("data.table") @@ -1132,7 +1234,7 @@ testthat::test_that("calc_sedac_population returns as expected.", { } }) - +## 13. 
SEDAC: Global Roads #### testthat::test_that("groads calculation works", { withr::local_package("terra") withr::local_package("sf") @@ -1169,8 +1271,27 @@ testthat::test_that("groads calculation works", { # expect data.frame testthat::expect_s3_class(groads_res, "data.frame") + + # return with geometry + testthat::expect_no_error( + groads_geom <- calc_sedac_groads( + from = groads, + locs = ncp, + locs_id = "site_id", + radius = 5000, + geom = TRUE + ) + ) + testthat::expect_equal( + ncol(groads_geom), 5 + ) + testthat::expect_true( + "geometry" %in% names(groads_geom) + ) }) + +## 14. MERRA2 #### testthat::test_that("calc_merra2 returns as expected.", { withr::local_package("terra") withr::local_package("data.table") @@ -1191,7 +1312,7 @@ testthat::test_that("calc_merra2 returns as expected.", { ncp$site_id <- "3799900018810101" # expect function expect_true( - is.function(calc_geos) + is.function(calc_merra2) ) for (c in seq_along(collections)) { collection <- collections[c] @@ -1255,6 +1376,7 @@ testthat::test_that("calc_merra2 returns as expected.", { } }) +## 15. GRIDMET #### testthat::test_that("calc_gridmet returns as expected.", { withr::local_package("terra") withr::local_package("data.table") @@ -1307,11 +1429,12 @@ testthat::test_that("calc_gridmet returns as expected.", { ) # expect $time is class Date expect_true( - "Date" %in% class(gridmet_covariate$time) + "POSIXt" %in% class(gridmet_covariate$time) ) } }) +## 16. TerraClimate #### testthat::test_that("calc_terraclimate returns as expected.", { withr::local_package("terra") withr::local_package("data.table") @@ -1369,6 +1492,7 @@ testthat::test_that("calc_terraclimate returns as expected.", { } }) +## 17. Lagged variables #### testthat::test_that("calc_lagged returns as expected.", { withr::local_package("terra") withr::local_package("data.table") @@ -1376,7 +1500,7 @@ testthat::test_that("calc_lagged returns as expected.", { ncp <- data.frame(lon = -78.8277, lat = 35.95013) ncp$site_id <- "3799900018810101" # expect function - expect_true( + testthat::expect_true( is.function(calc_lagged) ) for (l in seq_along(lags)) { @@ -1411,15 +1535,15 @@ testthat::test_that("calc_lagged returns as expected.", { if (lags[l] == 0) { narr_lagged <- calc_lagged( from = narr_covariate, - date = c("2018-01-05", "2018-01-10"), + date = c("2018-01-01", "2018-01-10"), lag = lags[l], locs_id = "site_id", time_id = "time" ) - expect_identical(narr_lagged, narr_covariate) + testthat::expect_identical(narr_lagged, narr_covariate) } else { # expect error because 2018-01-01 will not have lag data from 2017-12-31 - expect_error( + testthat::expect_error( calc_lagged( from = narr_covariate, date = c("2018-01-01", "2018-01-10"), @@ -1436,18 +1560,19 @@ testthat::test_that("calc_lagged returns as expected.", { time_id = "time" ) # expect output is data.frame - expect_true( + testthat::expect_true( class(narr_lagged) == "data.frame" ) # expect lag day - expect_true(grepl("_[0-9]{1}_", colnames(narr_lagged)[3])) + testthat::expect_true(grepl("_[0-9]{1}$", colnames(narr_lagged)[3])) # expect no NA - expect_true(all(!is.na(narr_lagged))) + testthat::expect_true(all(!is.na(narr_lagged))) } } }) +## 18. 
Wrapper #### testthat::test_that("calc_covariates wrapper works", { withr::local_package("rlang") @@ -1520,3 +1645,71 @@ testthat::test_that("calc_covariates wrapper works", { ) } }) + +testthat::test_that("calc_check_time identifies missing `time` column.", { + testthat::expect_error( + # provide integer instead of data.frame to provoke error + calc_check_time(12, TRUE) + ) + testthat::expect_message( + # provide data.frame without time to provoke message + calc_check_time( + data.frame(x = 10, y = 20), + TRUE + ) + ) +}) + +# Calc message testthat::test_that("calc_message exception", + { + testthat::expect_no_error( + calc_message("gmted", "mean", "2020", "year", NULL) + ) + testthat::expect_no_error( + calc_message("narr", "shum", 2000, "year", NULL) + ) + } +) + +# calc time testthat::test_that("calc time remains", { + testthat::expect_no_error( + rr <- calc_time("eternal", "timeless") + ) + testthat::expect_true(rr == "eternal") +}) + +# calc worker testthat::test_that("calc_worker remaining", { + withr::local_package("terra") + withr::local_package("sf") + withr::local_package("exactextractr") + withr::local_options(sf_use_s2 = FALSE) + + ncp <- data.frame(lon = -78.8277, lat = 35.95013, time = "boundless") + ncp$site_id <- "3799900018810101" + ncpt <- + terra::vect(ncp, geom = c("lon", "lat"), + keepgeom = TRUE, crs = "EPSG:4326") + nc <- system.file("gpkg/nc.gpkg", package = "sf") + nc <- terra::vect(nc) + nc <- terra::project(nc, "EPSG:4326") + ncrast <- terra::rast(nc, resolution = 0.05) + terra::values(ncrast) <- rgamma(terra::ncell(ncrast), 1, 1e-4) + + testthat::expect_no_error( + cwres <- + calc_worker( + from = ncrast, + dataset = "whatever", + locs_vector = ncpt, + locs_df = ncp, + time = ncpt$time, + time_type = "timeless", + radius = 1e5, + max_cells = 3e7 + ) + ) + testthat::expect_s3_class(cwres, "data.frame") +}) diff --git a/tests/testthat/test-download_functions.R b/tests/testthat/test-download_functions.R index 8fbf8e43..a7c113ae 100644 --- a/tests/testthat/test-download_functions.R +++ b/tests/testthat/test-download_functions.R @@ -39,7 +39,7 @@ testthat::test_that("Error when one parameter is NULL.", { testthat::test_that("Errors when temporal ranges invalid.", { expect_error( - download_geos_data( + download_geos( date_start = "1900-01-01", collection = "aqc_tavg_1hr_g1440x721_v1", acknowledgement = TRUE, directory_to_save = testthat::test_path("..", "testdata/", "") ) ) expect_error( - download_aqs_data( + download_aqs( year_start = 1900, + year_end = 1919, acknowledgement = TRUE, - directory_to_save = testthat::test_path("..", "testdata/", ""), - directory_to_download = testthat::test_path("..", "testdata/", "") + directory_to_save = testthat::test_path("..", "testdata/", "") ) ) expect_error( - download_narr_monolevel_data( + download_narr_monolevel( year_start = 1900, variables = "air.sfc", acknowledgement = TRUE, directory_to_save = testthat::test_path("..", "testdata/", "") ) ) expect_error( - download_narr_p_levels_data( + download_narr_p_levels( year_start = 1900, variables = "omega", acknowledgement = TRUE, directory_to_save = testthat::test_path("..", "testdata/", "") ) ) expect_error( - download_merra2_data( + download_merra2( date_start = "1900-01-01", collection = "inst1_2d_asm_Nx", directory_to_save = testthat::test_path("..", "testdata/", ""), - acknowledgement = TRUE + acknowledgement = TRUE, + remove_command = TRUE ) ) + file.remove( testthat::test_path( "../testdata", "merra2_1900-01-01_2023-09-01_wget_commands.txt" ) ) + sink() expect_error( -
download_hms_data( + download_hms( date_start = "1900-01-01", directory_to_save = testthat::test_path("..", "testdata/", ""), - directory_to_download = testthat::test_path("..", "testdata/", ""), acknowledgement = TRUE ) ) expect_error( - download_gridmet_data( + download_gridmet( year_start = 1900, variables = "Precipitation", acknowledgement = TRUE, directory_to_save = testthat::test_path("..", "testdata/", "") ) ) expect_error( - download_terraclimate_data( + download_terraclimate( year_start = 1900, variables = "Wind Speed", acknowledgement = TRUE, directory_to_save = testthat::test_path("..", "testdata/", "") ) ) }) @@ -112,23 +118,29 @@ testthat::test_that("EPA AQS download URLs have HTTP status 200.", { year_end <- 2022 resolution_temporal <- "daily" parameter_code <- 88101 - directory_to_download <- testthat::test_path("..", "testdata/", "") - directory_to_save <- testthat::test_path("..", "testdata/", "") + directory_to_save <- testthat::test_path("..", "testdata", "aqs_temp") # run download function download_data(dataset_name = "aqs", year_start = year_start, year_end = year_end, directory_to_save = directory_to_save, - directory_to_download = directory_to_download, acknowledgement = TRUE, unzip = FALSE, remove_zip = FALSE, download = FALSE, remove_command = FALSE) + # expect sub-directories to be created + testthat::expect_true( + length( + list.files( + directory_to_save, include.dirs = TRUE + ) + ) == 3 + ) # define file path with commands commands_path <- paste0( - directory_to_download, + download_sanitize_path(directory_to_save), "aqs_", parameter_code, "_", year_start, "_", year_end, "_", resolution_temporal, "_curl_commands.txt" ) # import commands commands <- read_commands(commands_path = commands_path) # extract urls - urls <- extract_urls(commands = commands, position = 2) + urls <- extract_urls(commands = commands, position = 4) # check HTTP URL status - url_status <- check_urls(urls = urls, size = length(urls), method = "HEAD") + url_status <- check_urls(urls = urls, size = 1L, method = "HEAD") # implement unit tests test_download_functions(directory_to_save = directory_to_save, commands_path = commands_path, url_status = url_status) file.remove(commands_path) }) - +# Ecoregion tests #### testthat::test_that("Ecoregion download URLs have HTTP status 200.", { withr::local_package("httr") withr::local_package("stringr") # function parameters - directory_to_download <- testthat::test_path("..", "testdata/", "") - directory_to_save <- testthat::test_path("..", "testdata/", "") + directory_to_save <- testthat::test_path("..", "testdata", "eco_temp") certificate <- system.file("extdata/cacert_gaftp_epa.pem", package = "amadeus") # run download function download_data(dataset_name = "ecoregion", directory_to_save = directory_to_save, - directory_to_download = directory_to_download, acknowledgement = TRUE, unzip = FALSE, remove_zip = FALSE, download = FALSE, remove_command = FALSE, epa_certificate_path = certificate) + # expect sub-directories to be created + testthat::expect_true( + length( + list.files( + directory_to_save, include.dirs = TRUE + ) + ) == 3 + ) # define file path with commands commands_path <- paste0( - directory_to_download, + download_sanitize_path(directory_to_save), "us_eco_l3_state_boundaries_", Sys.Date(), "_wget_command.txt" ) @@ -188,10 +206,28 @@ testthat::test_that("Ecoregion download URLs have HTTP status 200.", { test_download_functions(directory_to_save = directory_to_save, commands_path = commands_path, url_status =
url_status) - # remove file with commands after test - file.remove(commands_path) + + file.create( + file.path(directory_to_save, "zip_files", + "us_eco_l3_state_boundaries.zip"), + recursive = TRUE + ) + testthat::expect_no_error( + download_data( + dataset_name = "ecoregion", + directory_to_save = directory_to_save, + acknowledgement = TRUE, + unzip = FALSE, + remove_zip = TRUE, + download = FALSE, + remove_command = TRUE, + epa_certificate_path = certificate + ) + ) + }) +# GEOS-CF tests #### testthat::test_that("GEOS-CF download URLs have HTTP status 200.", { withr::local_package("httr") withr::local_package("stringr") @@ -201,62 +237,78 @@ testthat::test_that("GEOS-CF download URLs have HTTP status 200.", { collections <- c("aqc_tavg_1hr_g1440x721_v1", "chm_inst_1hr_g1440x721_p23") directory_to_save <- testthat::test_path("..", "testdata/", "") - for (c in seq_along(collections)) { - # run download function + directory_to_save2 <- testthat::test_path("..", "testdata", "geos_temp") + + # run download function + testthat::expect_no_error( download_data(dataset_name = "geos", date_start = date_start, date_end = date_end, - collection = collections[c], + collection = collections, directory_to_save = directory_to_save, acknowledgement = TRUE, download = FALSE) - # define file path with commands - commands_path <- paste0(directory_to_save, - collections[c], - "_", - date_start, - "_", - date_end, - "_wget_commands.txt") - # import commands - commands <- read_commands(commands_path = commands_path) - # extract urls - urls <- extract_urls(commands = commands, position = 2) - # check HTTP URL status - url_status <- check_urls(urls = urls, size = 20L, method = "HEAD") - # implement unit tests - test_download_functions(directory_to_save = directory_to_save, - commands_path = commands_path, - url_status = url_status) - # remove file with commands after test - file.remove(commands_path) - } + ) + # define file path with commands + commands_path <- paste0(directory_to_save, + "geos_", + date_start, + "_", + date_end, + "_wget_commands.txt") + # import commands + commands <- read_commands(commands_path = commands_path) + # extract urls + urls <- extract_urls(commands = commands, position = 2) + # check HTTP URL status + url_status <- check_urls(urls = urls, size = 2L, method = "HEAD") + # implement unit tests + test_download_functions(directory_to_save = directory_to_save, + commands_path = commands_path, + url_status = url_status) + testthat::expect_no_error( + download_data(dataset_name = "geos", + date_start = date_start, + date_end = date_end, + collection = collections, + directory_to_save = directory_to_save2, + acknowledgement = TRUE, + remove_command = TRUE, + download = FALSE) + ) + + # remove file with commands after test + file.remove(commands_path) }) +# GMTED tests #### testthat::test_that("GMTED download URLs have HTTP status 200.", { withr::local_package("httr") # function parameters statistics <- c("Breakline Emphasis", - # "Systematic Subsample", - # "Median Statistic", "Minimum Statistic", - # "Mean Statistic", "Maximum Statistic", "Standard Deviation Statistic") resolution <- "7.5 arc-seconds" - directory_to_download <- testthat::test_path("..", "testdata/", "") - directory_to_save <- testthat::test_path("..", "testdata/", "") + directory_to_save <- testthat::test_path("..", "testdata", "gmted_temp") for (s in seq_along(statistics)) { # run download function download_data(dataset_name = "gmted", statistic = statistics[s], resolution = resolution, - directory_to_download = 
directory_to_download, directory_to_save = directory_to_save, acknowledgement = TRUE, unzip = FALSE, remove_zip = FALSE, download = FALSE) + # expect sub-directories to be created + testthat::expect_true( + length( + list.files( + directory_to_save, include.dirs = TRUE + ) + ) == 3 + ) # define file path with commands - commands_path <- paste0(directory_to_download, + commands_path <- paste0(download_sanitize_path(directory_to_save), "gmted_", gsub(" ", "", statistics[s]), "_", @@ -268,17 +320,36 @@ testthat::test_that("GMTED download URLs have HTTP status 200.", { commands <- read_commands(commands_path = commands_path) # extract urls urls <- extract_urls(commands = commands, position = 6) + filename <- extract_urls(commands = commands, position = 4) # check HTTP URL status url_status <- check_urls(urls = urls, size = 1L, method = "HEAD") # implement unit tests test_download_functions(directory_to_save = directory_to_save, commands_path = commands_path, url_status = url_status) + + file.create( + file.path(filename), + recursive = TRUE + ) # remove file with commands after test - file.remove(commands_path) + # remove temporary gmted + testthat::expect_no_error( + download_data(dataset_name = "gmted", + statistic = statistics[s], + resolution = resolution, + directory_to_save = directory_to_save, + acknowledgement = TRUE, + unzip = FALSE, + remove_zip = TRUE, + remove_command = TRUE, + download = FALSE) + ) + unlink(directory_to_save, recursive = TRUE) } }) +# MERRA2 #### testthat::test_that("MERRA2 download URLs have HTTP status 200.", { withr::local_package("httr") withr::local_package("stringr") @@ -287,38 +358,49 @@ testthat::test_that("MERRA2 download URLs have HTTP status 200.", { date_end <- "2022-03-08" collections <- c("inst1_2d_asm_Nx", "inst3_3d_asm_Np") directory_to_save <- testthat::test_path("..", "testdata/", "") - for (c in seq_along(collections)) { - # run download function + directory_to_save2 <- testthat::test_path("..", "testdata", "hej") + # run download function + testthat::expect_no_error( download_data(dataset_name = "merra2", date_start = date_start, date_end = date_end, - collection = collections[c], + collection = collections, directory_to_save = directory_to_save, acknowledgement = TRUE, download = FALSE) - # define path with commands - commands_path <- paste0(directory_to_save, - collections[c], - "_", - date_start, - "_", - date_end, - "_wget_commands.txt") - # import commands - commands <- read_commands(commands_path = commands_path) - # extract urls - urls <- extract_urls(commands = commands, position = 2) - # check HTTP URL status - url_status <- check_urls(urls = urls, size = 3L, method = "HEAD") - # implement unit tests - test_download_functions(directory_to_save = directory_to_save, - commands_path = commands_path, - url_status = url_status) - # remove file with commands after test - file.remove(commands_path) - } + ) + # define path with commands + commands_path <- paste0(directory_to_save, + "merra2_", + date_start, + "_", + date_end, + "_wget_commands.txt") + # import commands + commands <- read_commands(commands_path = commands_path) + # extract urls + urls <- extract_urls(commands = commands, position = 2) + # check HTTP URL status + url_status <- check_urls(urls = urls, size = 3L, method = "HEAD") + # implement unit tests + test_download_functions(directory_to_save = directory_to_save, + commands_path = commands_path, + url_status = url_status) + testthat::expect_no_error( + download_data(dataset_name = "merra2", + date_start = date_start, + 
date_end = date_end, + collection = collections, + directory_to_save = directory_to_save2, + acknowledgement = TRUE, + remove_command = TRUE, + download = FALSE) + ) + # remove file with commands after test + file.remove(commands_path) }) +# MERRA2 Collection error test #### testthat::test_that("MERRA2 returns message with unrecognized collection.", { # function parameters collections <- "uNrEcOgNiZeD" @@ -333,6 +415,7 @@ testthat::test_that("MERRA2 returns message with unrecognized collection.", { ) }) +## NARR Monolevel #### testthat::test_that("NARR monolevel download URLs have HTTP status 200.", { withr::local_package("httr") withr::local_package("stringr") @@ -381,6 +464,7 @@ testthat::test_that("NARR monolevel error with invalid years.", { ) }) +# NARR -- p-levels #### testthat::test_that("NARR p-levels download URLs have HTTP status 200.", { withr::local_package("httr") withr::local_package("stringr") @@ -389,6 +473,7 @@ testthat::test_that("NARR p-levels download URLs have HTTP status 200.", { year_end <- 2021 variables <- c("shum", "omega") directory_to_save <- testthat::test_path("..", "testdata/", "") + directory_to_save2 <- testthat::test_path("..", "testdata", "hej") # run download function download_data(dataset_name = "narr_p_levels", year_start = year_start, @@ -407,11 +492,19 @@ testthat::test_that("NARR p-levels download URLs have HTTP status 200.", { # extract urls urls <- extract_urls(commands = commands, position = 6) # check HTTP URL status - url_status <- check_urls(urls = urls, size = 20L, method = "HEAD") + url_status <- check_urls(urls = urls, size = 10L, method = "HEAD") # implement unit tests test_download_functions(directory_to_save = directory_to_save, commands_path = commands_path, url_status = url_status) + download_data(dataset_name = "narr_p_levels", + year_start = year_start, + year_end = year_end, + variables = variables, + directory_to_save = directory_to_save2, + acknowledgement = TRUE, + remove_command = TRUE, + download = FALSE) # remove file with commands after test file.remove(commands_path) }) @@ -422,8 +515,7 @@ testthat::test_that("NOAA HMS Smoke download URLs have HTTP status 200.", { # function parameters date_start <- "2022-08-12" date_end <- "2022-09-21" - directory_to_download <- testthat::test_path("..", "testdata/", "") - directory_to_save <- testthat::test_path("..", "testdata/", "") + directory_to_save <- testthat::test_path("..", "testdata", "hms_temp") data_formats <- c("Shapefile", "KML") for (d in seq_along(data_formats)) { # run download function @@ -431,7 +523,6 @@ testthat::test_that("NOAA HMS Smoke download URLs have HTTP status 200.", { date_start = date_start, date_end = date_end, data_format = data_formats[d], - directory_to_download = directory_to_download, directory_to_save = directory_to_save, acknowledgement = TRUE, download = FALSE, @@ -439,34 +530,48 @@ testthat::test_that("NOAA HMS Smoke download URLs have HTTP status 200.", { unzip = FALSE, remove_zip = FALSE) # define file path with commands - commands_path <- paste0(directory_to_download, + commands_path <- paste0(download_sanitize_path(directory_to_save), "hms_smoke_", gsub("-", "", date_start), "_", gsub("-", "", date_end), "_curl_commands.txt") + # expect sub-directories to be created + if (data_formats[d] == "Shapefile") { + expected_folders <- 3 + } else { + expected_folders <- 2 + } + testthat::expect_true( + length( + list.files( + directory_to_save, include.dirs = TRUE + ) + ) == expected_folders + ) # import commands commands <- read_commands(commands_path = 
commands_path) # extract urls urls <- extract_urls(commands = commands, position = 6) # check HTTP URL status - url_status <- check_urls(urls = urls, size = 3L, method = "HEAD") + url_status <- check_urls(urls = urls, size = 10L, method = "HEAD") # implement unit tests test_download_functions(directory_to_save = directory_to_save, commands_path = commands_path, url_status = url_status) # remove file with commands after test file.remove(commands_path) + # remove temporary hms + unlink(directory_to_save, recursive = TRUE) } }) -testthat::test_that("download_hms_data error for unzip and directory.", { +testthat::test_that("download_hms error for unzip and directory.", { testthat::expect_error( download_data( dataset_name = "hms", acknowledgement = TRUE, directory_to_save = testthat::test_path("..", "testdata/", ""), - directory_to_download = testthat::test_path("..", "testdata/", ""), unzip = FALSE, remove_zip = TRUE ) @@ -480,14 +585,12 @@ testthat::test_that("NLCD download URLs have HTTP status 200.", { years <- c(2021, 2019, 2016) collections <- c(rep("Coterminous United States", 2), "Alaska") collection_codes <- c(rep("l48", 2), "ak") - directory_to_download <- testthat::test_path("..", "testdata/", "") - directory_to_save <- testthat::test_path("..", "testdata/", "") + directory_to_save <- testthat::test_path("..", "testdata", "nlcd_temp") # run download function for (y in seq_along(years)) { download_data(dataset_name = "nlcd", year = years[y], collection = collections[y], - directory_to_download = directory_to_download, directory_to_save = directory_to_save, acknowledgement = TRUE, download = FALSE, @@ -495,7 +598,7 @@ testthat::test_that("NLCD download URLs have HTTP status 200.", { unzip = FALSE, remove_zip = FALSE) # define file path with commands - commands_path <- paste0(directory_to_download, + commands_path <- paste0(download_sanitize_path(directory_to_save), "nlcd_", years[y], "_land_cover_", @@ -503,6 +606,14 @@ testthat::test_that("NLCD download URLs have HTTP status 200.", { "_", Sys.Date(), "_curl_command.txt") + # expect sub-directories to be created + testthat::expect_true( + length( + list.files( + directory_to_save, include.dirs = TRUE + ) + ) == 3 + ) # import commands commands <- read_commands(commands_path = commands_path) # extract urls @@ -510,18 +621,18 @@ testthat::test_that("NLCD download URLs have HTTP status 200.", { # check HTTP URL status url_status <- check_urls(urls = urls, size = 1L, method = "HEAD") # implement unit tests - test_download_functions(directory_to_download = directory_to_download, - directory_to_save = directory_to_save, + test_download_functions(directory_to_save = directory_to_save, commands_path = commands_path, url_status = url_status) # remove file with commands after test file.remove(commands_path) + # remove temporary nlcd + unlink(directory_to_save, recursive = TRUE) } testthat::expect_error( download_data(dataset_name = "nlcd", year = 2000, collection = "Coterminous United States", - directory_to_download = directory_to_download, directory_to_save = directory_to_save, acknowledgement = TRUE, download = FALSE, @@ -529,7 +640,6 @@ testthat::test_that("NLCD download URLs have HTTP status 200.", { unzip = FALSE, remove_zip = FALSE) ) - }) testthat::test_that("SEDAC groads download URLs have HTTP status 200.", { @@ -538,8 +648,7 @@ testthat::test_that("SEDAC groads download URLs have HTTP status 200.", { # function parameters data_regions <- c("Americas", "Global") data_formats <- c("Geodatabase", "Shapefile") - directory_to_download <- 
testthat::test_path("..", "testdata/", "") - directory_to_save <- testthat::test_path("..", "testdata/", "") + directory_to_save <- testthat::test_path("..", "testdata", "groad_temp") # run download function for (r in seq_along(data_regions)) { data_region <- data_regions[r] @@ -549,13 +658,20 @@ testthat::test_that("SEDAC groads download URLs have HTTP status 200.", { acknowledgement = TRUE, data_format = data_formats[f], data_region = data_region, - directory_to_download = directory_to_download, download = FALSE, unzip = FALSE, remove_zip = FALSE, remove_command = FALSE) + # expect sub-directories to be created + testthat::expect_true( + length( + list.files( + directory_to_save, include.dirs = TRUE + ) + ) == 3 + ) # define file path with commands - commands_path <- paste0(directory_to_download, + commands_path <- paste0(download_sanitize_path(directory_to_save), "sedac_groads_", gsub(" ", "_", tolower(data_region)), "_", @@ -568,12 +684,13 @@ testthat::test_that("SEDAC groads download URLs have HTTP status 200.", { # check HTTP URL status url_status <- check_urls(urls = urls, size = 1L, method = "GET") # implement unit tests - test_download_functions(directory_to_download = directory_to_download, - directory_to_save = directory_to_save, + test_download_functions(directory_to_save = directory_to_save, commands_path = commands_path, url_status = url_status) # remove file with commands after test file.remove(commands_path) + # remove temporary groads + unlink(directory_to_save, recursive = TRUE) } } @@ -581,7 +698,6 @@ testthat::test_that("SEDAC groads download URLs have HTTP status 200.", { download_data(dataset_name = "sedac_groads", data_format = "Shapefile", data_region = "Global", - directory_to_download = directory_to_download, directory_to_save = directory_to_save, acknowledgement = TRUE, download = FALSE, @@ -599,8 +715,7 @@ testthat::test_that("SEDAC population download URLs have HTTP status 200.", { data_formats <- c("GeoTIFF") data_resolutions <- cbind(c("30 second"), c("30_sec")) - directory_to_download <- testthat::test_path("..", "testdata/", "") - directory_to_save <- testthat::test_path("..", "testdata/", "") + directory_to_save <- testthat::test_path("..", "testdata", "pop_temp") for (f in seq_along(data_formats)) { data_format <- data_formats[f] for (y in seq_along(years)) { @@ -611,13 +726,20 @@ testthat::test_that("SEDAC population download URLs have HTTP status 200.", { year = year, data_format = data_format, data_resolution = data_resolutions[r, 1], - directory_to_download = directory_to_download, directory_to_save = directory_to_save, acknowledgement = TRUE, download = FALSE, unzip = FALSE, remove_zip = FALSE, remove_command = FALSE) + # expect sub-directories to be created + testthat::expect_true( + length( + list.files( + directory_to_save, include.dirs = TRUE + ) + ) == 3 + ) # define file path with commands if (year == "all") { year <- "totpop" @@ -629,7 +751,7 @@ testthat::test_that("SEDAC population download URLs have HTTP status 200.", { } else { resolution <- data_resolutions[r, 2] } - commands_path <- paste0(directory_to_download, + commands_path <- paste0(download_sanitize_path(directory_to_save), "sedac_population_", year, "_", @@ -644,12 +766,13 @@ testthat::test_that("SEDAC population download URLs have HTTP status 200.", { # check HTTP URL status url_status <- check_urls(urls = urls, size = 1L, method = "GET") # implement unit tests - test_download_functions(directory_to_download = directory_to_download, - directory_to_save = directory_to_save, + 
test_download_functions(directory_to_save = directory_to_save, commands_path = commands_path, url_status = url_status) # remove file with commands after test file.remove(commands_path) + # remove temporary pop + unlink(directory_to_save, recursive = TRUE) } } } @@ -669,7 +792,6 @@ testthat::test_that("SEDAC population data types are coerced.", { year = year, data_format = data_formats[f], data_resolution = data_resolutions[1], - directory_to_download = directory_to_download, directory_to_save = directory_to_save, acknowledgement = TRUE, download = FALSE, @@ -706,8 +828,7 @@ testthat::test_that("Koppen Geiger download URLs have HTTP status 200.", { # function parameters time_periods <- c("Present", "Future") data_resolutions <- c("0.0083") - directory_to_download <- testthat::test_path("..", "testdata/", "") - directory_to_save <- testthat::test_path("..", "testdata/", "") + directory_to_save <- testthat::test_path("..", "testdata", "kop_temp") # run download function for (p in seq_along(time_periods)) { time_period <- time_periods[p] @@ -715,7 +836,6 @@ testthat::test_that("Koppen Geiger download URLs have HTTP status 200.", { download_data(dataset_name = "koppen", time_period = time_period, data_resolution = data_resolutions[d], - directory_to_download = directory_to_download, directory_to_save = directory_to_save, acknowledgement = TRUE, unzip = FALSE, @@ -723,7 +843,7 @@ testthat::test_that("Koppen Geiger download URLs have HTTP status 200.", { download = FALSE, remove_command = FALSE) # define file path with commands - commands_path <- paste0(directory_to_download, + commands_path <- paste0(download_sanitize_path(directory_to_save), "koppen_geiger_", time_period, "_", @@ -733,15 +853,22 @@ testthat::test_that("Koppen Geiger download URLs have HTTP status 200.", { "_", Sys.Date(), "_wget_command.txt") + # expect sub-directories to be created + testthat::expect_true( + length( + list.files( + directory_to_save, include.dirs = TRUE + ) + ) == 3 + ) # import commands commands <- read_commands(commands_path = commands_path) # extract urls urls <- extract_urls(commands = commands, position = 2) # check HTTP URL status - url_status <- check_urls(urls = urls, size = 1L, method = "GET") + url_status <- check_urls(urls = urls, size = 1L, method = "HEAD") # implement unit tests - test_download_functions(directory_to_download = directory_to_download, - directory_to_save = directory_to_save, + test_download_functions(directory_to_save = directory_to_save, commands_path = commands_path, url_status = url_status) # remove file with commands after test @@ -792,7 +919,7 @@ testthat::test_that("MODIS-MOD09GA download URLs have HTTP status 200.", { # extract urls urls <- extract_urls(commands = commands, position = 4) # check HTTP URL status - url_status <- check_urls(urls = urls, size = 10L, method = "SKIP") + url_status <- check_urls(urls = urls, size = 3L, method = "SKIP") # implement unit tests test_download_functions(directory_to_save = directory_to_save, commands_path = commands_path, @@ -994,7 +1121,6 @@ testthat::test_that("MODIS download error cases.", { remove_command = FALSE) ) - # define file path with commands commands_path <- paste0( directory_to_save, @@ -1020,7 +1146,6 @@ testthat::test_that("MODIS download error cases.", { }) - testthat::test_that("EPA TRI download URLs have HTTP status 200.", { withr::local_package("httr") withr::local_package("stringr") # function parameters @@ -1064,7 +1189,7 @@ testthat::test_that("EPA NEI (AADT) download URLs have HTTP status 200.", { withr::local_package("httr") 
withr::local_package("stringr") # function parameters - directory_to_save <- testthat::test_path("..", "testdata/", "") + directory_to_save <- testthat::test_path("..", "testdata", "nei_temp") certificate <- system.file("extdata/cacert_gaftp_epa.pem", package = "amadeus") # run download function @@ -1077,9 +1202,17 @@ testthat::test_that("EPA NEI (AADT) download URLs have HTTP status 200.", { remove_command = FALSE, epa_certificate_path = certificate ) + # expect sub-directories to be created + testthat::expect_true( + length( + list.files( + directory_to_save, include.dirs = TRUE + ) + ) == 3 + ) # define file path with commands commands_path <- paste0( - directory_to_save, + download_sanitize_path(directory_to_save), "NEI_AADT_", paste(year_target, collapse = "-"), "_", @@ -1101,6 +1234,8 @@ testthat::test_that("EPA NEI (AADT) download URLs have HTTP status 200.", { url_status = url_status) # remove file with commands after test file.remove(commands_path) + # remove temporary nei + unlink(directory_to_save, recursive = TRUE) }) testthat::test_that("Test error cases in EPA gaftp sources 1", { @@ -1165,7 +1300,6 @@ testthat::test_that("Test error cases in EPA gaftp sources 2", { remove_command = FALSE, unzip = FALSE, remove_zip = FALSE, - directory_to_download = directory_to_save, epa_certificate_path = certificate ) ) @@ -1190,7 +1324,7 @@ testthat::test_that("epa certificate", { testthat::expect_error( download_epa_certificate("file.txt") ) - testthat::expect_message( + testthat::expect_no_error( download_epa_certificate(file.path(tempdir(), "file.pem")) ) testthat::expect_no_error( @@ -1225,43 +1359,32 @@ testthat::test_that("check_urls returns NULL undefined size.", { ) }) -testthat::test_that("download_hms_data LIVE run.", { +testthat::test_that("download_hms LIVE run.", { # function parameters date <- "2018-01-01" directory <- testthat::test_path("..", "testdata", "hms_live") - # create file to be deleted - dir.create(directory) - file.create( - paste0( - directory, - "/hms_smoke_20180101_20180101_curl_commands.txt" - ) - ) # run download function download_data( dataset_name = "hms", date_start = date, date_end = date, directory_to_save = directory, - directory_to_download = directory, acknowledgement = TRUE, download = TRUE, unzip = TRUE, - remove_zip = TRUE, + remove_zip = FALSE, remove_command = FALSE ) + Sys.sleep(1.5) testthat::expect_true( - length(list.files(directory)) == 5 + length(list.files(directory, recursive = TRUE, include.dirs = TRUE)) == 8 ) commands <- list.files(directory, pattern = ".txt", full.names = TRUE) testthat::expect_true( file.exists(commands) ) - Sys.sleep(1.5) # remove directory - files <- list.files(directory, full.names = TRUE) - sapply(files, file.remove) - file.remove(directory) + unlink(directory, recursive = TRUE) }) testthat::test_that("gridmet download URLs have HTTP status 200.", { @@ -1400,18 +1523,16 @@ testthat::test_that("terraclimate error with invalid variables", { }) - - -testthat::test_that("download_cropscape_data throws an error for invalid year", { +testthat::test_that("download_cropscape throws an error for invalid year", { # Set up test data invalid_year <- 1996 - testthat::expect_error(download_cropscape_data(year = 2020, source = "CMU")) + testthat::expect_error(download_cropscape(year = 2020, source = "CMU")) # Call the function and expect an error - testthat::expect_error(download_cropscape_data(year = invalid_year, source = "GMU")) - testthat::expect_error(download_cropscape_data(year = 2000, source = "USDA")) + 
testthat::expect_error(download_cropscape(year = invalid_year, source = "GMU")) + testthat::expect_error(download_cropscape(year = 2000, source = "USDA")) }) -testthat::test_that("download_cropscape_data generates correct download commands (GMU)", { +testthat::test_that("download_cropscape generates correct download commands (GMU)", { withr::local_package("httr") withr::local_package("stringr") # Set up test data @@ -1420,7 +1541,7 @@ testthat::test_that("download_cropscape_data generates correct download commands # Call the function testthat::expect_no_error( - download_cropscape_data( + download_cropscape( year = year, source = "GMU", directory_to_save = directory_to_save, @@ -1457,7 +1578,7 @@ testthat::test_that("download_cropscape_data generates correct download commands }) -test_that("download_cropscape_data generates correct download commands (USDA)", { +test_that("download_cropscape generates correct download commands (USDA)", { withr::local_package("httr") withr::local_package("stringr") # Set up test data @@ -1466,7 +1587,7 @@ test_that("download_cropscape_data generates correct download commands (USDA)", # Call the function testthat::expect_no_error( - download_cropscape_data( + download_cropscape( year = year, source = "USDA", directory_to_save = directory_to_save, @@ -1503,7 +1624,7 @@ test_that("download_cropscape_data generates correct download commands (USDA)", }) -testthat::test_that("download_prism_data downloads the correct data files", { +testthat::test_that("download_prism downloads the correct data files", { # Set up test data time <- seq(201005, 201012, by = 1) element <- c("ppt", "tmin", "tmax", "tmean", "tdmean", @@ -1522,7 +1643,7 @@ testthat::test_that("download_prism_data downloads the correct data files", { remove_command <- FALSE # Call the function - download_prism_data( + download_prism( time = time, element = element, data_type = data_type, @@ -1534,7 +1655,7 @@ testthat::test_that("download_prism_data downloads the correct data files", { ) testthat::expect_message( - download_prism_data( + download_prism( time = time, element = "ppt", data_type = "normals", @@ -1583,7 +1704,7 @@ testthat::test_that("download_prism_data downloads the correct data files", { remove_command <- FALSE # Call the function and expect an error - testthat::expect_error(download_prism_data( + testthat::expect_error(download_prism( time = time, element = element, data_type = data_type, @@ -1597,13 +1718,12 @@ testthat::test_that("download_prism_data downloads the correct data files", { }) - testthat::test_that("list_stac_files returns a character vector of file links", { withr::local_package("rstac") # Set up test data stac_json <- "https://s3.eu-central-1.wasabisys.com/stac/openlandmap/catalog.json" format <- "tif" - which <- 64 + which <- 35 # Call the function testthat::expect_message( @@ -1630,7 +1750,7 @@ testthat::test_that("list_stac_files returns a character vector of file links", }) -testthat::test_that("download_huc_data works", +testthat::test_that("download_huc works", { withr::local_package("httr") @@ -1641,7 +1761,7 @@ testthat::test_that("download_huc_data works", for (region in allregions) { for (type in alltypes) { testthat::expect_no_error( - download_huc_data( + download_huc( region, type, directory_to_save, acknowledgement = TRUE, @@ -1677,7 +1797,7 @@ testthat::test_that("download_huc_data works", } testthat::expect_error( - download_huc_data( + download_huc( "Lower48", "OceanCatchment", tempdir(), acknowledgement = TRUE, @@ -1703,7 +1823,7 @@ testthat::test_that( 
download <- FALSE testthat::expect_no_error( - download_olm_data( + download_olm( product = product, format = format, directory_to_save = directory_to_save, @@ -1737,3 +1857,23 @@ testthat::test_that( } ) + +testthat::test_that("download_sink test", { + testfile <- testthat::test_path("../testdata", "sink_test.txt") + file.create(testfile) + testthat::expect_no_error( + download_sink(testfile) + ) + sink() + file.remove(testfile) +}) + +testthat::test_that("download_remove_zips test", { + testfile <- + testthat::test_path("..", "testdata", "yellowstone/barren", "coyote.zip") + dir.create(dirname(testfile), recursive = TRUE) + file.create(testfile, recursive = TRUE) + testthat::expect_no_error( + download_remove_zips(remove = TRUE, testfile) + ) +}) diff --git a/tests/testthat/test-manipulate_spacetime_data.R b/tests/testthat/test-manipulate_spacetime_data.R index 2e581a5a..ee759f01 100644 --- a/tests/testthat/test-manipulate_spacetime_data.R +++ b/tests/testthat/test-manipulate_spacetime_data.R @@ -1,5 +1,9 @@ +testthat::test_that("check_mysftime works as expected", { + withr::local_package("data.table") + withr::local_package("sf") + withr::local_package("sftime") + withr::local_options(list(sf_use_s2 = FALSE)) -test_that("check_mysftime works as expected", { # open testing data stdata <- data.table::fread(paste0( testthat::test_path("..", "testdata/", ""), @@ -12,16 +16,16 @@ test_that("check_mysftime works as expected", { ) # should work - expect_no_error(check_mysftime(x = mysft)) + testthat::expect_no_error(check_mysftime(x = mysft)) # check that error messages work well - expect_error(check_mysftime(stdata), "x is not a sftime") + testthat::expect_error(check_mysftime(stdata), "x is not a sftime") mysft <- sftime::st_as_sftime(as.data.frame(stdata), coords = c("lon", "lat"), crs = 4326, time_column_name = "time" ) - expect_error( + testthat::expect_error( check_mysftime(x = mysft), "x is not inherited from a data.table" ) @@ -32,7 +36,9 @@ test_that("check_mysftime works as expected", { crs = 4326, time_column_name = "date" ) - expect_error(check_mysftime(mysft), "time column should be called time") + testthat::expect_error( + check_mysftime(mysft), "time column should be called time" + ) mysft <- stdata |> sftime::st_as_sftime( coords = c("lon", "lat"), @@ -40,7 +46,7 @@ test_that("check_mysftime works as expected", { time_column_name = "time" ) |> dplyr::rename("geom" = "geometry") - expect_error( + testthat::expect_error( check_mysftime(mysft), "geometry column should be called geometry" ) @@ -58,14 +64,18 @@ test_that("check_mysftime works as expected", { for (i in 1:27) { mysft$geometry[i] <- pol } - expect_error( + testthat::expect_error( check_mysftime(mysft), "geometry is not a sfc_POINT" ) }) -test_that("check_mysf works as expected", { +testthat::test_that("check_mysf works as expected", { + withr::local_package("data.table") + withr::local_package("sf") + withr::local_options(list(sf_use_s2 = FALSE)) + # open testing data stdata <- data.table::fread(paste0( testthat::test_path("..", "testdata/", ""), @@ -77,15 +87,15 @@ test_that("check_mysf works as expected", { ) # should work - expect_no_error(check_mysf(x = mysf)) + testthat::expect_no_error(check_mysf(x = mysf)) # check that error messages work well - expect_error(check_mysf(stdata), "x is not a sf") + testthat::expect_error(check_mysf(stdata), "x is not a sf") mysf <- sf::st_as_sf(as.data.frame(stdata), coords = c("lon", "lat"), crs = 4326 ) - expect_error( + testthat::expect_error( check_mysf(x = mysf), "x is not 
inherited from a data.table" ) @@ -95,7 +105,7 @@ test_that("check_mysf works as expected", { crs = 4326 ) |> dplyr::rename("geom" = "geometry") - expect_error( + testthat::expect_error( check_mysf(mysf), "geometry column should be called geometry" ) @@ -112,13 +122,17 @@ test_that("check_mysf works as expected", { for (i in 1:27) { mysf$geometry[i] <- pol } - expect_error( + testthat::expect_error( check_mysf(mysf), "geometry is not a sfc_POINT" ) }) -test_that("rename_time works as expected", { +testthat::test_that("rename_time works as expected", { + withr::local_package("data.table") + withr::local_package("sf") + withr::local_package("sftime") + withr::local_options(list(sf_use_s2 = FALSE)) # open testing data stdata <- data.table::fread(paste0( testthat::test_path("..", "testdata/", ""), @@ -129,39 +143,39 @@ test_that("rename_time works as expected", { crs = 4326, time_column_name = "time" ) - expect_no_error(rename_time(mysft, "date")) - expect_equal( + testthat::expect_no_error(rename_time(mysft, "date")) + testthat::expect_equal( attributes(rename_time(mysft, "date"))$time_column, "date" ) - expect_error( + testthat::expect_error( rename_time(stdata, "date"), "x is not a sftime" ) }) -test_that("dt_as_mysftime works as expected", { +testthat::test_that("dt_as_mysftime works as expected", { # open testing data stdata <- data.table::fread(paste0( testthat::test_path("..", "testdata/", ""), "spacetime_table.csv" )) # should work - expect_no_error(dt_as_mysftime( + testthat::expect_no_error(dt_as_mysftime( x = stdata, lonname = "lon", latname = "lat", timename = "time", crs = 4326 )) - expect_no_error(check_mysftime(dt_as_mysftime( + testthat::expect_no_error(check_mysftime(dt_as_mysftime( x = stdata, lonname = "lon", latname = "lat", timename = "time", crs = 4326 ))) - expect_error( + testthat::expect_error( dt_as_mysftime( x = stdata, lonname = "longitude", @@ -183,7 +197,7 @@ test_that("dt_as_mysftime works as expected", { ) }) -test_that("as_mysftime works as expected", { +testthat::test_that("as_mysftime works as expected", { withr::local_package("terra") withr::local_package("data.table") # open testing data @@ -192,33 +206,33 @@ test_that("as_mysftime works as expected", { "spacetime_table.csv" )) # with data.table - expect_no_error(as_mysftime( + testthat::expect_no_error(as_mysftime( x = stdata, lonname = "lon", latname = "lat", timename = "time", crs = 4326 )) - expect_no_error(check_mysftime(as_mysftime( + testthat::expect_no_error(check_mysftime(as_mysftime( x = stdata, lonname = "lon", latname = "lat", timename = "time", crs = 4326 ))) - expect_error( + testthat::expect_error( as_mysftime(x = stdata), "argument \"lonname\" is missing, with no default" ) # with data.frame - expect_no_error(as_mysftime( + testthat::expect_no_error(as_mysftime( x = as.data.frame(stdata), lonname = "lon", latname = "lat", timename = "time", crs = 4326 )) - expect_no_error(check_mysftime(as_mysftime( + testthat::expect_no_error(check_mysftime(as_mysftime( x = as.data.frame(stdata), lonname = "lon", latname = "lat", @@ -230,19 +244,19 @@ test_that("as_mysftime works as expected", { coords = c("lon", "lat"), crs = 4326 ) - expect_no_error(as_mysftime(mysf, "time")) + testthat::expect_no_error(as_mysftime(mysf, "time")) b <- mysf |> dplyr::rename("date" = "time") - expect_no_error(as_mysftime(b, "date")) - expect_no_error(check_mysftime(as_mysftime(b, "date"))) + testthat::expect_no_error(as_mysftime(b, "date")) + testthat::expect_no_error(check_mysftime(as_mysftime(b, "date"))) # with sftime 
mysft <- sftime::st_as_sftime(stdata, coords = c("lon", "lat"), crs = 4326, time_column_name = "time" ) - expect_no_error(as_mysftime(mysft, "time")) - expect_no_error(check_mysftime(as_mysftime(mysft, "time"))) + testthat::expect_no_error(as_mysftime(mysft, "time")) + testthat::expect_no_error(check_mysftime(as_mysftime(mysft, "time"))) # with SpatRaster myrast <- terra::rast( @@ -254,8 +268,10 @@ test_that("as_mysftime works as expected", { terra::values(myrast) <- seq(-5, 19) terra::add(myrast) <- c(myrast**2, myrast**3) names(myrast) <- c("2023-11-01", "2023-11-02", "2023-11-03") - expect_no_error(as_mysftime(x = myrast, varname = "altitude")) - expect_no_error(check_mysftime(as_mysftime(x = myrast, varname = "altitude"))) + testthat::expect_no_error(as_mysftime(x = myrast, varname = "altitude")) + testthat::expect_no_error( + check_mysftime(as_mysftime(x = myrast, varname = "altitude")) + ) # with SpatVector myvect <- terra::vect( stdata, @@ -263,8 +279,8 @@ test_that("as_mysftime works as expected", { crs = "EPSG:4326", keepgeom = FALSE ) - expect_no_error(as_mysftime(x = myvect)) - expect_no_error(check_mysftime(as_mysftime(x = myvect))) + testthat::expect_no_error(as_mysftime(x = myvect)) + testthat::expect_no_error(check_mysftime(as_mysftime(x = myvect))) myvect <- stdata |> dplyr::rename("time2" = time) |> terra::vect( @@ -272,7 +288,7 @@ test_that("as_mysftime works as expected", { crs = "EPSG:4326", keepgeom = FALSE ) - expect_error( + testthat::expect_error( as_mysftime(x = myvect), "timename column missing or mispelled" ) @@ -306,15 +322,15 @@ test_that("as_mysftime works as expected", { names(var2) <- c("2023-11-01", "2023-11-02", "2023-11-03") myrds <- terra::sds(var1, var2) names(myrds) <- c("var1", "var2") - expect_no_error(as_mysftime(myrds)) - expect_error( + testthat::expect_no_error(as_mysftime(myrds)) + testthat::expect_error( as_mysftime(x = "roquefort"), "x class not accepted" ) }) -test_that("sftime_as_spatvector as expected", { +testthat::test_that("sftime_as_spatvector as expected", { # open testing data stdata <- data.table::fread(paste0( testthat::test_path("..", "testdata/", ""), @@ -325,32 +341,44 @@ test_that("sftime_as_spatvector as expected", { time_column_name = "time", crs = 4326 ) - expect_no_error(sftime_as_spatvector(mysftime)) + testthat::expect_no_error(sftime_as_spatvector(mysftime)) # with a different time column name: attributes(mysftime)$time_column <- "date" mysftime <- dplyr::rename(mysftime, "date" = "time") - expect_no_error(sftime_as_spatvector(mysftime)) + testthat::expect_no_error(sftime_as_spatvector(mysftime)) # doesn't work with other classes: - expect_error(sftime_as_spatvector(stdata)) + testthat::expect_error(sftime_as_spatvector(stdata)) }) -test_that("sf_as_mysftime works as expected", { +testthat::test_that("sf_as_mysftime works as expected", { + withr::local_package("data.table") + withr::local_package("sf") + withr::local_package("sftime") + withr::local_options(list(sf_use_s2 = FALSE)) + # open testing data stdata <- data.table::fread(paste0( testthat::test_path("..", "testdata/", ""), "spacetime_table.csv" )) mysf <- sf::st_as_sf(stdata, coords = c("lon", "lat"), crs = 4326) - expect_no_error(sf_as_mysftime(mysf, "time")) - expect_no_error(check_mysftime(sf_as_mysftime(mysf, "time"))) + testthat::expect_no_error(sf_as_mysftime(mysf, "time")) + testthat::expect_no_error(check_mysftime(sf_as_mysftime(mysf, "time"))) b <- mysf |> dplyr::rename("date" = "time") - expect_no_error(check_mysftime(sf_as_mysftime(b, "date"))) - 
expect_error(sf_as_mysftime(b, "time"), - "time column missing or mispelled") + testthat::expect_no_error(check_mysftime(sf_as_mysftime(b, "date"))) + testthat::expect_error( + sf_as_mysftime(b, "time"), + "time column missing or mispelled" + ) }) -test_that("sftime_as_mysftime works as expected", { +testthat::test_that("sftime_as_mysftime works as expected", { + withr::local_package("data.table") + withr::local_package("sf") + withr::local_package("sftime") + withr::local_options(list(sf_use_s2 = FALSE)) + # open testing data stdata <- data.table::fread(paste0( testthat::test_path("..", "testdata/", ""), @@ -361,18 +389,25 @@ test_that("sftime_as_mysftime works as expected", { time_column_name = "time", crs = 4326 ) - expect_no_error(sftime_as_mysftime(mysft, "time")) - expect_no_error(check_mysftime(sftime_as_mysftime(mysft, "time"))) - expect_error(sftime_as_mysftime(mysft, "date")) + testthat::expect_no_error(sftime_as_mysftime(mysft, "time")) + testthat::expect_no_error( + check_mysftime(sftime_as_mysftime(mysft, "time")) + ) + testthat::expect_error(sftime_as_mysftime(mysft, "date")) attributes(mysft)$time_column <- "date" mysft <- dplyr::rename(mysft, "date" = "time") - expect_no_error(check_mysftime(sf_as_mysftime(mysft, "date"))) - expect_error(sf_as_mysftime(mysft, "time"), - "time column missing or mispelled") + testthat::expect_no_error(check_mysftime(sf_as_mysftime(mysft, "date"))) + testthat::expect_error( + sf_as_mysftime(mysft, "time"), + "time column missing or mispelled" + ) }) -test_that("spatraster_as_sftime works as expected", { +testthat::test_that("spatraster_as_sftime works as expected", { + withr::local_package("terra") + withr::local_package("sftime") + withr::local_options(list(sf_use_s2 = FALSE)) myrast <- terra::rast( extent = c(-112, -101, 33.5, 40.9), @@ -384,20 +419,23 @@ test_that("spatraster_as_sftime works as expected", { terra::add(myrast) <- c(myrast**2, myrast**3) names(myrast) <- c("2023-11-01", "2023-11-02", "2023-11-03") # conversion should work - expect_no_error(spatraster_as_sftime(myrast, "myvar")) - expect_no_error(spatraster_as_sftime(myrast, "myvar", "date")) + testthat::expect_no_error(spatraster_as_sftime(myrast, "myvar")) + testthat::expect_no_error(spatraster_as_sftime(myrast, "myvar", "date")) mysft <- spatraster_as_sftime(myrast, "myvar", "date") - expect_equal(attributes(mysft)$time, "date") + testthat::expect_equal(attributes(mysft)$time, "date") # conversion does not work because raster's names are not dates names(myrast) <- c("roquefort", "comte", "camembert") - expect_error( + testthat::expect_error( spatraster_as_sftime(myrast, "myvar"), "x layers might not be time" ) }) -test_that("spatrds_as_sftime works as expected", { +testthat::test_that("spatrds_as_sftime works as expected", { + withr::local_package("terra") + withr::local_package("sftime") + withr::local_options(list(sf_use_s2 = FALSE)) var1 <- terra::rast( extent = c(-112, -101, 33.5, 40.9), @@ -428,13 +466,19 @@ test_that("spatrds_as_sftime works as expected", { myrds <- terra::sds(var1, var2) names(myrds) <- c("var1", "var2") # conversion should work - expect_no_error(spatrds_as_sftime(myrds, "time")) + testthat::expect_no_error(spatrds_as_sftime(myrds, "time")) mysft <- spatrds_as_sftime(myrds, "date") - expect_equal(attributes(mysft)$time, "date") + testthat::expect_equal(attributes(mysft)$time, "date") }) -test_that("sftime_as_sf works as expected", { +testthat::test_that("sftime_as_sf works as expected", { + withr::local_package("data.table") + 
withr::local_package("sf") + withr::local_package("sftime") + withr::local_options(list(sf_use_s2 = FALSE)) + + # open testing data stdata <- data.table::fread(paste0( testthat::test_path("..", "testdata/", ""), @@ -445,15 +489,26 @@ test_that("sftime_as_sf works as expected", { time_column_name = "time", crs = 4326 ) - expect_no_error(sftime_as_sf(mysftime)) - expect_no_error(sftime_as_sf(mysftime, keeptime = FALSE)) - expect_equal(class(sftime_as_sf(mysftime))[1], "sf") - expect_equal(class(sftime_as_sf(mysftime, keeptime = FALSE))[1], "sf") - expect_true("time" %in% colnames(sftime_as_sf(mysftime, keeptime = TRUE))) - expect_false("time" %in% colnames(sftime_as_sf(mysftime, keeptime = FALSE))) + testthat::expect_no_error(sftime_as_sf(mysftime)) + testthat::expect_no_error(sftime_as_sf(mysftime, keeptime = FALSE)) + testthat::expect_equal(class(sftime_as_sf(mysftime))[1], "sf") + testthat::expect_equal( + class(sftime_as_sf(mysftime, keeptime = FALSE))[1], "sf" + ) + testthat::expect_true( + "time" %in% colnames(sftime_as_sf(mysftime, keeptime = TRUE)) + ) + testthat::expect_false( + "time" %in% colnames(sftime_as_sf(mysftime, keeptime = FALSE)) + ) }) -test_that("sftime_as_sf works as expected", { +testthat::test_that("sftime_as_sf works as expected", { + withr::local_package("data.table") + withr::local_package("sf") + withr::local_package("sftime") + withr::local_options(list(sf_use_s2 = FALSE)) + # open testing data stdata <- data.table::fread(paste0( testthat::test_path("..", "testdata/", ""), @@ -464,16 +519,25 @@ test_that("sftime_as_sf works as expected", { time_column_name = "time", crs = 4326 ) - expect_no_error(sftime_as_sf(mysftime)) - expect_no_error(sftime_as_sf(mysftime, keeptime = FALSE)) - expect_equal(class(sftime_as_sf(mysftime))[1], "sf") - expect_equal(class(sftime_as_sf(mysftime, keeptime = FALSE))[1], "sf") - expect_true("time" %in% colnames(sftime_as_sf(mysftime, keeptime = TRUE))) - expect_false("time" %in% colnames(sftime_as_sf(mysftime, keeptime = FALSE))) + testthat::expect_no_error(sftime_as_sf(mysftime)) + testthat::expect_no_error(sftime_as_sf(mysftime, keeptime = FALSE)) + testthat::expect_equal(class(sftime_as_sf(mysftime))[1], "sf") + testthat::expect_equal( + class(sftime_as_sf(mysftime, keeptime = FALSE))[1], "sf" + ) + testthat::expect_true( + "time" %in% colnames(sftime_as_sf(mysftime, keeptime = TRUE)) + ) + testthat::expect_false( + "time" %in% colnames(sftime_as_sf(mysftime, keeptime = FALSE)) + ) }) -test_that("sftime_as_spatraster works as expected", { +testthat::test_that("sftime_as_spatraster works as expected", { + withr::local_package("terra") + withr::local_package("sftime") + withr::local_options(list(sf_use_s2 = FALSE)) myrast <- terra::rast( extent = c(-112, -101, 33.5, 40.9), @@ -485,15 +549,18 @@ test_that("sftime_as_spatraster works as expected", { terra::add(myrast) <- c(myrast**2, myrast**3) names(myrast) <- c("2023-11-01", "2023-11-02", "2023-11-03") mysftime <- as_mysftime(myrast, varname = "roquefort") - expect_no_error(sftime_as_spatraster(mysftime, "roquefort")) - expect_error( + testthat::expect_no_error(sftime_as_spatraster(mysftime, "roquefort")) + testthat::expect_error( sftime_as_spatraster(mysftime, "cheddar"), "varname missing or mispelled" ) }) -test_that("sftime_as_spatrds works as expected", { +testthat::test_that("sftime_as_spatrds works as expected", { + withr::local_package("terra") + withr::local_package("sftime") + withr::local_options(list(sf_use_s2 = FALSE)) var1 <- terra::rast( extent = c(-112, -101, 33.5, 
40.9), @@ -526,8 +593,8 @@ test_that("sftime_as_spatrds works as expected", { # create a structured sftime mysft <- spatrds_as_sftime(myrds, "time") # conversion should work - expect_no_error(sftime_as_spatrds(mysft)) - expect_error(sftime_as_spatrds("hello"), "x is not a sftime") + testthat::expect_no_error(sftime_as_spatrds(mysft)) + testthat::expect_error(sftime_as_spatrds("hello"), "x is not a sftime") rename_time(mysft, "date") - expect_no_error(sftime_as_spatrds(mysft)) + testthat::expect_no_error(sftime_as_spatrds(mysft)) }) diff --git a/tests/testthat/test-process.R b/tests/testthat/test-process.R index b0106f78..02e655ca 100644 --- a/tests/testthat/test-process.R +++ b/tests/testthat/test-process.R @@ -266,7 +266,6 @@ testthat::test_that("process_modis_merge is good to go", { ) ) - }) @@ -378,26 +377,30 @@ testthat::test_that("Other MODIS function errors", { testthat::expect_no_error( suppressWarnings( process_modis_swath( - path = path_mod06e, + path = path_mod06, + subdataset = "Cloud_Fraction_Night", date = "2021-08-15" ) ) ) testthat::expect_error( process_modis_swath( - path = path_mod06e, + path = path_mod06, + subdataset = "Cloud_Fraction_Night", date = "2021~08~15" ) ) testthat::expect_error( process_modis_swath( - path = path_mod06e, + path = path_mod06, + subdataset = "Cloud_Fraction_Night", date = "2021-13-15" ) ) testthat::expect_error( process_modis_swath( - path = path_mod06e, + path = path_mod06, + subdataset = "Cloud_Fraction_Night", date = "2021-12-45" ) ) @@ -407,12 +410,27 @@ testthat::test_that("Other MODIS function errors", { # test Ecoregions #### testthat::test_that("read ecoregion", { withr::local_package("terra") + withr::local_package("sf") + withr::local_options(list(sf_use_s2 = FALSE)) path_eco <- testthat::test_path("..", "testdata", "eco_l3_clip.gpkg") - testthat::expect_no_error( process_ecoregion(path_eco) ) + + ecotemp <- sf::st_read(path_eco) + # nolint start + addpoly <- + "POLYGON ((-70.2681 43.6787, -70.252234 43.677145, -70.251036 -43.680758, -70.268666 43.681505, -70.2681 43.6787))" + # nolint end + addpoly <- sf::st_as_sfc(addpoly, crs = "EPSG:4326") + addpoly <- sf::st_transform(addpoly, sf::st_crs(ecotemp)) + ecotemp[1, "geom"] <- addpoly + tdir <- tempdir() + sf::st_write(ecotemp, paste0(tdir, "/ecoregions.gpkg"), append = FALSE) + testthat::expect_no_error( + suppressWarnings(process_ecoregion(paste0(tdir, "/ecoregions.gpkg"))) + ) }) @@ -483,11 +501,14 @@ testthat::test_that("process_nei tests", { path_cnty$GEOID <- path_cnty$FIPS testthat::expect_no_error( - neinc <- process_nei(path = path_nei, year = 2020, county = path_cnty) + neinc <- process_nei(path = path_nei, year = 2017, county = path_cnty) ) testthat::expect_s4_class(neinc, "SpatVector") # error cases + testthat::expect_error( + process_nei(testthat::test_path("../testdata", "modis"), year = 2017) + ) testthat::expect_error( process_nei(path_nei, year = 2030, county = path_cnty) ) @@ -505,7 +526,6 @@ testthat::test_that("process_nei tests", { }) - ## ephemeral: process_conformity tests testthat::test_that("process_conformity tests", { withr::local_package("terra") @@ -599,6 +619,15 @@ testthat::test_that("process_sedac_population returns null for netCDF.", { ) }) +testthat::test_that("sedac_codes", { + string <- "2.5 minute" + testthat::expect_no_error( + code <- process_sedac_codes(string) + ) + testthat::expect_equal(code, "2pt5_min") +}) + + # test HMS #### testthat::test_that("process_hms returns expected.", { withr::local_package("terra") @@ -608,7 +637,7 @@ 
testthat::test_that("process_hms returns expected.", { "Heavy" ) # expect function - expect_true( + testthat::expect_true( is.function(process_hms) ) for (d in seq_along(densities)) { @@ -623,30 +652,30 @@ testthat::test_that("process_hms returns expected.", { ) ) # expect output is a SpatVector or character - expect_true( + testthat::expect_true( class(hms)[1] %in% c("SpatVector", "character") ) if (class(hms)[1] == "SpatVector") { # expect non-null coordinate reference system - expect_false( + testthat::expect_false( is.null(terra::crs(hms)) ) # expect two columns - expect_true( + testthat::expect_true( ncol(hms) == 2 ) # expect density and date column - expect_true( + testthat::expect_true( all(c("Density", "Date") %in% names(hms)) ) } else if (class(hms)[1] == "character") { # expect first is density type - expect_true( + testthat::expect_true( hms[1] %in% c("Light", "Medium", "Heavy") ) - # expect other elements are 8 character dates - expect_true( - all(nchar(hms[2:length(hms)]) == 8) + # expect other elements are 10 character dates + testthat::expect_true( + all(nchar(hms[2:length(hms)]) == 10) ) } } @@ -726,6 +755,32 @@ testthat::test_that("import_gmted returns error with non-vector variable.", { ) }) +testthat::test_that("gmted_codes inversion", { + teststring <- "mx" + testthat::expect_no_error( + statorig <- process_gmted_codes( + teststring, + statistic = TRUE, + resolution = FALSE, + invert = TRUE + ) + ) + testthat::expect_equal(statorig, "Maximum Statistic") + + teststring <- "75" + testthat::expect_no_error( + resoorig <- process_gmted_codes( + teststring, + statistic = FALSE, + resolution = TRUE, + invert = TRUE + ) + ) + testthat::expect_equal(resoorig, "7.5 arc-seconds") +}) + + +## test NARR #### testthat::test_that("process_narr returns expected.", { withr::local_package("terra") variables <- c( @@ -928,8 +983,8 @@ testthat::test_that("process_locs_vector vector data and missing columns.", { locs_id = "site_id" ) ) - # expect error when sites are SpatVector - expect_error( + # expect error when sites are SpatVector (points) + expect_no_error( calc_narr( from = narr, locs = terra::vect( @@ -940,6 +995,45 @@ testthat::test_that("process_locs_vector vector data and missing columns.", { locs_id = "site_id" ) ) + # expect error when sites are SpatVector (polygons) + expect_no_error( + calc_narr( + from = narr, + locs = terra::buffer( + terra::vect( + ncp, + geom = c("lon", "lat"), + crs = "EPSG:4326" + ), + 1000 + ), + locs_id = "site_id" + ) + ) + # expect error when sites are sf + expect_no_error( + calc_narr( + from = narr, + locs = sf::st_as_sf( + ncp, + coords = c("lon", "lat"), + crs = "EPSG:4326" + ), + locs_id = "site_id" + ) + ) + # error if one of "lat" or "lon" is missing (or both) + ncpp <- data.frame(long = -78.8277, lat = 35.95013) + ncpp$site_id <- "3799900018810101" + + expect_error( + process_locs_vector( + locs = ncpp, crs = "EPSG:4326", 0 + ) + ) + expect_error( + process_locs_vector(array(1)) + ) }) # test AQS #### @@ -958,33 +1052,116 @@ testthat::test_that("process_aqs", { # main test testthat::expect_no_error( - aqs <- process_aqs(path = aqssub, date = NULL) + aqsft <- process_aqs( + path = aqssub, + date = c("2022-02-04", "2022-02-28"), + mode = "full", + return_format = "terra" + ) ) testthat::expect_no_error( - aqse <- process_aqs( + aqsst <- process_aqs( path = aqssub, - date = c("2022-02-04", "2022-02-28") + date = c("2022-02-04", "2022-02-28"), + mode = "sparse", + return_format = "terra" + ) + ) + testthat::expect_no_error( + aqslt <- 
process_aqs( + path = aqssub, + date = c("2022-02-04", "2022-02-28"), + mode = "location", + return_format = "terra" ) ) # expect - testthat::expect_s4_class(aqs, "SpatVector") - testthat::expect_s4_class(aqse, "SpatVector") + testthat::expect_s4_class(aqsft, "SpatVector") + testthat::expect_s4_class(aqsst, "SpatVector") + testthat::expect_s4_class(aqslt, "SpatVector") testthat::expect_no_error( - aqssf <- process_aqs(path = aqssub, date = NULL, return_format = "sf") + aqsfs <- process_aqs( + path = aqssub, + date = c("2022-02-04", "2022-02-28"), + mode = "full", + return_format = "sf" + ) + ) + testthat::expect_no_error( + aqsss <- process_aqs( + path = aqssub, + date = c("2022-02-04", "2022-02-28"), + mode = "sparse", + return_format = "sf" + ) ) testthat::expect_no_error( - aqsesf <- process_aqs( + aqsls <- process_aqs( path = aqssub, date = c("2022-02-04", "2022-02-28"), + mode = "location", return_format = "sf" ) ) + testthat::expect_s3_class(aqsfs, "sf") + testthat::expect_s3_class(aqsss, "sf") + testthat::expect_s3_class(aqsls, "sf") + + testthat::expect_no_error( + aqsfd <- process_aqs( + path = aqssub, + date = c("2022-02-04", "2022-02-28"), + mode = "full", + return_format = "data.table" + ) + ) + testthat::expect_no_error( + aqssd <- process_aqs( + path = aqssub, + date = c("2022-02-04", "2022-02-28"), + mode = "sparse", + return_format = "data.table" + ) + ) + testthat::expect_no_error( + aqssdd <- process_aqs( + path = aqssub, + date = c("2022-02-04", "2022-02-28"), + mode = "sparse", + data_field = "Arithmetic.Mean", + return_format = "data.table" + ) + ) + testthat::expect_no_error( + aqsld <- process_aqs( + path = aqssub, + date = c("2022-02-04", "2022-02-28"), + mode = "location", + return_format = "data.table" + ) + ) + testthat::expect_no_error( + aqsldd <- process_aqs( + path = aqssub, + date = c("2022-02-04", "2022-02-28"), + mode = "location", + data_field = "Arithmetic.Mean", + return_format = "data.table" + ) + ) + testthat::expect_s3_class(aqsfd, "data.table") + testthat::expect_s3_class(aqssd, "data.table") + testthat::expect_s3_class(aqssdd, "data.table") + testthat::expect_s3_class(aqsld, "data.table") + testthat::expect_s3_class(aqsldd, "data.table") + testthat::expect_no_error( aqssf <- process_aqs( path = testd, date = c("2022-02-04", "2022-02-28"), + mode = "location", return_format = "sf" ) ) @@ -1000,10 +1177,11 @@ testthat::test_that("process_aqs", { # expect testthat::expect_s3_class(aqssf, "sf") - testthat::expect_s3_class(aqsesf, "sf") - # error cases + testthat::expect_error( + process_aqs(testthat::test_path("../testdata", "modis")) + ) testthat::expect_error( process_aqs(path = 1L) ) @@ -1363,47 +1541,55 @@ testthat::test_that("gridmet and terraclimate auxiliary functions.", { # test PRISM #### -test_that("process_prism returns a SpatRaster object with correct metadata", { - # Set up test data - withr::local_package("terra") - path <- testthat::test_path( - "..", "testdata", "prism", "PRISM_tmin_30yr_normal_4kmD1_0228_bil_test.nc" - ) - path_dir <- testthat::test_path( - "..", "testdata", "prism" - ) - element <- "tmin" - time <- "0228" +testthat::test_that( + "process_prism returns a SpatRaster object with correct metadata", + { + # Set up test data + withr::local_package("terra") + path <- testthat::test_path( + "..", "testdata", "prism", "PRISM_tmin_30yr_normal_4kmD1_0228_bil_test.nc" + ) + path_dir <- testthat::test_path( + "..", "testdata", "prism" + ) + element <- "tmin" + time <- "0228" - # Call the function - expect_no_error(result <- 
process_prism(path, element, time)) - expect_no_error(result2 <- process_prism(path_dir, element, time)) + # Call the function + testthat::expect_no_error(result <- process_prism(path, element, time)) + testthat::expect_no_error(result2 <- process_prism(path_dir, element, time)) - # Check the return type - expect_true(inherits(result, "SpatRaster")) - expect_true(inherits(result2, "SpatRaster")) + # Check the return type + testthat::expect_true(inherits(result, "SpatRaster")) + testthat::expect_true(inherits(result2, "SpatRaster")) - # Check the metadata - expect_equal(unname(terra::metags(result)["time"]), time) - expect_equal(unname(terra::metags(result)["element"]), element) + # Check the metadata + testthat::expect_equal(unname(terra::metags(result)["time"]), time) + testthat::expect_equal(unname(terra::metags(result)["element"]), element) - # Set up test data - path_bad <- "/path/to/nonexistent/folder" - element_bad <- "invalid_element" - time_bad <- "invalid_time" + # Set up test data + path_bad <- "/path/to/nonexistent/folder" + element_bad <- "invalid_element" + time_bad <- "invalid_time" - # Call the function and expect an error - expect_error(process_prism(NULL, element, time)) - expect_error(process_prism(path_bad, element, time)) - expect_error(process_prism(path_dir, element_bad, time)) - expect_error(process_prism(path_dir, element, time_bad)) -}) + # Call the function and expect an error + testthat::expect_error(process_prism(NULL, element, time)) + testthat::expect_error( + testthat::expect_warning( + process_prism(path_bad, element, time) + ) + ) + testthat::expect_error(process_prism(path_dir, element_bad, time)) + testthat::expect_error(process_prism(path_dir, element, time_bad)) + } +) # test CropScape #### testthat::test_that( "process_cropscape returns a SpatRaster object with correct metadata", { # Set up test data + withr::local_package("terra") filepath <- testthat::test_path("..", "testdata/cropscape/cdl_30m_r_nc_2019_sub.tif") dirpath <- testthat::test_path("..", "testdata/cropscape") @@ -1476,24 +1662,13 @@ testthat::test_that("process_huc", ) ) - # Set up test data - path <- file.path(path, "..") + path2 <- testthat::test_path( + "..", "testdata", "huc12" + ) # Call the function and expect an error - testthat::expect_error(process_huc(path)) - # using nhdplusTools - testthat::expect_no_error( - test3 <- process_huc( - "", - layer_name = NULL, - huc_level = NULL, - huc_header = NULL, - id = "030202", - type = "huc06" - ) - ) - testthat::expect_s4_class(test3, "SpatVector") + testthat::expect_error(process_huc(path2)) } ) @@ -1512,3 +1687,72 @@ testthat::test_that("process_olm", { ) }) # nolint end + +## AUX tests #### +testthat::test_that("loc_radius tests", { + withr::local_package("terra") + withr::local_package("sf") + withr::local_options(list(sf_use_s2 = FALSE)) + + lon <- seq(-112, -101, length.out = 5) # create lon sequence + lat <- seq(33.5, 40.9, length.out = 5) # create lat sequence + df <- expand.grid("lon" = lon, "lat" = lat) # expand to regular grid + df <- rbind(df, df) + df$time <- c(rep("2023-11-02", 25), rep("2023-11-03", 25)) + df$var1 <- 1:50 + df$var2 <- 51:100 + dfsf <- sf::st_as_sf( + df, + coords = c("lon", "lat"), + crs = "EPSG:4326", + remove = FALSE + ) + dftr <- terra::vect(dfsf) + + testthat::expect_no_error( + dftrb00 <- process_locs_radius(dftr, 0) + ) + testthat::expect_no_error( + dftrb1k <- process_locs_radius(dftr, 1000L) + ) + testthat::expect_true(terra::geomtype(dftrb00) == "points") + 
testthat::expect_true(terra::geomtype(dftrb1k) == "polygons") + testthat::expect_s4_class(dftrb00, "SpatVector") + testthat::expect_s4_class(dftrb1k, "SpatVector") +}) + +testthat::test_that("process_locs_vector tests", { + withr::local_package("terra") + withr::local_package("sf") + withr::local_options(list(sf_use_s2 = FALSE)) + + lon <- seq(-112, -101, length.out = 5) # create lon sequence + lat <- seq(33.5, 40.9, length.out = 5) # create lat sequence + df <- expand.grid("lon" = lon, "lat" = lat) # expand to regular grid + dfsf <- sf::st_as_sf( + df, + coords = c("lon", "lat"), + crs = "EPSG:4326", + remove = FALSE + ) + dftr <- terra::vect(dfsf) + + testthat::expect_no_error( + dftr1 <- process_locs_vector(dftr, "EPSG:4326", 0) + ) + testthat::expect_no_error( + dfsftr <- process_locs_vector(dfsf, "EPSG:4326", 0) + ) + testthat::expect_no_error( + dfdftr <- process_locs_vector(df, "EPSG:4326", 0) + ) + testthat::expect_no_error( + dfdftrb <- process_locs_vector(df, "EPSG:4326", radius = 1000L) + ) + testthat::expect_s4_class(dftr1, "SpatVector") + testthat::expect_s4_class(dfsftr, "SpatVector") + testthat::expect_s4_class(dfdftr, "SpatVector") + testthat::expect_s4_class(dfdftrb, "SpatVector") + testthat::expect_true(terra::geomtype(dfdftr) == "points") + testthat::expect_true(terra::geomtype(dfdftrb) == "polygons") +}) diff --git a/vignettes/download_functions.Rmd b/vignettes/download_functions.Rmd index 6ee91083..f180a70a 100644 --- a/vignettes/download_functions.Rmd +++ b/vignettes/download_functions.Rmd @@ -1,46 +1,42 @@ --- -title: "download_data() and NASA EarthData Account" +title: "download_data and NASA EarthData Account" output: rmarkdown::html_vignette vignette: > - %\VignetteIndexEntry{download_data() and NASA EarthData Account} + %\VignetteIndexEntry{download_data and NASA EarthData Account} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} -date: "2024-01-18" +date: "2024-05-02" author: "Mitchell Manware" --- -```{r, echo = FALSE, warning = FALSE, message = FALSE} +```{r, echo = FALSE, warning = FALSE, message = FALSE, results = "hide"} # packages library(knitr) library(testthat) +library(devtools) # source functions -sapply( - list.files( - "../R/", - pattern = "download", - full.names = TRUE - ), - source -) +devtools::load_all("../") ``` ## Motivation -The `data_download()` function was developed to improve researchers' access to publicly available geospatial datasets. Although the data are already available online, using a web browser to manually download hundreds or thousands of data files is slow, arduous, and not (efficiently) repeatable. Additionally, as users may not be familiar with creating download recipes in Bash (Unix shell), `data_download()` allows researchers to download data directly with R, a common coding language in the field of environmental health research. Finally, function-izing data downloads is useful for repeated code or automated analysis pipelines. +The `data_download` function was developed to improve researchers' access to publicly available geospatial datasets. Although the data are already available online, using a web browser to manually download hundreds or thousands of data files is slow, arduous, and not efficiently repeatable. 
A function which downloads raw data files onto the user's machine allows for repeatable downloads. + +Additionally, as users may not be familiar with creating download recipes in Bash (Unix shell), `data_download` allows researchers to download data directly with R, a common coding language in the field of environmental health research. Finally, function-izing data downloads is useful for repeated code or automated analysis pipelines. -## data_download() +## data_download -`data_download()` is capable of accessing and downloading geospatial datasets, collections, and variables from a variety of sources. This wrapper function calls on source-specific data download functions, each utilizing a unique combination of input parameters, host URL, naming convention, and data formats. +`data_download` is capable of accessing and downloading geospatial datasets, collections, and variables from a variety of sources. This wrapper function calls on source-specific data download functions, each utilizing a unique combination of input parameters, host URL, naming convention, and data formats. ```{r, echo = FALSE} functions <- c( - "download_aqs_data", "download_ecoregion_data", - "download_geos_cf_data", "download_gmted_data", - "download_koppen_geiger_data", "download_merra2_data", - "download_narr_monolevel_data", "download_narr_p_levels_data", - "download_nlcd_data", "download_hms_data", - "download_sedac_groads_data", "download_sedac_population_data", - "download_modis_data" + "download_aqs", "download_ecoregion", + "download_geos_cf_data", "download_gmted", + "download_koppen_geiger", "download_merra2", + "download_narr_monolevel", "download_narr_p_levels", + "download_nlcd", "download_hms", + "download_sedac_groads", "download_sedac_population", + "download_modis" ) source <- c( "US EPA Air Data Pre-Generated Data Files", @@ -102,18 +98,18 @@ kable(functions_sources, ) ``` -It is important to note that `data_download()` calls a source-specific function based on the `dataset_name =` parameter. Using the source-specific function directly will return the exact same data (**if the parameters are the same**), but `data_download()` may be beneficial if using a `for` loop to download data from various sources. For example, `download_data(dataset_name = "hms", ...)` will return the same data as `download_hms_data(...)` assuming that `...` indicates the same parameters. +It is important to note that `data_download` calls a source-specific function based on the `dataset_name` parameter. Using the source-specific function directly will return the exact same data (**if the parameters are the same**), but the error messages produced by each differ slightly. ### Parameters User-defined parameters differ based on the data source. Required parameters for each source can be checked with `names(formals())`. ```{r} -names(formals(download_hms_data)) -names(formals(download_narr_monolevel_data)) +names(formals(download_hms)) +names(formals(download_narr_monolevel)) ``` -The two functions have different required parameters because `download_hms_data()` uses a daily temporal resolution while `download_narr_monolevel_data()` uses yearly, but they share some common, standard parameters. +The two functions have different required parameters because `download_hms` uses a daily temporal resolution while `download_narr_monolevel` uses yearly, but they share some common, standard parameters. 
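The overlap can be checked directly. The following is a minimal sketch using base R only, assuming the package (and therefore both renamed functions) has been loaded, for example with `devtools::load_all` as in the setup chunk:

```{r}
# Minimal sketch: list the standard parameters shared by the two download
# functions by intersecting their formal argument names.
intersect(
  names(formals(download_hms)),
  names(formals(download_narr_monolevel))
)
```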
#### Standard parameters @@ -152,11 +148,11 @@ colnames(parameter_descriptions) <- c("Parameter", "Type", "Description") kable(parameter_descriptions) ``` -Additionally, the `dataset_name =` parameter must be specified when using `data_download()`, but is assumed when using a source-specific download function. +Additionally, the `dataset_name` parameter must be specified when using `data_download`, but is assumed when using a source-specific download function. ### Function Structure -Although each source-specific download function is unique, they all follow the same general structure. The following chunks of code have been **adopted** from `download_hms_data()` to demonstrate the functions' structure. +Although each source-specific download function is unique, they all follow the same general structure. The following chunks of code have been adapted from `download_hms` to demonstrate the functions' structure. [1. Clean Parameters] @@ -196,7 +192,7 @@ date_sequence #### 2. Generate download URLs -The URL base and pattern are identified by manually inspecting the download link on the source-specific web page. `download_hms_data()` utilizes the year, month, date, and data format to generate the download url. +The URL base and pattern are identified by manually inspecting the download link on the source-specific web page. `download_hms` utilizes the year, month, date, and data format to generate the download URL. ```{r} # user defined parameters @@ -257,13 +253,13 @@ A download URL is created for each date in `date_sequence` based on the fixed pa #### 4. Initiate "...commands.txt" -An important aspect of the data download function is its `sink()...cat()...sink()` structure. Rather than using the `utils::download.file()` function, a text file is created to store all of the download commands generated from the URLs and file names. +An important aspect of the data download function is its `sink...cat...sink` structure. Rather than using the `utils::download.file` function, a text file is created to store all of the download commands generated from the URLs and file names. This structure is utilized for several reasons: - Consistent structure for all the source-specific download functions. -- The `download.file()` function cannot accept vectors of URLs and destination files for downloading. An additional `for` loop to download data will increase function complexity and may reduce performance. +- The `download.file` function cannot accept vectors of URLs and destination files for downloading. An additional `for` loop to download data will increase function complexity and may reduce performance. - Writing commands in Bash (Unix shell) script allows for specific arguments and flags. @@ -311,7 +307,7 @@ for (d in seq_along(date_sequence)) { #### 6. Finalize "...commands.txt" -After the download commands have been concatenated to the commands text file, a second `sink()` command is run to finalize the file and stop the appending of R output. +After the download commands have been concatenated to the commands text file, a second `sink` command is run to finalize the file and stop the appending of R output. ```{r, eval = FALSE} sink() ``` @@ -330,7 +326,7 @@ system_command <- paste0( system_command ``` -Running the `system_command` deploys a "helper function", `download_run()`, a function created to reduce repeated code across the source-specific download functions. 
@@ -311,7 +307,7 @@ for (d in seq_along(date_sequence)) {
 
#### 6. Finalize "...commands.txt"
 
-After the download commands have been concatenated to the commands text file, a second `sink()` command is run to finalize the file and stop the appending of R output.
+After the download commands have been concatenated to the commands text file, a second `sink` command is run to finalize the file and stop appending R output.
 
 ```{r, eval = FALSE}
 sink()
 ```
 
@@ -330,7 +326,7 @@ system_command <- paste0(
 system_command
 ```
 
-Running the `system_command` deploys a "helper function", `download_run()`, a function created to reduce repeated code across the source-specific download functions. The function takes two parameters, `system_command =`, which indicates the command to be run, and `download =`, a user-defined logical parameter.
+Running the `system_command` is handled by a "helper function", `download_run`, created to reduce repeated code across the source-specific download functions. The function takes two parameters: `system_command`, which indicates the command to be run, and `download`, a user-defined logical parameter.
 
 ```{r}
 download_run <- function(
@@ -347,7 +343,7 @@ download_run <- function(
 }
 ```
 
-The data download is initiated by running `download_run()` with the system command identified and `download = TRUE`.
+The data download is initiated by running `download_run` with the identified system command and `download = TRUE`.
 
 ```{r, eval = FALSE}
 download_run(
@@ -369,9 +365,9 @@ paste0("hms_smoke_Shapefile_", date_sequence, ".zip")
 
#### 8. Zip files (if applicable) {#zip-files-if-applicable}
 
-All of the source-specific data download functions follow this general pattern, but those functions which download zip files require additional steps to inflate and remove the downloaded zip files, if desired. Each of these two steps are run by helper functions, and they are run by the user-defined `unzip = ` and `remove_zip = ` parameters in `data_download()`.
+All of the source-specific data download functions follow this general pattern, but those functions which download zip files require additional steps to inflate and remove the downloaded zip files, if desired. Each of these steps is performed by a helper function, controlled by the user-defined `unzip` and `remove_zip` parameters in `data_download`.
 
-`download_unzip()` inflates zip files if `unzip = TRUE`, and skips inflation if `unzip = FALSE`.
+`download_unzip` inflates zip files if `unzip = TRUE`, and skips inflation if `unzip = FALSE`.
 
 ```{r}
 download_unzip <-
@@ -394,7 +390,7 @@ download_unzip <-
 }
 ```
 
-`download_remove_zips()` removes the downloaded zip files if `remove = TRUE`, and skips removal if `remove = FALSE`.
+`download_remove_zips` removes the downloaded zip files if `remove = TRUE`, and skips removal if `remove = FALSE`.
 
 ```{r}
 download_remove_zips <-
@@ -464,7 +460,7 @@ The previous outline successfully cleaned parameters, generated URLs, and downlo
 
### Helper functions
 
-`read_commands()` imports the commands text file and converts the data frame to a vector.
+`read_commands` imports the commands text file and converts the data frame to a vector.
 
 ```{r}
 read_commands <- function(
@@ -475,7 +471,7 @@
 }
 ```
 
-`extract_urls()` extracts each download URL from the vector of commands. The `position =` of the URL within the download command is determined in [5. Concatenate download commands].
+`extract_urls` extracts each download URL from the vector of commands. The `position` of the URL within the download command is determined in [5. Concatenate download commands].
 
 ```{r}
 # function to extract URLs from vector
 extract_urls <- function(
@@ -495,7 +491,7 @@
 }
 ```
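+Chaining these two helpers looks like the following sketch (hypothetical usage: the argument names mirror the definitions above, the file name continues the earlier sketch, and the correct `position` value depends on the command structure from [5. Concatenate download commands]):
+
+```{r, eval = FALSE}
+# re-import the saved download commands as a character vector
+commands <- read_commands(commands_path = "example_curl_commands.txt")
+# extract the URL from each command; with "curl -s -o <file> --url <URL>",
+# the URL is the 6th space-separated element
+urls <- extract_urls(commands = commands, position = 6)
+```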
-`check_url_status()` is the most important of the download test "helper" functions. This function utilizes `httr::HEAD()` and `httr::GET()` to check the HTTP response status of a given URL. The desired HTTP response status is 200, which means the URL is valid and accessible. `check_url_status()` returns a logical value to indicate whether the URL returns HTTP status 200 (`TRUE`) or not (`FALSE`). For more information on HTTP status', see [HTTP response status codes](https://developer.mozilla.org/en-US/docs/Web/HTTP/Status).
+`check_url_status` is the most important of the download test "helper" functions. This function utilizes `httr::HEAD` and `httr::GET` to check the HTTP response status of a given URL. The desired HTTP response status is 200, which means the URL is valid and accessible. `check_url_status` returns a logical value to indicate whether the URL returns HTTP status 200 (`TRUE`) or not (`FALSE`). For more information on HTTP status codes, see [HTTP response status codes](https://developer.mozilla.org/en-US/docs/Web/HTTP/Status).
 
 ```{r}
 check_url_status <- function(
@@ -512,7 +508,7 @@
 }
 ```
 
-`check_urls()` applies `check_url_status()` to a random sample of URLs extracted by `extract_urls()`. The sample size will vary based on the dataset and spatio-temporal parameters being tested. The function returns a logical vector containing the output from `check_url_status()`.
+`check_urls` applies `check_url_status` to a random sample of URLs extracted by `extract_urls`. The sample size will vary based on the dataset and spatio-temporal parameters being tested. The function returns a logical vector containing the output from `check_url_status`.
 
 ```{r}
 check_urls <- function(
@@ -537,7 +533,7 @@
 
### testthat
 
-To demonstrate a test in action, test the URLs generated by `download_data()` for the NOAA HMS Smoke dataset.
+To demonstrate a test in action, test the URLs generated by `download_data` for the NOAA HMS Smoke dataset.
 
For more information, see [testthat](https://testthat.r-lib.org/).
 
@@ -549,14 +545,13 @@ testthat::test_that(
   # parameters
   test_start <- "2023-12-28"
   test_end <- "2024-01-02"
-  test_directory <- "./data"
+  test_directory <- "./data/"
   # download
   download_data(
     dataset_name = "noaa",
     date_start = test_start,
     date_end = test_end,
     data_format = "Shapefile",
-    directory_to_download = test_directory,
     directory_to_save = test_directory,
     acknowledgement = TRUE,
     download = FALSE,
@@ -597,7 +592,6 @@ testthat::test_that(
     date_start = test_start,
     date_end = test_end,
     data_format = "Shapefile",
-    directory_to_download = test_directory,
     directory_to_save = test_directory,
     acknowledgement = TRUE,
     download = FALSE,
@@ -626,9 +620,9 @@ testthat::test_that(
 ```
 
-Although the `testthat::test_that(...)` chunk contains 32 lines of code, the unit test is performed by `expect_true(all(url_status))`. In words, this line is expecting (`expect_true()`) that all (`all()`) of the sampled URLs return HTTP response status 200 (`url_status`). Since this expectation was met, the test passed!
+Although the `testthat::test_that(...)` chunk contains 32 lines of code, the unit test is performed by `expect_true(all(url_status))`. In words, this line expects (`expect_true`) that all (`all`) of the sampled URLs return HTTP response status 200 (`url_status`). Since this expectation was met, the test passed!
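+In isolation, the expectation behaves as follows (a toy sketch, separate from the vignette's actual tests):
+
+```{r, eval = FALSE}
+# all() collapses the logical vector to a single value;
+# expect_true() passes only when that value is TRUE
+url_status <- c(TRUE, TRUE, TRUE)
+testthat::expect_true(all(url_status)) # passes
+testthat::expect_true(all(c(url_status, FALSE))) # fails
+```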
-For an alternate example, we can use a start and end date that are known to not have data. As the URLs associated with these dates do not exist, we expect the function will fail. This test utilizes `expect_error()` because the `data_download()` wrapper function returns an error message if the underlying source-specific download function returns an error.
+For an alternate example, we can use a start and end date that are known to not have data. As the URLs associated with these dates do not exist, we expect the function to fail. This test utilizes `testthat::expect_error` because the `data_download` wrapper function returns an error message if the underlying source-specific download function returns an error.
 
 ```{r}
 testthat::test_that(
@@ -659,7 +653,7 @@
 )
 ```
 
-This test utilizes `expect_error()` because the `data_download()` wrapper function returns an error message if the underlying source-specific download function returns an error. If we directly used the `download_hms_data` function, we would expect and receive an error.
+Again, `testthat::expect_error` is used because the `data_download` wrapper only relays the error raised by the underlying source-specific download function. If we directly used the `download_hms` function, we would expect and receive the error itself.
 
 ```{r}
 testthat::test_that(
@@ -671,7 +665,7 @@
   test_directory <- "../inst/extdata/"
   # test for error
   testthat::expect_error(
-    download_hms_data(
+    download_hms(
       date_start = test_start,
       date_end = test_end,
       data_format = "Shapefile",
@@ -688,17 +682,20 @@
 )
 ```
 
+```{r, echo = FALSE, include = FALSE}
+file.remove(commands_txt)
+```
+
As expected, the test passes because the NOAA HMS Smoke dataset does not contain data for January 1-2, 1800.
 
-These unit tests are just two of many implemented on `download_data()` and the accompanying source-specific download functions, but they demonstrate how unit testing helps build stable code.
+These unit tests are just two of many implemented on `download_data` and the accompanying source-specific download functions, but they demonstrate how unit testing helps build stable code.
 
## Download Example
 
With the function structure outlined and the unit tests in place, we can now perform a data download. To begin, check the parameters required by the source-specific data download function.
 
 ```{r}
-names(formals(download_hms_data))
+names(formals(download_hms))
 ```
 
Define the parameters.
 
@@ -771,7 +768,7 @@ zips
 
## NASA EarthData Account
 
-As mentioned in [Motivation], `data_download()` provides access to **publicly available** geospatial data. Although publicly available, some of the NASA data sources require a NASA EarthData Account.
+As mentioned in [Motivation], `data_download` provides access to publicly available geospatial data. Although publicly available, some of the NASA data sources require a NASA EarthData Account.
 
For example, the UN WPP-Adjusted population density data from NASA Socioeconomic Data and Applications Center (SEDAC) requires an EarthData account. Without an EarthData Account and the prerequisite files prepared, the data download functions will return an error.
 
@@ -855,13 +852,13 @@ if (.Platform$OS.type == "unix") {
 }
 ```
 
-Create a file named `.netrc` with `file.create()`.
+Create a file named `.netrc` with `file.create`.
 
 ```{r, eval = FALSE}
 file.create(".netrc")
 ```
 
-Open a connection to `.netrc` with `sink()`. Write the line `machine urs...` replacing `YOUR_USERNAME` and `YOUR_PASSWORD` with your NASA EarthData username and password, respectively. After writing the line, close the connection with `sink()` again.
+Open a connection to `.netrc` with `sink`. Write the line `machine urs...`, replacing `YOUR_USERNAME` and `YOUR_PASSWORD` with your NASA EarthData username and password, respectively. After writing the line, close the connection with `sink` again.
 
 ```{r, eval = FALSE}
 sink(".netrc")
@@ -912,7 +909,7 @@ if (.Platform$OS.type == "unix") {
 }
 ```
 
-Create a file named `.netrc` with `file.create()`.
+Create a file named `.urs_cookies` with `file.create`.
 
 ```{r, eval = FALSE}
 file.create(".urs_cookies")
 ```
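+As a quick optional check (a sketch; not part of the original workflow), confirm that the empty cookie file now exists:
+
+```{r, eval = FALSE}
+# should return TRUE once the file has been created
+file.exists(".urs_cookies")
+```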
@@ -942,13 +939,13 @@ if (.Platform$OS.type == "unix") {
 }
 ```
 
-Create a file named ".dodsrc" with `file.create()`
+Create a file named `.dodsrc` with `file.create`.
 
 ```{r, eval = FALSE}
 file.create(".dodsrc")
 ```
 
-Open a connection to `.dodsrc` with `sink()`. Write the lines beginning with `HTTP.`, replacing `YOUR_USERNAME` and `YOUR_PASSWORD` with your NASA EarthData username and password, respectively. After writing the line, close the connection with `sink()` again.
+Open a connection to `.dodsrc` with `sink`. Write the lines beginning with `HTTP.`, replacing `YOUR_USERNAME` and `YOUR_PASSWORD` with your NASA EarthData username and password, respectively. After writing the lines, close the connection with `sink` again.
 
 ```{r, eval = FALSE}
 sink(".dodsrc")
@@ -984,7 +981,7 @@ paste0(
 )
 ```
 
-If working on a **Windows** machine, copy the `.dodsrc` file to the project working directory. Replace `YOUR_WORKING_DIRECTORY` with the absolute path to the project working directory.
+If working on a Windows machine, copy the `.dodsrc` file to the project working directory, replacing `YOUR_WORKING_DIRECTORY` with the absolute path to that directory.
 
 ```{r}
 if (.Platform$OS.type == "windows") {
@@ -1005,7 +1002,6 @@ download_data(
   year = "2020",
   data_format = "GeoTIFF",
   data_resolution = "60 minute",
-  directory_to_download = "./sedac_population/",
   directory_to_save = "./sedac_population",
   acknowledgement = TRUE,
   download = TRUE,
@@ -1055,8 +1051,8 @@ sedac_files
 
## Code Example
 
-The following is the entire R code used to create `download_hms_data()`.
+The following is the entire R code used to create `download_hms`.
 
 ```{r}
-download_hms_data
+download_hms
 ```
diff --git a/vignettes/images/readme_issues.png b/vignettes/images/readme_issues.png
new file mode 100644
index 00000000..5e90a628
Binary files /dev/null and b/vignettes/images/readme_issues.png differ