Skip to content

Commit

Permalink
Implement data.table checkHzDepthLogic RE #157, #174 (#190)
Browse files Browse the repository at this point in the history
* checkHzDepthLogic: use data.table instead of profileApply #157 #174

* data.table: R CMD check-safe .() -> list() and define "undefined globals" as null locally #157 #174

* #190; looks like this got uncommented when I rebased onto latest changes
  • Loading branch information
brownag committed Jan 21, 2021
1 parent 5cd4427 commit 006ed6d
Show file tree
Hide file tree
Showing 4 changed files with 244 additions and 119 deletions.
141 changes: 104 additions & 37 deletions R/checkHzDepthLogic.R
Original file line number Diff line number Diff line change
@@ -1,51 +1,118 @@

## related issues:
# ## https://github.com/ncss-tech/aqp/issues/65

## general-purpose hz depth logic check
# assumes that data are sorted ID, top ASC
# x: SoilProfileCollection object to check
checkHzDepthLogic <- function(x) {
#' Check a SoilProfileCollection object for errors in horizon depths.
#'
#' @description This function inspects a SoilProfileCollection object, looking for four common errors in horizon depths:
#'
#' 1. bottom depth shallower than top depth
#' 2. equal top and bottom depth
#' 3. missing top or bottom depth (e.g. `NA`)
#' 4. gap or overlap between adjacent horizons
#'
#' @param x SoilProfileCollection object to check
#'
#' @param fast If details about specific test results are not needed, the operation can allocate less memory and run approximately 5x faster. Default: `FALSE`
#'
#' @return A `data.frame` containing profile IDs, validity boolean (`valid`) and test results if `fast = FALSE`.
#'
#' The `data.frame` will have as many rows as profiles in `x` (`length(x)`).
#'
#' - `id` : Profile IDs, named according to `idname(x)`
#' - `valid` : boolean, profile passes all of the following tests
#' - `depthLogic` : boolean, errors related to depth logic
#' - `sameDepth` : boolean, errors related to same top/bottom depths
#' - `missingDepth` : boolean, NA in top / bottom depths
#' - `overlapOrGap` : boolean, gaps or overlap in adjacent horizons
#'
#' @export
#' @author D.E. Beaudette, A.G. Brown
#' @examples
#'
#' ## sample data
#'
#' data(sp3)
#' depths(sp3) <- id ~ top + bottom
#'
#' # these data should be clean
#' res <- checkHzDepthLogic(sp3)
#'
#' head(res)
#'
#' # less memory if only concerned about net validity
#' res <- checkHzDepthLogic(sp3, fast = TRUE)
#'
#' head(res)
#'
checkHzDepthLogic <- function(x, fast = FALSE) {

# used inside / outside of scope of .check()
htb <- horizonDepths(x)
stopifnot(inherits(x, 'SoilProfileCollection'))
h <- data.table::as.data.table(horizons(x))
hzd <- horizonDepths(x)
idn <- idname(x)
hby <- substitute(idn)
top <- substitute(hzd[1])
bottom <- substitute(hzd[2])

.check <- function(i) {
# extract pieces
h <- horizons(i)
res <- NULL

# data.table R CMD check
tests <- NULL

if (!fast) {

# convenience vars
ID.i <- h[[idn]][1]
.top <- h[[htb[1]]]
.bottom <- h[[htb[2]]]
res <- h[, list(tests = list(tests = data.frame(t(hzDepthTests(eval(top), eval(bottom)))))), by = c(eval(hby))][,
list(tests = tests, valid = all(!tests[[1]])), by = c(eval(hby))]

# hzTests takes two numeric vectors and returns named logical
test <- hzDepthTests(.top, .bottom)
res <- cbind(res, data.table::rbindlist(res$tests))
res$tests <- NULL

# pack into DF, 1 row per profile
res <- data.frame(
.id=ID.i,
depthLogic=test[1],
sameDepth=test[2],
missingDepth=test[3],
overlapOrGap=test[4],
stringsAsFactors = FALSE
)
} else {

# re-name .id -> idname(x)
names(res)[1] <- idn
res <- h[, all(!hzDepthTests(eval(top), eval(bottom))), by = c(eval(hby))]
colnames(res) <- c(idname(x), "valid")

return(res)
}

# iterate over profiles, result is safely packed into a DF ready for splicing into @site
res <- profileApply(x, .check, simplify = FALSE, frameify = TRUE)
return(as.data.frame(res))

# add 'valid' flag for simple filtering
res[['valid']] <- !apply(res[, -1], 1, any)

return(res)
#
# # used inside / outside of scope of .check()
# htb <- horizonDepths(x)
# idn <- idname(x)
#
# .check <- function(i) {
# # extract pieces
# h <- horizons(i)
#
# # convenience vars
# ID.i <- h[[idn]][1]
# .top <- h[[htb[1]]]
# .bottom <- h[[htb[2]]]
#
# # hzTests takes two numeric vectors and returns named logical
# test <- hzDepthTests(.top, .bottom)
#
# # pack into DF, 1 row per profile
# res <- data.frame(
# .id=ID.i,
# depthLogic=test[1],
# sameDepth=test[2],
# missingDepth=test[3],
# overlapOrGap=test[4],
# stringsAsFactors = FALSE
# )
#
# # re-name .id -> idname(x)
# names(res)[1] <- idn
#
# return(res)
# }
#
# # iterate over profiles, result is safely packed into a DF ready for splicing into @site
# res <- profileApply(x, .check, simplify = FALSE, frameify = TRUE)
#
# # add 'valid' flag for simple filtering
# res[['valid']] <- !apply(res[, -1], 1, any)
#
# return(res)
}

#' @title Tests of horizon depth logic
Expand Down
88 changes: 44 additions & 44 deletions R/test_hz_logic.R
Original file line number Diff line number Diff line change
@@ -1,44 +1,44 @@
## https://github.com/ncss-tech/aqp/issues/65

# depricated
test_hz_logic <- function(i, topcol, bottomcol, strict=FALSE) {

## not using this function any longer, will remove by aqp 2.0
.Deprecated('checkHzDepthLogic')


# test for NA
if(any(c(is.na(i[[topcol]])), is.na(i[[bottomcol]]))) {
res <- FALSE
names(res) <- 'hz_logic_pass'
return(res)
}

# test for overlapping OR non-contiguous horizon boundaries
if(strict) {
n <- nrow(i)
res <- all.equal(i[[topcol]][-1], i[[bottomcol]][-n])
if(res != TRUE)
res <- FALSE
names(res) <- 'hz_logic_pass'
return(res)
}



# test for overlapping horizons
# note: this will fail if an O horizon is described using the old style O 3--0cm
m <- cbind(i[[topcol]], i[[bottomcol]])
unzipped.depths <- unlist(apply(m, 1, function(i) seq(from=i[1], to=i[2], by=1)))
len.overlapping <- length(which(table(unzipped.depths) > 1))
n.hz <- nrow(i)

# there should be 1 fewer segments of overlap than there are horizons
if(len.overlapping > (n.hz - 1))
res <- FALSE
else
res <- TRUE
names(res) <- 'hz_logic_pass'
return(res)

}
# ## https://github.com/ncss-tech/aqp/issues/65
#
# # deprecated
# test_hz_logic <- function(i, topcol, bottomcol, strict=FALSE) {
#
# ## not using this function any longer, will remove by aqp 2.0
# .Deprecated('checkHzDepthLogic')
#
#
# # test for NA
# if(any(c(is.na(i[[topcol]])), is.na(i[[bottomcol]]))) {
# res <- FALSE
# names(res) <- 'hz_logic_pass'
# return(res)
# }
#
# # test for overlapping OR non-contiguous horizon boundaries
# if(strict) {
# n <- nrow(i)
# res <- all.equal(i[[topcol]][-1], i[[bottomcol]][-n])
# if(res != TRUE)
# res <- FALSE
# names(res) <- 'hz_logic_pass'
# return(res)
# }
#
#
#
# # test for overlapping horizons
# # note: this will fail if an O horizon is described using the old style O 3--0cm
# m <- cbind(i[[topcol]], i[[bottomcol]])
# unzipped.depths <- unlist(apply(m, 1, function(i) seq(from=i[1], to=i[2], by=1)))
# len.overlapping <- length(which(table(unzipped.depths) > 1))
# n.hz <- nrow(i)
#
# # there should be 1 fewer segments of overlap than there are horizons
# if(len.overlapping > (n.hz - 1))
# res <- FALSE
# else
# res <- TRUE
# names(res) <- 'hz_logic_pass'
# return(res)
#
# }
80 changes: 42 additions & 38 deletions man/checkHzDepthLogic.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

54 changes: 54 additions & 0 deletions misc/aqp2/man_deprecated/checkHzDepthLogic.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
\name{checkHzDepthLogic}
\alias{checkHzDepthLogic}
\alias{test_hz_logic}

\title{Check a \code{SoilProfileCollection} object for errors in horizon depths.}
\description{This function inspects a \code{SoilProfileCollection} object, looking for 4 common errors in horizon depths: 1) bottom depths shallower than top depths, 2) equal top and bottom depths, 3) missing top or bottom depths (e.g. NA), and, 4) gaps or overlap between adjacent horizons.}

\usage{
checkHzDepthLogic(x)
}

\arguments{
\item{x}{a \code{SoilProfileCollection} object}
}

\details{This function replaces \code{test_hz_logic}, now marked as deprecated.}

\value{A \code{data.frame} with as many rows as profiles in \code{x}.
\describe{
\item{id}{Profile IDs, named according to \code{idname(x)}}

\item{depthLogic}{boolean, errors related to depth logic}

\item{sameDepth}{boolean, errors related to same top/bottom depths}

\item{missingDepth}{boolean, NA in top / bottom depths}

\item{overlapOrGap}{boolean, gaps or overlap in adjacent horizons}

\item{valid}{boolean, profile passes all tests}
}
}


\author{D.E. Beaudette}

\note{There is currently no simple way to fix errors identified by this function. Stay tuned for a \code{fixHzDepthErrors()}.}




\examples{

## sample data
data(sp3)
depths(sp3) <- id ~ top + bottom

# these data should be clean
(res <- checkHzDepthLogic(sp3))

}

\keyword{ manip }

0 comments on commit 006ed6d

Please sign in to comment.