reshape examples/demo (#193)

* sim: update docs; remove reshape example #157 * sp3 example data.table patch #157 * update aqp demo to use data.table #157 * docs * fix missing comment example
ncss-tech · Jan 22, 2021 · 799ec15 · 799ec15
1 parent 5d25d50
commit 799ec15
Show file tree

Hide file tree

Showing 6 changed files with 391 additions and 194 deletions.
diff --git a/R/permute_profile.R b/R/permute_profile.R
@@ -7,7 +7,11 @@
 #' @param min.thickness Minimum thickness of permuted horizons (default: 1)
 #' @param soildepth Depth below which horizon depths are not permuted (default: NULL)
 #' @param new.idname New column name to contain unique profile ID (default: pID)
-#' @description This method is most "believable" when used to _gently_ permute the data, on the order of moving boundaries a few centimeters in either direction. The nice thing about it is it can leverage semi-quantitative (ordered factor) levels of boundary distinctness/topography for the upper and lower boundary of individual horizons, given a set of assumptions to convert classes to a "standard deviation" (see example).
+#' 
+#' @description "Perturbs" the **boundary between horizons** using a standard deviation thickness. The thickness standard deviation corresponds roughly to the concept of "horizon boundary distinctness." This is arguably "easier" to parameterize from something like a single profile description where boundary distinctness classes (based on vertical distance of transition) are recorded for each horizon. 
+#' 
+#' @details
+#'This method is most "believable" when used to _gently_ permute the data, on the order of moving boundaries a few centimeters in either direction. The nice thing about it is it can leverage semi-quantitative (ordered factor) levels of boundary distinctness/topography for the upper and lower boundary of individual horizons, given a set of assumptions to convert classes to a "standard deviation" (see example).
 #'
 #' If you imagine a normal curve with its mean centered on the vertical (depth axis) at a RV horizon depth. By the Empirical Rule for Normal distribution, two "standard deviations" above or below that RV depth represent 95% of the "volume" of the boundary.
 #'
@@ -18,11 +22,15 @@
 #' Future implementations may use boundary topography as a second hierarchical level (e.g. trig-based random functions), but think that distinctness captures the "uncertainty" about horizon separation at a specific "point" on the ground (or line in the profile quite well, and the extra variation may be hard to interpret, in general.
 #'
 #' @return A SoilProfileCollection with n permutations of p.
+#' 
+#' @seealso [random_profile()] [sim()] [hzDistinctnessCodeToOffset()]
+#' 
 #' @export permute_profile
 #' @author Andrew G. Brown
 #'
 #' @examples
-#' # # example with sp1 (using boundary distinctness)
+#' 
+#' # example with sp1 (using boundary distinctness)
 #' data("sp1")
 #' depths(sp1) <- id ~ top + bottom
 #'
@@ -41,19 +49,13 @@
 #'
 #' quantile(sp1$bound_sd, na.rm = TRUE)
 #' p <- sp1[3]
-
-# # example with loafercreek (no boundaries)
-# library(soilDB)
-# data("loafercreek")
-# #
-# # # assume boundary sd is 1/12 midpoint of horizon depth
-# #  (i.e. generally increases/less well known with depth)
-# #
-# loafercreek <- mutate(loafercreek, midpt = (hzdepb - hzdept) / 2 + hzdept,
-# #                                    bound_sd = midpt / 12)
-# quantile(loafercreek$bound_sd)
-# p <- loafercreek[1]
-
+#' 
+#' # assume boundary sd is 1/12 midpoint of horizon depth
+#' # (i.e. general relationship: SD increases (less well known) with depth)
+#' sp1 <- mutate(sp1, midpt = (bottom - top) / 2 + top, bound_sd = midpt / 12)
+#' quantile(sp1$bound_sd)
+#' p <- sp1[1]
+#' 
 permute_profile <- function(p, n = 100, id = NULL, boundary.attr,
                             min.thickness = 1,
                             soildepth = NULL, new.idname = 'pID') {

diff --git a/R/sim.R b/R/sim.R
@@ -1,4 +1,101 @@
-# function is more useful when supplied with a meaningful sd for each horizon
+# 
+#' Simulate Soil Profiles
+#' 
+#' @description Simulate a collection of soil profiles based on the horizonation of a single soil profile. 
+#' 
+#' The function is most useful when supplied with a meaningful standard deviation in thickness for each horizon -- which generally implies an aggregation of **several** profiles (say, using some generalized horizon pattern).
+#' 
+#' This contrasts with a similar approach in [permute_profile()] -- which "perturbs" the **boundary between horizons** using a standard deviation of "horizon transition zone" thickness. This thickness standard deviation corresponds roughly to the concept of "horizon boundary distinctness."
+#'
+#' @param x a SoilProfileCollection object containing a single profile from which to draw simulated data
+#' @param n the number of requested simulations
+#' @param iterations sampling iterations used to determine each horizon thickness
+#' @param hz.sd standard deviation used to simulate horizon thickness, can be a vector but must divide evenly into the number of horizons found in \code{x}
+#' @param min.thick minimum horizon thickness allowed in simulation results
+#'
+#' @return A SoilProfileCollection object with \code{n} simulated profiles, each containing the same number of horizons and same data as \code{x}
+#' 
+#' @details This function generates a collection of simulated soil profiles based on the horizon thickness data associated with a single "template" profile. Simulation is based on sampling from a family of gaussian distribution with means defined by the "template" profile and standard deviation defined by the user.
+#' 
+#' @export
+#' 
+#' @seealso \code{\link{random_profile}}  \code{\link{permute_profile}}
+#' 
+#' @author D. E. Beaudette
+#' 
+#' @examples
+#' 
+#' # load sample data and convert into SoilProfileCollection
+#' data(sp3)
+#' depths(sp3) <- id ~ top + bottom
+#' 
+#' # select a profile to use as the basis for simulation
+#' s <- sp3[3,]
+#' 
+#' # reset horizon names
+#' s$name <- paste('H', seq_along(s$name), sep = '')
+#' 
+#' # simulate 25 new profiles, using 's' and function defaults
+#' sim.1 <- sim(s, n = 25)
+#' 
+#' # simulate 25 new profiles using 's' and variable SD for each horizon
+#' sim.2 <- sim(s, n = 25, hz.sd = c(1, 2, 5, 5, 5, 10, 3))
+#' 
+#' # plot
+#' par(mfrow = c(2, 1), mar = c(0, 0, 0, 0))
+#' plot(sim.1)
+#' mtext(
+#'   'SD = 2',
+#'   side = 2,
+#'   line = -1.5,
+#'   font = 2,
+#'   cex = 0.75
+#' )
+#' plot(sim.2)
+#' mtext(
+#'   'SD = c(1, 2, 5, 5, 5, 10, 3)',
+#'   side = 2,
+#'   line = -1.5,
+#'   font = 2,
+#'   cex = 0.75
+#' )
+#' 
+#' # aggregate horizonation of simulated data
+#' # note: set class_prob_mode=2 as profiles were not defined to a constant depth
+#' sim.2$name <- factor(sim.2$name)
+#' a <- slab(sim.2, ~ name, class_prob_mode = 2)
+#' 
+#' # convert to long format for plotting simplicity
+#' library(data.table)
+#' a.long <-
+#'   melt(a,
+#'        id.vars = c('top', 'bottom'),
+#'        measure.vars = levels(sim.2$name))
+#' 
+#' # plot horizon probabilities derived from simulated data
+#' # dashed lines are the original horizon boundaries
+#' library(lattice)
+#' 
+#' xyplot(
+#'   top ~ value,
+#'   groups = variable,
+#'   data = a.long,
+#'   subset = value > 0,
+#'   ylim = c(100,-5),
+#'   type = c('l', 'g'),
+#'   asp = 1.5,
+#'   ylab = 'Depth (cm)',
+#'   xlab = 'Probability',
+#'   auto.key = list(
+#'     columns = 4,
+#'     lines = TRUE,
+#'     points = FALSE
+#'   ),
+#'   panel = function(...) {
+#'     panel.xyplot(...)
+#'     panel.abline(h = s$top, lty = 2, lwd = 2)
+#'   }
+#' )
 sim <- function(x, n=1, iterations=25, hz.sd=2, min.thick=2) {	
 
   hd <- horizonDepths(x)

diff --git a/demo/aqp.R b/demo/aqp.R
@@ -1,19 +1,20 @@
 ##
-## main demo for AQP package-- a work in progress, largely just material from help files
+## main demo for AQP package
 ##
 
-# required packages
-require(aqp)
-require(ape)
-require(cluster)
-require(lattice)
-require(reshape)
+# load packages
+library(aqp)
+library(ape)
+library(cluster)
+library(lattice)
+library(data.table)
 
+# Example 1: sp1: 9 soil profiles from Pinnacles National Monument, CA.
+data(sp1)
 
 # 
 # 1. basic profile aggregation and plotting
 # 
-data(sp1)
 depths(sp1) <- id ~ top + bottom
 
 # aggregate all profiles into 1,5,10,20 cm thick slabs, computing mean values by slab
@@ -29,87 +30,108 @@ s20 <- slab(sp1, ~ prop, slab.fun=mean, na.rm=TRUE, slab.structure=20)
 head(s1)
 
 # variation in segment-weighted mean property: very little
-round(sapply(
-list(s1, s5, s10, s20), 
-function(i) {
-	with(i, sum((bottom - top) * value) / sum(bottom - top)) 
-	}
-), 1)
+round(sapply(list(s1, s5, s10, s20),
+             function(i) {
+               with(i, sum((bottom - top) * value) / sum(bottom - top))
+             }), 1)
 
 # combined viz
-g2 <- make.groups("1cm interval"=s1, "5cm interval"=s5, 
-"10cm interval"=s10, "20cm interval"=s20)
+g2 <- make.groups(
+  "1cm interval" = s1,
+  "5cm interval" = s5,
+  "10cm interval" = s10,
+  "20cm interval" = s20
+)
 
 # note special syntax for plotting step function
-xyplot(cbind(top,bottom) ~ value, groups=which, data=g2, id=g2$which,
-panel=panel.depth_function, ylim=c(250,-10), 
-scales=list(y=list(tick.number=10)), xlab='Property', 
-ylab='Depth (cm)', main='Soil Profile Aggregation by Regular Depth-slice',
-auto.key=list(columns=2, points=FALSE, lines=TRUE)
+xyplot(
+  cbind(top, bottom) ~ value,
+  groups = which,
+  data = g2,
+  id = g2$which,
+  panel = panel.depth_function,
+  ylim = c(250, -10),
+  scales = list(y = list(tick.number = 10)),
+  xlab = 'Property',
+  ylab = 'Depth (cm)',
+  main = 'Soil Profile Aggregation by Regular Depth-slice',
+  auto.key = list(
+    columns = 2,
+    points = FALSE,
+    lines = TRUE
+  )
 )
 
-
+# Example 2: sp3: 10 soil profiles from the Sierra Nevada Foothills Region of California.
+data(sp3)
 
 # 
 # 2. investigate the concept of a 'median profile'
 # note that this involves aggregation between two dissimilar groups of soils
 # 
-data(sp3)
 
 # generate a RGB version of soil colors
 # and convert to HSV for aggregation
-sp3$h <- NA ; sp3$s <- NA ; sp3$v <- NA
-sp3.rgb <- with(sp3, munsell2rgb(hue, value, chroma, return_triplets=TRUE))
-sp3[, c('h','s','v')] <- t(with(sp3.rgb, rgb2hsv(r, g, b, maxColorValue=1)))
+sp3$h <- NA
+sp3$s <- NA
+sp3$v <- NA
+sp3.rgb <- with(sp3, munsell2rgb(hue, value, chroma, return_triplets = TRUE))
+sp3[, c('h', 's', 'v')] <- t(with(sp3.rgb, rgb2hsv(r, g, b, maxColorValue = 1)))
 
 # promote to SoilProfileCollection
 depths(sp3) <- id ~ top + bottom
 
 # aggregate across entire collection
-a <- slab(sp3, fm= ~ clay + cec + ph + h + s + v, slab.structure=10)
+a <- slab(sp3,
+          fm = ~ clay + cec + ph + h + s + v,
+          slab.structure = 10)
 
 # check
 str(a)
 
 # convert back to wide format
-library(reshape)
-a.wide.q25 <- cast(a, top + bottom ~ variable, value=c('p.q25'))
-a.wide.q50 <- cast(a, top + bottom ~ variable, value=c('p.q50'))
-a.wide.q75 <- cast(a, top + bottom ~ variable, value=c('p.q75'))
+a.wide.q25 <- dcast(as.data.table(a), top + bottom ~ variable, value.var = c('p.q25'))
+a.wide.q50 <- dcast(as.data.table(a), top + bottom ~ variable, value.var = c('p.q50'))
+a.wide.q75 <- dcast(as.data.table(a), top + bottom ~ variable, value.var = c('p.q75'))
 
 # add a new id for the 25th, 50th, and 75th percentile pedons
 a.wide.q25$id <- 'Q25'
 a.wide.q50$id <- 'Q50'
 a.wide.q75$id <- 'Q75'
 
 # combine original data with "mean profile"
-vars <- c('top','bottom','id','clay','cec','ph','h','s','v')
+vars <- c('id', 'top', 'bottom', 'clay', 'cec', 'ph', 'h', 's', 'v')
+
 # make data.frame version of sp3
-sp3.df <- as(sp3, 'data.frame')
-sp3.grouped <- rbind(
-sp3.df[, vars], a.wide.q25[, vars], a.wide.q50[, vars], a.wide.q75[, vars]
-)
+sp3.grouped <- as.data.frame(rbind(as.data.table(horizons(sp3))[, .SD, .SDcol = vars], 
+                     a.wide.q25[, .SD, .SDcol = vars],
+                     a.wide.q50[, .SD, .SDcol = vars], 
+                     a.wide.q75[, .SD, .SDcol = vars]))
 
 # re-constitute the soil color from HSV triplets
 # convert HSV back to standard R colors
 sp3.grouped$soil_color <- with(sp3.grouped, hsv(h, s, v))
 
 # give each horizon a name
-sp3.grouped$name <- paste(round(sp3.grouped$clay), '/' , 
-round(sp3.grouped$cec), '/', round(sp3.grouped$ph,1))
-
+sp3.grouped$name <- paste(round(sp3.grouped$clay), '/' ,
+                          round(sp3.grouped$cec), '/',
+                          round(sp3.grouped$ph, 1))
 
+plot(sp3.grouped)
 
 ## perform comparison, and convert to phylo class object
 ## D is rescaled to [0,]
-d <- profile_compare(sp3.grouped, vars=c('clay','cec','ph'), max_d=100, 
-k=0.01, replace_na=TRUE, add_soil_flag=TRUE, rescale.result=TRUE)
-
-require(cluster)
-h <- agnes(d, method='ward')
+d <- profile_compare(sp3.grouped,
+                     vars = c('clay', 'cec', 'ph'),
+                     max_d = 100,
+                     k = 0.01,
+                     replace_na = TRUE,
+                     add_soil_flag = TRUE, 
+                     rescale.result = TRUE)
+
+h <- agnes(d, method = 'ward')
 p <- ladderize(as.phylo(as.hclust(h)))
 
-
 # look at distance plot-- just the median profile
 plot_distance_graph(d, 12)
 
@@ -121,9 +143,14 @@ round(1 - (as.matrix(d)[12, ] / max(as.matrix(d)[12, ])), 2)
 depths(sp3.grouped) <- id ~ top + bottom
 
 # setup plot: note that D has a scale of [0,1]
-par(mar=c(1,1,1,1))
-p.plot <- plot(p, cex=0.8, label.offset=3, direction='up', y.lim=c(2,0), 
-x.lim=c(1.25,length(sp3.grouped)+1), show.tip.label=FALSE)
+par(mar = c(1, 1, 1, 1))
+p.plot <- plot(p,
+               cex = 0.8,
+               label.offset = 3,
+               direction = 'up',
+               y.lim = c(2, 0),
+               x.lim = c(1.25, length(sp3.grouped) + 1),
+               show.tip.label = FALSE)
 
 # get the last plot geometry
 lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv)
@@ -132,10 +159,18 @@ lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv)
 d.labels <- attr(d, 'Labels')
 
 new_order <- sapply(1:lastPP$Ntip,
-function(i) which(as.integer(lastPP$xx[1:lastPP$Ntip]) == i))
+                    function(i)
+                      which(as.integer(lastPP$xx[1:lastPP$Ntip]) == i))
 
 # plot the profiles, in the ordering defined by the dendrogram
 # with a couple fudge factors to make them fit
-plot(sp3.grouped, color="soil_color", plot.order=new_order,
-scaling.factor=0.01, width=0.1, cex.names=0.5,
-y.offset=max(lastPP$yy)+0.1, add=TRUE)
+plotSPC(
+  sp3.grouped,
+  color = "soil_color",
+  plot.order = new_order,
+  scaling.factor = 0.01,
+  width = 0.1,
+  cex.names = 0.5,
+  y.offset = max(lastPP$yy) + 0.1,
+  add = TRUE
+)