diff --git a/R/Log_Likelihood.R b/R/Log_Likelihood.R index 7529cd9..d663715 100755 --- a/R/Log_Likelihood.R +++ b/R/Log_Likelihood.R @@ -12,7 +12,8 @@ #' #' @examples #' @noRd -negloglik_vecchia_ST <- function(logparms, res, vecchia.approx, param.seq, scaling, nscale) { +negloglik_vecchia_ST <- function(logparms, res, vecchia.approx, param.seq, + scaling, nscale) { parms <- unlog.params(logparms, param.seq, 1) locs.scaled <- vecchia.approx$locsord for (j in 1:nscale) { @@ -20,7 +21,9 @@ negloglik_vecchia_ST <- function(logparms, res, vecchia.approx, param.seq, scali parms[param.seq[2, 1] + j - 1] } vecchia.approx$locsord <- locs.scaled - -vecchia_likelihood(res, vecchia.approx, c(parms[1], 1, parms[param.seq[3, 1]]), parms[param.seq[4, 1]]) + -vecchia_likelihood(res, vecchia.approx, c(parms[1], 1, + parms[param.seq[3, 1]]), + parms[param.seq[4, 1]]) } #' negloglik_vecchia @@ -39,7 +42,8 @@ negloglik_vecchia_ST <- function(logparms, res, vecchia.approx, param.seq, scali #' @noRd negloglik_vecchia <- function(logparms, res, vecchia.approx, param.seq) { parms <- unlog.params(logparms, param.seq, 1) - -vecchia_likelihood(res, vecchia.approx, c(parms[1], parms[2], parms[3]), parms[4]) + -vecchia_likelihood(res, vecchia.approx, c(parms[1], parms[2], parms[3]), + parms[4]) } #' negloglik_full_ST @@ -91,7 +95,8 @@ negloglik.full <- function(logparams, d, y, param.seq) { params <- unlog.params(logparams, param.seq, 1) # d <- fields::rdist(locs) N <- nrow(d) - cov.mat <- params[1] * fields::Matern(d, range = params[2], smoothness = params[3]) + + cov.mat <- params[1] * fields::Matern(d, range = params[2], + smoothness = params[3]) + params[4] * diag(N) return(-1 * mvtnorm::dmvnorm(y, rep(0, N), cov.mat, log = TRUE)) } @@ -128,7 +133,8 @@ mvnegloglik <- function(logparams, vecchia.approx, y, param.seq, P) { ############################################################################## ### Flexible Spatiotemporal Multivariate Matern Negative Loglikelihood Function ########### -mvnegloglik_ST <- function(logparams, vecchia.approx, y, param.seq, P, scaling, nscale) { +mvnegloglik_ST <- function(logparams, vecchia.approx, y, param.seq, P, scaling, + nscale) { # Input- # logparams: A numeric vector of length (4*P)+(4*choose(P,2)). # To construct these parameters we unlist a list of the 7 covariance @@ -236,11 +242,15 @@ create.cov.upper.flex <- function(P, marg.var, marg.range, marg.smooth, j <- combs[iter, 2] smoothness.mat[i, j] <- (marg.smooth[i] + marg.smooth[j]) / 2 - range.mat[i, j] <- 1 / sqrt(((1 / marg.range[i])^2 + (1 / marg.range[j])^2) / 2) + range.mat[i, j] <- 1 / sqrt(((1 / marg.range[i])^2 + + (1 / marg.range[j])^2) / 2) s1 <- sqrt(marg.var[i] * marg.var[j]) - s2 <- ((1 / marg.range[i])^marg.smooth[i] * (1 / marg.range[j])^marg.smooth[j]) / ((1 / range.mat[i, j])^(2 * smoothness.mat[i, j])) - s3 <- gamma(smoothness.mat[i, j]) / (sqrt(gamma(marg.smooth[i])) * sqrt(gamma(marg.smooth[j]))) + s2 <- ((1 / marg.range[i])^marg.smooth[i] * + (1 / marg.range[j])^marg.smooth[j]) / + ((1 / range.mat[i, j])^(2 * smoothness.mat[i, j])) + s3 <- gamma(smoothness.mat[i, j]) / (sqrt(gamma(marg.smooth[i])) * + sqrt(gamma(marg.smooth[j]))) s4 <- R.corr[iter] sig2.mat[i, j] <- s1 * s2 * s3 * s4 } @@ -282,10 +292,12 @@ cat.covariances <- function(locs.list, sig2, range, smoothness, nugget) { # Calculate the covariance matrix - if/then based on its location in the super-matrix N <- nrow(d) if (i == j) { # To accomodate varying size outcomes- the nugget is not included on cross-covariances - cov.mat.ij <- sig2[i, j] * geoR::matern(d, phi = range[i, j], kappa = smoothness[i, j]) + + cov.mat.ij <- sig2[i, j] * geoR::matern(d, phi = range[i, j], kappa = + smoothness[i, j]) + nugget[i, j] * diag(N) } else { - cov.mat.ij <- sig2[i, j] * geoR::matern(d, phi = range[i, j], kappa = smoothness[i, j]) + cov.mat.ij <- sig2[i, j] * geoR::matern(d, phi = range[i, j], kappa = + smoothness[i, j]) } diff --git a/R/PrestoGP_CreateU_Multivariate.R b/R/PrestoGP_CreateU_Multivariate.R index fdbbe24..bb099f7 100755 --- a/R/PrestoGP_CreateU_Multivariate.R +++ b/R/PrestoGP_CreateU_Multivariate.R @@ -66,7 +66,8 @@ max_min_ordering <- function(locs, dist_func) { #' @param dist_func Any distance function with a signature of dist(query_location, locations_matrix) #' #' @return A vector containing the indices of the neighbors -knn_indices <- function(ordered_locs, query, n_neighbors, dist_func, dist_func_code) { +knn_indices <- function(ordered_locs, query, n_neighbors, + dist_func, dist_func_code) { if (dist_func_code == "custom") { dists <- dist_func(query, ordered_locs) dists_order <- order(dists) @@ -90,7 +91,8 @@ knn_indices <- function(ordered_locs, query, n_neighbors, dist_func, dist_func_c #' @param dist_func Any distance function with a signature of dist(query_location, locations_matrix) #' #' @return A list containing two matrices, each with one row per location: an indices matrix with the indices of nearest neighbors for each location, and a distance matrix with the associated distances -sparseNN <- function(ordered_locs, n_neighbors, dist_func, dist_func_code, ordered_locs_pred = NULL) { +sparseNN <- function(ordered_locs, n_neighbors, + dist_func, dist_func_code, ordered_locs_pred = NULL) { ee <- min(apply(ordered_locs, 2, stats::sd)) n <- nrow(ordered_locs) ordered_locs <- ordered_locs + matrix( @@ -98,17 +100,25 @@ sparseNN <- function(ordered_locs, n_neighbors, dist_func, dist_func_code, order stats::rnorm(n * ncol(ordered_locs)), n, ncol(ordered_locs) ) - indices_matrix <- matrix(data = NA, nrow = nrow(ordered_locs), ncol = n_neighbors) - distances_matrix <- matrix(data = NA, nrow = nrow(ordered_locs), ncol = n_neighbors) + indices_matrix <- matrix(data = NA, nrow = nrow(ordered_locs), + ncol = n_neighbors) + distances_matrix <- matrix(data = NA, nrow = nrow(ordered_locs), + ncol = n_neighbors) for (row in 1:n_neighbors) { # for the locations from 1 to n_neighbors, use the entire locs list to find the neighbors - nn <- knn_indices(ordered_locs[1:(n_neighbors + 1), , drop = FALSE][-row, , drop = FALSE], ordered_locs[row, , drop = FALSE], n_neighbors, dist_func, dist_func_code) + nn <- knn_indices(ordered_locs[1: + (n_neighbors + 1), , drop = FALSE][-row, , + drop = FALSE], + ordered_locs[row, , drop = FALSE], n_neighbors, + dist_func, dist_func_code) indices_matrix[row, 1:n_neighbors] <- nn$indices[1:n_neighbors] distances_matrix[row, 1:n_neighbors] <- nn$distances[1:n_neighbors] } for (row in (n_neighbors + 1):nrow(ordered_locs)) { # get the m nearest neighbors from the locs before this one in the max-min order - nn <- knn_indices(ordered_locs[1:(row - 1), , drop = FALSE], ordered_locs[row, , drop = FALSE], n_neighbors, dist_func, dist_func_code) + nn <- knn_indices(ordered_locs[1:(row - 1), , drop = FALSE], + ordered_locs[row, , drop = FALSE], n_neighbors, + dist_func, dist_func_code) indices_matrix[row, 1:n_neighbors] <- nn$indices[1:n_neighbors] distances_matrix[row, 1:n_neighbors] <- nn$distances[1:n_neighbors] } diff --git a/R/PrestoGP_Model.R b/R/PrestoGP_Model.R index 1e12d45..76f07cd 100755 --- a/R/PrestoGP_Model.R +++ b/R/PrestoGP_Model.R @@ -70,7 +70,7 @@ setMethod("initialize", "PrestoGPModel", function(.Object, ...) { setGeneric("show_theta", function(object, Y_names) standardGeneric("show_theta")) setGeneric("prestogp_fit", function(model, Y, X, locs, scaling = NULL, apanasovich = FALSE, covparams = NULL, beta.hat = NULL, tol = 0.999999, max_iters = 100, verbose = FALSE, optim.method = "Nelder-Mead", optim.control = list(trace = 0, reltol = 1e-3, maxit = 5000), parallel = FALSE, foldid = NULL) standardGeneric("prestogp_fit")) setGeneric("prestogp_predict", function(model, X = "matrix", locs = "matrix", m = "numeric", ordering.pred = c("obspred", "general"), pred.cond = c("independent", "general"), return.values = c("mean", "meanvar")) standardGeneric("prestogp_predict")) -setGeneric("calc_covparams", function(model, locs, Y) standardGeneric("calc_covparams")) +setGeneric("calc_covparams", function(model, locs, Y, covparams) standardGeneric("calc_covparams")) setGeneric("specify", function(model, ...) standardGeneric("specify")) setGeneric("compute_residuals", function(model, Y, Y.hat) standardGeneric("compute_residuals")) setGeneric("transform_data", function(model, Y, X) standardGeneric("transform_data")) @@ -81,6 +81,7 @@ setGeneric("scale_locs", function(model, locs) standardGeneric("scale_locs")) setGeneric("theta_names", function(model) standardGeneric("theta_names")) setGeneric("transform_covariance_parameters", function(model) standardGeneric("transform_covariance_parameters")) setGeneric("check_input", function(model, Y, X, locs) standardGeneric("check_input")) +setGeneric("check_input_pred", function(model, X, locs) standardGeneric("check_input_pred")) #' show #' @@ -205,22 +206,35 @@ setMethod( optim.control = list(trace = 0, reltol = 1e-3, maxit = 5000), parallel = FALSE, foldid = NULL) { model <- check_input(model, Y, X, locs) - if (!is.double(beta.hat) && !is.null(beta.hat)) { - stop("The beta.hat parameter must be floating point number.") + if (!is.null(beta.hat)) { + if (!is.vector(beta.hat) | !is.numeric(beta.hat)) { + stop("beta.hat parameter must be a numeric vector") + } + if (length(beta.hat) != (ncol(model@X_train) + 1)) { + stop("Length of beta.hat must match the number of predictors") + } + beta.hat <- as.matrix(beta.hat) } - if (!is.double(tol)) { - stop("The tol parameter must be floating point number.") + if (!is.numeric(tol)) { + stop("tol must be numeric") } - if (is.null(scaling)) { - if (is.matrix(locs)) { - scaling <- rep(1, ncol(locs)) - } else { - scaling <- rep(1, ncol(locs[[1]])) - } + if (length(tol) != 1) { + stop("tol must be a scalar") } - nscale <- length(unique(scaling)) - if (sum(sort(unique(scaling)) == 1:nscale) < nscale) { - stop("scaling must consist of sequential integers between 1 and ncol(locs)") + if (tol<=0 | tol>1) { + stop("tol must satisfy 0= nrow(model@Y_train)) { + warning("Conditioning set size m chosen to be >=n. Changing to m=n-1") + model@n_neighbors <- nrow(model@Y_train) - 1 } model <- specify(model) @@ -390,7 +408,7 @@ setMethod("compute_error", "PrestoGPModel", function(model) { #' @param Y the dependent variable matrix #' #' @return a model with initial covariance parameters -setMethod("calc_covparams", "PrestoGPModel", function(model, locs, Y) { +setMethod("calc_covparams", "PrestoGPModel", function(model, locs, Y, covparams) { if (!is.list(locs)) { P <- 1 locs <- list(locs) @@ -398,27 +416,68 @@ setMethod("calc_covparams", "PrestoGPModel", function(model, locs, Y) { } else { P <- length(locs) } - col.vars <- rep(NA, P) - D.sample.bar <- rep(NA, model@nscale * P) - for (i in 1:P) { - col.vars[i] <- var(Y[[i]]) - N <- length(Y[[i]]) - # TODO find a better way to compute initial spatial range - for (j in 1:model@nscale) { - d.sample <- sample(1:N, max(2, ceiling(N / 50)), replace = FALSE) - D.sample <- rdist(locs[[i]][d.sample, model@scaling == j]) - D.sample.bar[(i - 1) * model@nscale + j] <- mean(D.sample) / 4 - } + pseq <- create.param.sequence(P, model@nscale) + if (is.null(covparams)) { + col.vars <- rep(NA, P) + D.sample.bar <- rep(NA, model@nscale * P) + for (i in 1:P) { + col.vars[i] <- var(Y[[i]]) + N <- length(Y[[i]]) + # TODO find a better way to compute initial spatial range + for (j in 1:model@nscale) { + d.sample <- sample(1:N, max(2, ceiling(N / 50)), replace = FALSE) + D.sample <- rdist(locs[[i]][d.sample, model@scaling == j]) + D.sample.bar[(i - 1) * model@nscale + j] <- mean(D.sample) / 4 + } + } + model@logparams <- create.initial.values.flex( + c(0.9 * col.vars), # marginal variance + D.sample.bar, # range + rep(0.5, P), # smoothness + c(.1 * col.vars), # nuggets + rep(0, choose(P, 2)), + P + ) + } else { + if (P==1) { + if (length(covparams) != pseq[4,2]) { + stop("Incorrect number of parameters in covparams") + } + } else { + if (length(covparams) != pseq[5,2]) { + stop("Incorrect number of parameters in covparams") + } + } + init.var <- covparams[pseq[1,1]:pseq[1,2]] + init.range <- covparams[pseq[2,1]:pseq[2,2]] + init.smooth <- covparams[pseq[3,1]:pseq[3,2]] + init.nugget <- covparams[pseq[4,1]:pseq[4,2]] + if (P>1) { + init.corr <- covparams[pseq[5,1]:pseq[5,2]] + } + else { + init.corr <- 0 + } + if (sum(init.var<=0)>0) { + stop("Initial variance estimates must be positive") + } + if (sum(init.range<=0)>0) { + stop("Initial range estimates must be positive") + } + if (sum(init.nugget<=0)>0) { + stop("Initial nugget estimates must be positive") + } + if (sum(init.smooth<=0)>0 | sum(init.smooth>=2.5)>0) { + stop("Initial smoothness estimates must be between 0 and 2.5") + } + if (sum(init.corr < -1)>0 | sum(init.corr > 1)>0) { + stop("Initial correlation estimates must be between -1 and 1") + } + model@logparams <- create.initial.values.flex(init.var, init.range, + init.smooth, init.nugget, + init.corr, P) } - model@logparams <- create.initial.values.flex( - c(0.9 * col.vars), # marginal variance - D.sample.bar, # range - rep(0.5, P), # smoothness - c(.1 * col.vars), # nuggets - rep(0, choose(P, 2)), - P - ) - model@param_sequence <- create.param.sequence(P, model@nscale) + model@param_sequence <- pseq model <- transform_covariance_parameters(model) invisible(model) }) diff --git a/R/PrestoGP_Multivariate_Vecchia.R b/R/PrestoGP_Multivariate_Vecchia.R index d4e2959..08dab5b 100755 --- a/R/PrestoGP_Multivariate_Vecchia.R +++ b/R/PrestoGP_Multivariate_Vecchia.R @@ -50,27 +50,17 @@ setMethod("prestogp_predict", "MultivariateVecchiaModel", function(model, X, loc ordering.pred <- match.arg(ordering.pred) pred.cond <- match.arg(pred.cond) return.values <- match.arg(return.values) - if (!is.list(X)) { - stop("X parameter must be a list.") - } - if (!is.list(locs)) { - stop("locs parameter must be a list.") - } - ndx.out <- NULL - for (i in 1:length(locs)) { - if (nrow(X[[i]]) != nrow(locs[[i]])) { - stop("The number of locations must match the number of X observations.") - } - ndx.out <- c(ndx.out, rep(i, nrow(locs[[i]]))) - } - X <- psych::superMatrix(X) - if (ncol(X) != ncol(model@X_train)) { - stop("The number of predictors in X must match the training data") - } + X <- check_input_pred(model, X, locs) if (is.null(m)) { # m defaults to the value used for training m <- model@n_neighbors } - stopifnot((m > 0)) # FIXME m is not required by full model + if (m < model@min_m) { + stop(paste("m must be at least ", model@min_m, sep = "")) + } + if (m >= nrow(model@X_train)) { + warning("Conditioning set size m chosen to be >=n. Changing to m=n-1") + m <- nrow(model@X_train) - 1 + } # Vecchia prediction at new locations Vecchia.Pred <- predict(model@linear_model, newx = X, s = model@linear_model$lambda[model@lambda_1se_idx]) @@ -114,15 +104,13 @@ setMethod("prestogp_predict", "MultivariateVecchiaModel", function(model, X, loc return.list <- list(means = Vec.mean) } else { warning("Variance estimates do not include model fitting variance and are anticonservative. Use with caution.") - vec.sds <- sqrt(pred$var.pred) + vec.var <- pred$var.pred for (i in 1:length(locs)) { - vec.sds[ndx.out == i] <- sqrt(vec.sds[ndx.out == i] + + vec.sds[ndx.out == i] <- sqrt(vec.var[ndx.out == i] + model@covparams[model@param_sequence[4, i]]) } - return.list$sds <- vec.sds + return.list <- list(means = Vec.mean, sds = vec.sds) } - # option to include or exclude theta below - # Vec.sds = sqrt(pred$var.pred + model@covparams[4]) #standard deviation return(return.list) }) @@ -181,6 +169,47 @@ setMethod("check_input", "MultivariateVecchiaModel", function(model, Y, X, locs) invisible(model) }) +setMethod("check_input_pred", "MultivariateVecchiaModel", function(model, X, locs) { + if (!is.list(locs)) { + stop("locs must be a list for multivariate models") + } + if (!is.list(X)) { + stop("X must be a list for multivariate models") + } + if (length(locs) != length(X)) { + stop("locs and X must have the same length") + } + if (length(locs) != length(model@locs_train)) { + stop("Training and test set locs must have the same length") + } + for (i in 1:length(locs)) { + if (!is.matrix(locs[[i]])) { + stop("Each locs must be a matrix") + } + if (i > 1) { + if (ncol(locs[[i]]) != ncol(model@locs_train[[1]])) { + stop("All locs must have the same number of columns as locs_train") + } + } + if (!is.matrix(X[[i]])) { + stop("Each X must be a matrix") + } + if (nrow(X[[i]]) != nrow(locs[[i]])) { + stop("Each X must have the same number of rows as locs") + } + } + if (length(X) == 1) { + X <- X[[1]] + } + else { + X <- psych::superMatrix(X) + } + if (ncol(X) != ncol(model@X_train)) { + stop("X and X_train must have the same number of predictors") + } + return(X) +}) + setMethod("specify", "MultivariateVecchiaModel", function(model) { locs <- model@locs_train locs.scaled <- scale_locs(model, locs) diff --git a/R/PrestoGP_Vecchia.R b/R/PrestoGP_Vecchia.R index fa6dfb7..ebd28ec 100755 --- a/R/PrestoGP_Vecchia.R +++ b/R/PrestoGP_Vecchia.R @@ -42,24 +42,20 @@ setMethod("initialize", "VecchiaModel", function(.Object, n_neighbors = 25, ...) #' prediction <- prestogp_predict(model, X.test, locs.test) #' Vec.mean <- prediction[[1]] #' Vec.sds <- prediction[[2]] -setMethod("prestogp_predict", "VecchiaModel", function(model, X, locs, m = NULL) { +setMethod("prestogp_predict", "VecchiaModel", function(model, X, locs, m = NULL, ordering.pred = c("obspred", "general"), pred.cond = c("independent", "general"), return.values = c("mean", "meanvar")) { # validate parameters - if (!is.matrix(X)) { - stop("X parameter must be a matrix.") - } - if (!is.matrix(locs)) { - stop("The locs parameter must be a matrix.") - } - if (nrow(X) != nrow(locs)) { - stop("The number of locations must match the number of X observations.") - } - if (ncol(locs) != 3) { - stop("The locs parameter must have 3 columns.") - } + ordering.pred <- match.arg(ordering.pred) + pred.cond <- match.arg(pred.cond) + return.values <- match.arg(return.values) + model <- check_input_pred(model, X, locs) if (is.null(m)) { # m defaults to the value used for training m <- model@n_neighbors } - if (m == 0) { + if (m < model@min_m) { + stop(paste("m must be at least ", model@min_m, sep = "")) + } + if (m >= nrow(model@X_train)) { + warning("Conditioning set size m chosen to be >=n. Changing to m=n-1") m <- nrow(model@X_train) - 1 } @@ -73,20 +69,29 @@ setMethod("prestogp_predict", "VecchiaModel", function(model, X, locs, m = NULL) # Test set prediction res <- model@Y_train - Vecchia.hat - locs.train.scaled <- scale_locs(model, model@locs_train) - locs.scaled <- scale_locs(model, locs) - vec.approx.test <- vecchia_specify(locs.train.scaled, m, locs.pred = locs.scaled) + locs.train.scaled <- scale_locs(model, model@locs_train)[[1]] + locs.scaled <- scale_locs(model, list(locs))[[1]] + vec.approx.test <- vecchia_specify(locs.train.scaled, m, locs.pred = locs.scaled, ordering.pred=ordering.pred, pred.cond=pred.cond) - ## carry out prediction - pred <- vecchia_prediction(res, vec.approx.test, c(model@covparams[1], 1, 0.5), model@covparams[4]) + ## carry out prediction + if (!model@apanasovich) { + pred <- vecchia_prediction(res, vec.approx.test, c(model@covparams[1], 1, model@covparams[3]), model@covparams[4], return.values=return.values) + } else { + pred <- vecchia_prediction(res, vec.approx.test, c(model@covparams[1], model@covparams[2], model@covparams[3]), model@covparams[4], return.values=return.values) + } # prediction function can return both mean and sds # returns a list with elements mu.pred,mu.obs,var.pred,var.obs,V.ord Vec.mean <- pred$mu.pred + Vecchia.Pred # residual + mean trend - # option to include or exclude theta below - Vec.sds <- sqrt(pred$var.pred + model@covparams[4]) # standard deviation + if (return.values == "mean") { + return.list <- list(means = Vec.mean) + } else { + warning("Variance estimates do not include model fitting variance and are anticonservative. Use with caution.") + Vec.sds <- sqrt(pred$var.pred + model@covparams[4]) + return.list <- list(means = Vec.mean, sds = vec.sds) + } - return(list("means" = Vec.mean, "standard deviations" = Vec.sds)) + return(return.list) }) setMethod("check_input", "VecchiaModel", function(model, Y, X, locs) { @@ -117,6 +122,25 @@ setMethod("check_input", "VecchiaModel", function(model, Y, X, locs) { invisible(model) }) +setMethod("check_input_pred", "VecchiaModel", function(model, X, locs) { + if (!is.matrix(locs)) { + stop("locs must be a matrix") + } + if (!is.matrix(X)) { + stop("X must be a matrix") + } + if (ncol(locs) != ncol(model@locs_train[[1]])) { + stop("locs must have the same number of columns as locs_train") + } + if (nrow(X) != nrow(locs)) { + stop("X must have the same number of rows as locs") + } + if (ncol(X) != ncol(model@X_train)) { + stop("X and X_train must have the same number of predictors") + } + invisible(model) +}) + #' specify #' #' Specify the conditioning set using m nearest neighbors. diff --git a/R/RcppExports.R b/R/RcppExports.R old mode 100755 new mode 100644 index 0e90b6b..dd16aaf --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -2,9 +2,10 @@ # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 na_omit_c <- function(x) { - .Call("_PrestoGP_na_omit_c", PACKAGE = "PrestoGP", x) + .Call('_PrestoGP_na_omit_c', PACKAGE = 'PrestoGP', x) } createU_helper_mat <- function(olocs, ondx, curqys, curqzs, vijs, aijs, full_const, nugget, sig2, U_beginning) { - .Call("_PrestoGP_createU_helper_mat", PACKAGE = "PrestoGP", olocs, ondx, curqys, curqzs, vijs, aijs, full_const, nugget, sig2, U_beginning) + .Call('_PrestoGP_createU_helper_mat', PACKAGE = 'PrestoGP', olocs, ondx, curqys, curqzs, vijs, aijs, full_const, nugget, sig2, U_beginning) } + diff --git a/man/calc_covparams-PrestoGPModel-method.Rd b/man/calc_covparams-PrestoGPModel-method.Rd old mode 100644 new mode 100755 index 0384b81..0d49a69 --- a/man/calc_covparams-PrestoGPModel-method.Rd +++ b/man/calc_covparams-PrestoGPModel-method.Rd @@ -4,7 +4,7 @@ \alias{calc_covparams,PrestoGPModel-method} \title{calc_covparams} \usage{ -\S4method{calc_covparams}{PrestoGPModel}(model, locs, Y) +\S4method{calc_covparams}{PrestoGPModel}(model, locs, Y, covparams) } \arguments{ \item{model}{The model to set the covariance parameters of} diff --git a/man/prestogp_fit-PrestoGPModel-method.Rd b/man/prestogp_fit-PrestoGPModel-method.Rd index fdacf59..96f695e 100755 --- a/man/prestogp_fit-PrestoGPModel-method.Rd +++ b/man/prestogp_fit-PrestoGPModel-method.Rd @@ -49,18 +49,18 @@ This method fits any PrestoGP model given a matrix of locations, a matrix of ind } \examples{ -US_NO2_Data<- read.csv("data/US_ST_NO2_Data.csv") -lat <- US_NO2_Data$Latitude # Latitude -lon <- US_NO2_Data$Longitude # Longitude -logNO2 <- log(US_NO2_Data$Y) # ozone data -logNO2 <- logNO2[1:1000,] -time <- US_NO2_Data$YearFrac # time in fractional years -N <- length(logNO2) -locs = cbind(lon,lat,time) # Coordinates in R3 (x,y,t) -locs <- locs[1:1000,] -X.Design <- US_NO2_Data[,7:145] #Design matrix +US_NO2_Data <- read.csv("data/US_ST_NO2_Data.csv") +lat <- US_NO2_Data$Latitude # Latitude +lon <- US_NO2_Data$Longitude # Longitude +logNO2 <- log(US_NO2_Data$Y) # ozone data +logNO2 <- logNO2[1:1000, ] +time <- US_NO2_Data$YearFrac # time in fractional years +N <- length(logNO2) +locs <- cbind(lon, lat, time) # Coordinates in R3 (x,y,t) +locs <- locs[1:1000, ] +X.Design <- US_NO2_Data[, 7:145] # Design matrix X.Design <- scale(X.Design) -X <- X.Design[1:1000,] +X <- X.Design[1:1000, ] model <- PrestoGPSpatiotemporalModel() model <- prestogp_fit(model, logNO2, X, locs) ... diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index ce7b2b9..3f445f7 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -7,51 +7,49 @@ using namespace Rcpp; #ifdef RCPP_USE_GLOBAL_ROSTREAM -Rcpp::Rostream &Rcpp::Rcout = Rcpp::Rcpp_cout_get(); -Rcpp::Rostream &Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); +Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); +Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); #endif // na_omit_c arma::vec na_omit_c(arma::vec x); -RcppExport SEXP _PrestoGP_na_omit_c(SEXP xSEXP) -{ - BEGIN_RCPP +RcppExport SEXP _PrestoGP_na_omit_c(SEXP xSEXP) { +BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter::type x(xSEXP); + Rcpp::traits::input_parameter< arma::vec >::type x(xSEXP); rcpp_result_gen = Rcpp::wrap(na_omit_c(x)); return rcpp_result_gen; - END_RCPP +END_RCPP } // createU_helper_mat -arma::sp_mat createU_helper_mat(const arma::mat &olocs, const arma::vec &ondx, const arma::mat &curqys, const arma::mat &curqzs, const arma::mat &vijs, const arma::mat &aijs, const arma::mat &full_const, const arma::vec &nugget, const arma::vec &sig2, const arma::vec &U_beginning); -RcppExport SEXP _PrestoGP_createU_helper_mat(SEXP olocsSEXP, SEXP ondxSEXP, SEXP curqysSEXP, SEXP curqzsSEXP, SEXP vijsSEXP, SEXP aijsSEXP, SEXP full_constSEXP, SEXP nuggetSEXP, SEXP sig2SEXP, SEXP U_beginningSEXP) -{ - BEGIN_RCPP +arma::sp_mat createU_helper_mat(const arma::mat& olocs, const arma::vec& ondx, const arma::mat& curqys, const arma::mat& curqzs, const arma::mat& vijs, const arma::mat& aijs, const arma::mat& full_const, const arma::vec& nugget, const arma::vec& sig2, const arma::vec& U_beginning); +RcppExport SEXP _PrestoGP_createU_helper_mat(SEXP olocsSEXP, SEXP ondxSEXP, SEXP curqysSEXP, SEXP curqzsSEXP, SEXP vijsSEXP, SEXP aijsSEXP, SEXP full_constSEXP, SEXP nuggetSEXP, SEXP sig2SEXP, SEXP U_beginningSEXP) { +BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter::type olocs(olocsSEXP); - Rcpp::traits::input_parameter::type ondx(ondxSEXP); - Rcpp::traits::input_parameter::type curqys(curqysSEXP); - Rcpp::traits::input_parameter::type curqzs(curqzsSEXP); - Rcpp::traits::input_parameter::type vijs(vijsSEXP); - Rcpp::traits::input_parameter::type aijs(aijsSEXP); - Rcpp::traits::input_parameter::type full_const(full_constSEXP); - Rcpp::traits::input_parameter::type nugget(nuggetSEXP); - Rcpp::traits::input_parameter::type sig2(sig2SEXP); - Rcpp::traits::input_parameter::type U_beginning(U_beginningSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type olocs(olocsSEXP); + Rcpp::traits::input_parameter< const arma::vec& >::type ondx(ondxSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type curqys(curqysSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type curqzs(curqzsSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type vijs(vijsSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type aijs(aijsSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type full_const(full_constSEXP); + Rcpp::traits::input_parameter< const arma::vec& >::type nugget(nuggetSEXP); + Rcpp::traits::input_parameter< const arma::vec& >::type sig2(sig2SEXP); + Rcpp::traits::input_parameter< const arma::vec& >::type U_beginning(U_beginningSEXP); rcpp_result_gen = Rcpp::wrap(createU_helper_mat(olocs, ondx, curqys, curqzs, vijs, aijs, full_const, nugget, sig2, U_beginning)); return rcpp_result_gen; - END_RCPP +END_RCPP } static const R_CallMethodDef CallEntries[] = { - {"_PrestoGP_na_omit_c", (DL_FUNC)&_PrestoGP_na_omit_c, 1}, - {"_PrestoGP_createU_helper_mat", (DL_FUNC)&_PrestoGP_createU_helper_mat, 10}, - {NULL, NULL, 0}}; + {"_PrestoGP_na_omit_c", (DL_FUNC) &_PrestoGP_na_omit_c, 1}, + {"_PrestoGP_createU_helper_mat", (DL_FUNC) &_PrestoGP_createU_helper_mat, 10}, + {NULL, NULL, 0} +}; -RcppExport void R_init_PrestoGP(DllInfo *dll) -{ +RcppExport void R_init_PrestoGP(DllInfo *dll) { R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); R_useDynamicSymbols(dll, FALSE); } diff --git a/tests/testthat/sim_multivariate_small.R b/tests/testthat/sim_multivariate_small.R new file mode 100755 index 0000000..0fbcd93 --- /dev/null +++ b/tests/testthat/sim_multivariate_small.R @@ -0,0 +1,93 @@ +ny <- 2 # number of response variables +p <- 10 # number of predictors for each response +p.nz <- 4 # number of nonzero predictors for each y +n.spatial.xy <- 10 # number of spatial coordinates per dimension + +library(MASS) +library(fields) +library(psych) + +beta1 <- c(rep(1, p.nz), rep(0, p - p.nz)) +beta.all <- rep(beta1, ny) + +X.st <- list() +for (i in 1:ny) { + set.seed(1212 + i) + Sigma.X <- exp(-rdist(sample(1:p)) / 3) + X.st[[i]] <- mvrnorm(n.spatial.xy^2, rep(0, p), Sigma.X) +} +X.all <- superMatrix(X.st) +mean.trend.st <- X.all %*% beta.all + +n.rho <- choose(ny, 2) +rho.vec <- runif(n.rho, 0.2, 0.8) +rho <- matrix(0, nrow = ny, ncol = ny) +rho[upper.tri(rho)] <- rho.vec +rho <- rho + t(rho) + diag(1, ny) + +marg.smoothness <- 0.5 + rnorm(ny, 0.1, 0.05) +nuggets <- runif(ny, 0.5, 2) +x.variance <- runif(ny, 1.5, 4) +marg.var <- nuggets + x.variance +ranges <- runif(ny, 0.5, 1.2) + +params.all <- c(x.variance, ranges, marg.smoothness, nuggets, rho.vec) + +locs.list <- list() +for (i in 1:ny) { + loc1 <- seq(0, 1, length.out = n.spatial.xy) + rnorm(n.spatial.xy, 0, 0.001) + loc2 <- seq(0, 1, length.out = n.spatial.xy) + rnorm(n.spatial.xy, 0, 0.001) + locs.list[[i]] <- as.matrix(expand.grid(loc1, loc2)) +} + +Sigma.All <- matrix(nrow = ny * n.spatial.xy^2, ncol = ny * n.spatial.xy^2) +for (i in 1:ny) { + for (j in i:ny) { + if (i == j) { + ndx1 <- ((i - 1) * n.spatial.xy^2 + 1):(n.spatial.xy^2 * i) + dij <- fields::rdist(locs.list[[i]]) + Sigma.All[ndx1, ndx1] <- marg.var[i] * + fields::Matern(dij, + range = ranges[i], + smoothness = marg.smoothness[i] + ) + } else { + ndx1 <- ((i - 1) * n.spatial.xy^2 + 1):(n.spatial.xy^2 * i) + ndx2 <- ((j - 1) * n.spatial.xy^2 + 1):(n.spatial.xy^2 * j) + dij <- fields::rdist(locs.list[[i]], locs.list[[j]]) + vii <- marg.smoothness[i] + vjj <- marg.smoothness[j] + vij <- (vii + vjj) / 2 + aii <- 1 / ranges[i] + ajj <- 1 / ranges[j] + aij <- sqrt((aii^2 + ajj^2) / 2) + Sigma.All[ndx1, ndx2] <- rho[i, j] * sqrt(marg.var[i]) * + sqrt(marg.var[j]) * aii^vii * ajj^vjj * gamma(vij) / + (aij^(2 * vij) * sqrt(gamma(vii) * gamma(vjj))) * + fields::Matern(dij, smoothness = vij, alpha = aij) + Sigma.All[ndx2, ndx1] <- t(Sigma.All[ndx1, ndx2]) + } + } +} + +L.C <- chol(Sigma.All) + +st.error <- rnorm(n.spatial.xy^2 * ny) %*% L.C + +nug.error <- NULL +for (i in 1:ny) { + nug.error <- c(nug.error, nuggets[i] * rnorm(n.spatial.xy^2)) +} + +y.list <- list() +for (i in 1:ny) { + ndx1 <- ((i - 1) * n.spatial.xy^2 + 1):(n.spatial.xy^2 * i) + y.list[[i]] <- mean.trend.st[ndx1] + st.error[ndx1] + nug.error[ndx1] +} + +rm( + ny, p, p.nz, n.spatial.xy, beta1, i, j, Sigma.X, mean.trend.st, n.rho, + loc1, loc2, ndx1, ndx2, dij, vii, vjj, vij, aii, ajj, aij, L.C, + st.error, nug.error, X.all, rho, rho.vec, ranges, Sigma.All, nuggets, + marg.smoothness, marg.var +) diff --git a/tests/testthat/sim_multivariate_small_pred.R b/tests/testthat/sim_multivariate_small_pred.R new file mode 100755 index 0000000..0b5101e --- /dev/null +++ b/tests/testthat/sim_multivariate_small_pred.R @@ -0,0 +1,106 @@ +set.seed(1234) + +ny <- 2 # number of response variables +p <- 10 # number of predictors for each response +p.nz <- 4 # number of nonzero predictors for each y +n.spatial.xy <- 7 # number of spatial coordinates per dimension + +library(MASS) +library(fields) +library(psych) + +beta1 <- c(rep(1,p.nz), rep(0,p-p.nz)) +beta.all <- rep(beta1, ny) + +X.st <- list() +for (i in 1:ny) { + Sigma.X <- exp(-rdist(sample(1:p))/3) + X.st[[i]] <- mvrnorm(n.spatial.xy^2,rep(0,p),Sigma.X) +} +X.all <- superMatrix(X.st) +mean.trend.st <- X.all %*% beta.all + +n.rho <- choose(ny, 2) +rho.vec <- runif(n.rho, 0.2, 0.8) +rho <- matrix(0, nrow=ny, ncol=ny) +rho[upper.tri(rho)] <- rho.vec +rho <- rho + t(rho) + diag(1, ny) + +marg.smoothness <- 0.5 + rnorm(ny, 0.1, 0.05) +nuggets <- runif(ny, 0.5, 2) +x.variance <- runif(ny, 1.5, 4) +marg.var <- nuggets + x.variance +ranges <- runif(ny, 0.5, 1.2) + +params.all <- c(x.variance, ranges, marg.smoothness, nuggets, rho.vec) + +locs.list <- list() +for (i in 1:ny) { + loc1 <- seq(0,1,length.out = n.spatial.xy)+rnorm(n.spatial.xy,0,0.001) + loc2 <- seq(0,1,length.out = n.spatial.xy)+rnorm(n.spatial.xy,0,0.001) + locs.list[[i]] <- as.matrix(expand.grid(loc1, loc2)) +} + +Sigma.All <- matrix(nrow=ny*n.spatial.xy^2, ncol=ny*n.spatial.xy^2) +for (i in 1:ny) { + for (j in i:ny) { + if (i==j) { + ndx1 <- ((i-1)*n.spatial.xy^2+1):(n.spatial.xy^2*i) + dij <- fields::rdist(locs.list[[i]]) + Sigma.All[ndx1,ndx1] <- marg.var[i] * + fields::Matern(dij, range = ranges[i], + smoothness = marg.smoothness[i]) + } + else { + ndx1 <- ((i-1)*n.spatial.xy^2+1):(n.spatial.xy^2*i) + ndx2 <- ((j-1)*n.spatial.xy^2+1):(n.spatial.xy^2*j) + dij <- fields::rdist(locs.list[[i]], locs.list[[j]]) + vii <- marg.smoothness[i] + vjj <- marg.smoothness[j] + vij <- (vii+vjj)/2 + aii <- 1/ranges[i] + ajj <- 1/ranges[j] + aij <- sqrt((aii^2+ajj^2)/2) + Sigma.All[ndx1,ndx2] <- rho[i,j] * sqrt(marg.var[i]) * + sqrt(marg.var[j]) * aii^vii * ajj^vjj * gamma(vij) / + (aij^(2*vij) * sqrt(gamma(vii) * gamma(vjj))) * + fields::Matern(dij, smoothness=vij, alpha=aij) + Sigma.All[ndx2,ndx1] <- t(Sigma.All[ndx1,ndx2]) + } + } +} + +L.C <- chol(Sigma.All) + +st.error <- rnorm(n.spatial.xy^2 * ny) %*% L.C + +nug.error <- NULL +for (i in 1:ny) { + nug.error <- c(nug.error, nuggets[i]*rnorm(n.spatial.xy^2)) +} + +y.list <- list() +locs.list.otr <- list() +locs.list.otst <- list() +y.list.otr <- list() +y.list.otst <- list() +X.st.otr <- list() +X.st.otst <- list() +for (i in 1:ny) { + ndx1 <- ((i-1)*n.spatial.xy^2+1):(n.spatial.xy^2*i) + y.list[[i]] <- mean.trend.st[ndx1] + st.error[ndx1] + nug.error[ndx1] + nn <- n.spatial.xy^2 + otr <- rep(FALSE, nn) + otr[sample(1:nn, size=floor(nn/2))] <- TRUE + locs.list.otr[[i]] <- locs.list[[i]][otr,] + locs.list.otst[[i]] <- locs.list[[i]][!otr,] + y.list.otr[[i]] <- y.list[[i]][otr] + y.list.otst[[i]] <- y.list[[i]][!otr] + X.st.otr[[i]] <- X.st[[i]][otr,] + X.st.otst[[i]] <- X.st[[i]][!otr,] +} + +rm(ny, p, p.nz, n.spatial.xy, beta1, i, j, Sigma.X, mean.trend.st, n.rho, + loc1, loc2, ndx1, ndx2, dij, vii, vjj, vij, aii, ajj, aij, L.C, + st.error, nug.error, X.all, rho, rho.vec, ranges, Sigma.All, nuggets, + marg.smoothness, marg.var, x.variance, nn, otr) diff --git a/tests/testthat/sim_vecchia.RData b/tests/testthat/sim_vecchia.RData new file mode 100755 index 0000000..0dfbff0 Binary files /dev/null and b/tests/testthat/sim_vecchia.RData differ diff --git a/tests/testthat/sim_vecchia_pred.R b/tests/testthat/sim_vecchia_pred.R new file mode 100755 index 0000000..ffcb97e --- /dev/null +++ b/tests/testthat/sim_vecchia_pred.R @@ -0,0 +1,50 @@ +set.seed(1212) + +p <- 10 # number of predictors for each response +p.nz <- 4 # number of nonzero predictors for each y +n.spatial.xy <- 50 # number of spatial coordinates per dimension + +library(MASS) +library(fields) + +beta1 <- c(rep(1,p.nz), rep(0,p-p.nz)) + +Sigma.X <- exp(-rdist(sample(1:p))/3) +X <- mvrnorm(n.spatial.xy^2,rep(0,p),Sigma.X) +mean.trend.st <- as.vector(X %*% beta1) + +marg.smoothness <- 0.5 + rnorm(1, 0.1, 0.05) +nuggets <- runif(1, 0.5, 2) +x.variance <- runif(1, 1.5, 4) +marg.var <- nuggets + x.variance +ranges <- runif(1, 0.5, 1.2) + +params.all <- c(x.variance, ranges, marg.smoothness, nuggets) + + +loc1 <- seq(0,1,length.out = n.spatial.xy)+rnorm(n.spatial.xy,0,0.001) +loc2 <- seq(0,1,length.out = n.spatial.xy)+rnorm(n.spatial.xy,0,0.001) +locs <- as.matrix(expand.grid(loc1, loc2)) + +Sigma.All <- marg.var*Matern(rdist(locs), range=ranges, + smoothness=marg.smoothness) +L.C <- chol(Sigma.All) + +st.error <- as.vector(rnorm(n.spatial.xy^2) %*% L.C) +nug.error <- nuggets*rnorm(n.spatial.xy^2) +y <- mean.trend.st + st.error + nug.error + +nn <- n.spatial.xy^2 +otr <- rep(FALSE, nn) +otr[sample(1:nn, size=floor(nn/2))] <- TRUE + +locs.otr <- locs[otr,] +locs.otst <- locs[!otr,] +y.otr <- y[otr] +y.otst <- y[!otr] +X.otr <- X[otr,] +X.otst <- X[!otr,] + +rm(p, p.nz, n.spatial.xy, beta1, Sigma.X, mean.trend.st, loc1, loc2, L.C, + st.error, nug.error, ranges, Sigma.All, nuggets, marg.smoothness, marg.var, + x.variance, nn, otr) diff --git a/tests/testthat/sim_vecchia_small.R b/tests/testthat/sim_vecchia_small.R new file mode 100755 index 0000000..9c884eb --- /dev/null +++ b/tests/testthat/sim_vecchia_small.R @@ -0,0 +1,39 @@ +set.seed(1212) + +p <- 10 # number of predictors for each response +p.nz <- 4 # number of nonzero predictors for each y +n.spatial.xy <- 10 # number of spatial coordinates per dimension + +library(MASS) +library(fields) + +beta1 <- c(rep(1,p.nz), rep(0,p-p.nz)) + +Sigma.X <- exp(-rdist(sample(1:p))/3) +X <- mvrnorm(n.spatial.xy^2,rep(0,p),Sigma.X) +mean.trend.st <- as.vector(X %*% beta1) + +marg.smoothness <- 0.5 + rnorm(1, 0.1, 0.05) +nuggets <- runif(1, 0.5, 2) +x.variance <- runif(1, 1.5, 4) +marg.var <- nuggets + x.variance +ranges <- runif(1, 0.5, 1.2) + +params.all <- c(x.variance, ranges, marg.smoothness, nuggets) + + +loc1 <- seq(0,1,length.out = n.spatial.xy)+rnorm(n.spatial.xy,0,0.001) +loc2 <- seq(0,1,length.out = n.spatial.xy)+rnorm(n.spatial.xy,0,0.001) +locs <- as.matrix(expand.grid(loc1, loc2)) + +Sigma.All <- marg.var*Matern(rdist(locs), range=ranges, + smoothness=marg.smoothness) +L.C <- chol(Sigma.All) + +st.error <- as.vector(rnorm(n.spatial.xy^2) %*% L.C) +nug.error <- nuggets*rnorm(n.spatial.xy^2) +y <- mean.trend.st + st.error + nug.error + +rm(p, p.nz, n.spatial.xy, beta1, Sigma.X, mean.trend.st, loc1, loc2, L.C, + st.error, nug.error, ranges, Sigma.All, nuggets, marg.smoothness, marg.var, + x.variance) diff --git a/tests/testthat/sim_vecchia_small_pred.R b/tests/testthat/sim_vecchia_small_pred.R new file mode 100755 index 0000000..7f25507 --- /dev/null +++ b/tests/testthat/sim_vecchia_small_pred.R @@ -0,0 +1,50 @@ +set.seed(1212) + +p <- 10 # number of predictors for each response +p.nz <- 4 # number of nonzero predictors for each y +n.spatial.xy <- 20 # number of spatial coordinates per dimension + +library(MASS) +library(fields) + +beta1 <- c(rep(1,p.nz), rep(0,p-p.nz)) + +Sigma.X <- exp(-rdist(sample(1:p))/3) +X <- mvrnorm(n.spatial.xy^2,rep(0,p),Sigma.X) +mean.trend.st <- as.vector(X %*% beta1) + +marg.smoothness <- 0.5 + rnorm(1, 0.1, 0.05) +nuggets <- runif(1, 0.5, 2) +x.variance <- runif(1, 1.5, 4) +marg.var <- nuggets + x.variance +ranges <- runif(1, 0.5, 1.2) + +params.all <- c(x.variance, ranges, marg.smoothness, nuggets) + + +loc1 <- seq(0,1,length.out = n.spatial.xy)+rnorm(n.spatial.xy,0,0.001) +loc2 <- seq(0,1,length.out = n.spatial.xy)+rnorm(n.spatial.xy,0,0.001) +locs <- as.matrix(expand.grid(loc1, loc2)) + +Sigma.All <- marg.var*Matern(rdist(locs), range=ranges, + smoothness=marg.smoothness) +L.C <- chol(Sigma.All) + +st.error <- as.vector(rnorm(n.spatial.xy^2) %*% L.C) +nug.error <- nuggets*rnorm(n.spatial.xy^2) +y <- mean.trend.st + st.error + nug.error + +nn <- n.spatial.xy^2 +otr <- rep(FALSE, nn) +otr[sample(1:nn, size=floor(nn/2))] <- TRUE + +locs.otr <- locs[otr,] +locs.otst <- locs[!otr,] +y.otr <- y[otr] +y.otst <- y[!otr] +X.otr <- X[otr,] +X.otst <- X[!otr,] + +rm(p, p.nz, n.spatial.xy, beta1, Sigma.X, mean.trend.st, loc1, loc2, L.C, + st.error, nug.error, ranges, Sigma.All, nuggets, marg.smoothness, marg.var, + x.variance, nn, otr) diff --git a/tests/testthat/test-Log_Likelihood.R b/tests/testthat/test-Log_Likelihood.R index 2933641..d1ddb03 100755 --- a/tests/testthat/test-Log_Likelihood.R +++ b/tests/testthat/test-Log_Likelihood.R @@ -1,28 +1,72 @@ +test_that("negloglik_vecchia", { + set.seed(1234) + + y.orig <- rnorm(100) + + locs.dim1 <- seq(1, 10, length = 10) + rnorm(10, 0, 0.1) + locs.dim2 <- seq(1, 10, length = 10) + rnorm(10, 0, 0.1) + locs <- as.matrix(expand.grid(locs.dim1, locs.dim2)) + + covmat.true <- Matern(rdist(locs), range = 1, smoothness = 0.5) + diag(100) + + y <- y.orig %*% chol(covmat.true) + y <- as.vector(y) + + logparams <- create.initial.values.flex(0.9, 0.5, 0.5, 0.1, 1, 1) + pseq <- create.param.sequence(1) + vec.approx <- vecchia_specify(locs, 5) + LL.vecchia <- negloglik_vecchia(logparams, y, vec.approx, pseq) + expect_equal(194.75, LL.vecchia, tolerance=1e-2) + + vec.mapprox <- vecchia_Mspecify(list(locs), 5) + LL.mvecchia <- mvnegloglik(logparams, vec.mapprox, y, pseq, 1) + # Univariate likelihood should equal multivariate likelihood + expect_equal(LL.vecchia, LL.mvecchia, tolerance=1e-1) + + logparams2 <- create.initial.values.flex(0.9, 1, 0.5, 0.1, 1, 1) + vec.approx2 <- vec.approx + vec.approx2$locsord <- vec.approx$locsord / 0.5 + LL.vecchia2 <- negloglik_vecchia(logparams2, y, vec.approx2, pseq) + LL.vecchia.st <- negloglik_vecchia_ST(logparams, y, vec.approx, pseq, 1, 1) + vec.mapprox2 <- vec.mapprox + vec.mapprox2$locsord <- vec.mapprox$locsord / 0.5 + LL.mvecchia2 <- mvnegloglik_ST(logparams2, vec.mapprox2, y, pseq, 1, 1, 1) + + # Likelihood should equal spatiotemporal likelihood after scaling locs + expect_equal(LL.vecchia, LL.vecchia2, tolerance=1e-3) + expect_equal(LL.vecchia, LL.vecchia.st, tolerance=1e-3) + expect_equal(LL.vecchia, LL.mvecchia2, tolerance=1e-1) +}) + test_that("negloglik_vecchia_ST", { - set.seed(7919) - load("small_sim.Rdata") - return(1) # this test isn't useful at the moment - params <- c(0.5, 0.5, 0.5, 0.5) - locs <- locs_train - X <- X_train - Y <- Y_train - locs.scaled <- locs / c(params[2], params[2], params[3]) - vecchia_approx <- vecchia_specify(locs.scaled, 25) - beta.hat <- rep(0, ncol(X)) - Y.hat <- as.matrix(X) %*% beta.hat - res <- as.double(Y - Y.hat) - ord_locs <- locs[vecchia_approx$ord, ] - locs_mat <- cbind(ord_locs[, 1], ord_locs[, 2], ord_locs[, 3]) - result <- negloglik_vecchia_ST(log(params), locs_mat, res, vecchia_approx) - expect_equal(1597.9808688, result, tolerance = 10e-5) - vecchia.result <- optim( - par = log(params), fn = negloglik_vecchia_ST, - locs = locs_mat, res = res, vecchia.approx = vecchia_approx, method = "Nelder-Mead", - control = list(trace = 0) - ) - params_optim <- exp(vecchia.result$par) - result_optim <- negloglik_vecchia_ST(log(params_optim), locs_mat, res, vecchia_approx) - expect_equal(-6861.65048095, result_optim, tolerance = 1e-5) + set.seed(1234) + + y.orig <- rnorm(125) + + locs.dim1 <- seq(1, 5, length = 5) + rnorm(5, 0, 0.1) + locs.dim2 <- seq(1, 5, length = 5) + rnorm(5, 0, 0.1) + locs.dim3 <- seq(1, 5, length = 5) + rnorm(5, 0, 0.1) + locs <- as.matrix(expand.grid(locs.dim1, locs.dim2, locs.dim3)) + + covmat.true <- Matern(rdist(locs), range = 1, smoothness = 0.5) + diag(125) + + y <- y.orig %*% chol(covmat.true) + y <- as.vector(y) + + logparams <- create.initial.values.flex(0.9, c(2, 3), 0.5, 0.1, 1, 1) + pseq <- create.param.sequence(1, 2) + vec.approx <- vecchia_specify(locs, 5) + LL.vecchia <- negloglik_vecchia_ST(logparams, y, vec.approx, pseq, + c(1, 1, 2), 2) + expect_equal(284.73, LL.vecchia, tolerance=1e-2) + + logparams2 <- create.initial.values.flex(0.9, 1, 0.5, 0.1, 1, 1) + pseq2 <- create.param.sequence(1) + vec.approx2 <- vec.approx + vec.approx2$locsord[,1:2] <- vec.approx$locsord[,1:2] / 2 + vec.approx2$locsord[,3] <- vec.approx$locsord[,3] / 3 + LL.vecchia2 <- negloglik_vecchia(logparams2, y, vec.approx2, pseq2) + expect_equal(LL.vecchia, LL.vecchia2, tolerance=1e-3) }) test_that("negloglik.full", { @@ -93,6 +137,36 @@ test_that("negloglik.full", { expect_equal(LL.full, LL.pgp, tolerance = 1e-3) }) +test_that("negloglik_full_ST", { + set.seed(1234) + + y.orig <- rnorm(125) + + locs.dim1 <- seq(1, 5, length = 5) + rnorm(5, 0, 0.1) + locs.dim2 <- seq(1, 5, length = 5) + rnorm(5, 0, 0.1) + locs.dim3 <- seq(1, 5, length = 5) + rnorm(5, 0, 0.1) + locs <- as.matrix(expand.grid(locs.dim1, locs.dim2, locs.dim3)) + + covmat.true <- Matern(rdist(locs), range = 1, smoothness = 0.5) + diag(125) + + y <- y.orig %*% chol(covmat.true) + y <- as.vector(y) + + logparams <- create.initial.values.flex(0.9, c(2, 3), 0.5, 0.1, 1, 1) + pseq <- create.param.sequence(1, 2) + LL.full <- negloglik_full_ST(logparams, locs, y, pseq, + c(1, 1, 2), 2) + expect_equal(286.84, LL.full, tolerance=1e-2) + + logparams2 <- create.initial.values.flex(0.9, 1, 0.5, 0.1, 1, 1) + pseq2 <- create.param.sequence(1) + locs2 <- locs + locs2[,1:2] <- locs[,1:2] / 2 + locs2[,3] <- locs[,3] / 3 + LL.full2 <- negloglik.full(logparams2, rdist(locs2), y, pseq2) + expect_equal(LL.full, LL.full2, tolerance=1e-3) +}) + test_that("mvnegloglik", { load("sim_multivariate_big.RData") set.seed(1234) diff --git a/tests/testthat/test-PrestoGP_Model.R b/tests/testthat/test-PrestoGP_Model.R new file mode 100755 index 0000000..a13cc6c --- /dev/null +++ b/tests/testthat/test-PrestoGP_Model.R @@ -0,0 +1,155 @@ +test_that("beta.hat not numeric", { + source("sim_vecchia_small.R") + pgp.model1 <- new("VecchiaModel") + expect_error(prestogp_fit(pgp.model1, y, X, locs, beta.hat="foo"), + "beta.hat parameter must be a numeric vector") +}) + +test_that("beta.hat not a vector", { + source("sim_vecchia_small.R") + pgp.model1 <- new("VecchiaModel") + expect_error(prestogp_fit(pgp.model1, y, X, locs, beta.hat=matrix(1:3)), + "beta.hat parameter must be a numeric vector") +}) + +test_that("beta.hat incorrect length", { + source("sim_vecchia_small.R") + pgp.model1 <- new("VecchiaModel") + expect_error(prestogp_fit(pgp.model1, y, X, locs, beta.hat=1:3), + "Length of beta.hat must match the number of predictors") +}) + +test_that("tol not numeric", { + source("sim_vecchia_small.R") + pgp.model1 <- new("VecchiaModel") + expect_error(prestogp_fit(pgp.model1, y, X, locs, tol="foo"), + "tol must be numeric") +}) + +test_that("tol not a scalar", { + source("sim_vecchia_small.R") + pgp.model1 <- new("VecchiaModel") + expect_error(prestogp_fit(pgp.model1, y, X, locs, tol=1:2), + "tol must be a scalar") +}) + +test_that("tol out of range", { + source("sim_vecchia_small.R") + pgp.model1 <- new("VecchiaModel") + expect_error(prestogp_fit(pgp.model1, y, X, locs, tol=1.1), + "tol must satisfy 0=n. Changing to m=n-1") +}) diff --git a/tests/testthat/test-PrestoGP_Multivariate_Vecchia_Model.R b/tests/testthat/test-PrestoGP_Multivariate_Vecchia_Model.R index 5f1a64e..7a2a58a 100755 --- a/tests/testthat/test-PrestoGP_Multivariate_Vecchia_Model.R +++ b/tests/testthat/test-PrestoGP_Multivariate_Vecchia_Model.R @@ -142,6 +142,147 @@ test_that("Simulated dataset multivariate spatiotemporal", { ), tolerance = 1.1) }) +test_that("Invalid locs input for prediction", { + source("sim_multivariate_small_pred.R") + pgp.mmodel1 <- new("MultivariateVecchiaModel", n_neighbors=5) + pgp.mmodel1 <- prestogp_fit(pgp.mmodel1, y.list.otr, X.st.otr, + locs.list.otr, scaling=c(1,1), + apanasovich=TRUE, verbose=FALSE, + optim.control=list(trace=0,maxit=5000, + reltol=1e-3)) + expect_error(prestogp_predict(pgp.mmodel1, X.st.otst, as.matrix(1:3)), + "locs must be a list for multivariate models") +}) + +test_that("Invalid X input for prediction", { + source("sim_multivariate_small_pred.R") + pgp.mmodel1 <- new("MultivariateVecchiaModel", n_neighbors=5) + pgp.mmodel1 <- prestogp_fit(pgp.mmodel1, y.list.otr, X.st.otr, + locs.list.otr, scaling=c(1,1), + apanasovich=TRUE, verbose=FALSE, + optim.control=list(trace=0,maxit=5000, + reltol=1e-3)) + expect_error(prestogp_predict(pgp.mmodel1, as.matrix(1:3), locs.list.otst), + "X must be a list for multivariate models") +}) + +test_that("locs/X length mismatch for prediction", { + source("sim_multivariate_small_pred.R") + pgp.mmodel1 <- new("MultivariateVecchiaModel", n_neighbors=5) + pgp.mmodel1 <- prestogp_fit(pgp.mmodel1, y.list.otr, X.st.otr, + locs.list.otr, scaling=c(1,1), + apanasovich=TRUE, verbose=FALSE, + optim.control=list(trace=0,maxit=5000, + reltol=1e-3)) + expect_error(prestogp_predict(pgp.mmodel1, X.st.otst, list(as.matrix(1:3))), + "locs and X must have the same length") +}) + +test_that("locs/locs_train length mismatch", { + source("sim_multivariate_small_pred.R") + pgp.mmodel1 <- new("MultivariateVecchiaModel", n_neighbors=5) + pgp.mmodel1 <- prestogp_fit(pgp.mmodel1, y.list.otr, X.st.otr, + locs.list.otr, scaling=c(1,1), + apanasovich=TRUE, verbose=FALSE, + optim.control=list(trace=0,maxit=5000, + reltol=1e-3)) + expect_error(prestogp_predict(pgp.mmodel1, list(1:3, 1:3, 1:3), + list(1:3, 1:3, 1:3)), + "Training and test set locs must have the same length") +}) + +test_that("locs not a matrix for prediction", { + source("sim_multivariate_small_pred.R") + pgp.mmodel1 <- new("MultivariateVecchiaModel", n_neighbors=5) + pgp.mmodel1 <- prestogp_fit(pgp.mmodel1, y.list.otr, X.st.otr, + locs.list.otr, scaling=c(1,1), + apanasovich=TRUE, verbose=FALSE, + optim.control=list(trace=0,maxit=5000, + reltol=1e-3)) + expect_error(prestogp_predict(pgp.mmodel1, list(as.matrix(1:3), + as.matrix(1:3)), + list(1:3, as.matrix(1:3))), + "Each locs must be a matrix") +}) + +test_that("ncol(locs) != ncol(locs_train)", { + source("sim_multivariate_small_pred.R") + pgp.mmodel1 <- new("MultivariateVecchiaModel", n_neighbors=5) + pgp.mmodel1 <- prestogp_fit(pgp.mmodel1, y.list.otr, X.st.otr, + locs.list.otr, scaling=c(1,1), + apanasovich=TRUE, verbose=FALSE, + optim.control=list(trace=0,maxit=5000, + reltol=1e-3)) + expect_error(prestogp_predict(pgp.mmodel1, list(as.matrix(1:3), + as.matrix(1:3)), + list(as.matrix(1:3), as.matrix(1:3))), + "All locs must have the same number of columns as locs_train") +}) + +test_that("X not a matrix for prediction", { + source("sim_multivariate_small_pred.R") + pgp.mmodel1 <- new("MultivariateVecchiaModel", n_neighbors=5) + pgp.mmodel1 <- prestogp_fit(pgp.mmodel1, y.list.otr, X.st.otr, + locs.list.otr, scaling=c(1,1), + apanasovich=TRUE, verbose=FALSE, + optim.control=list(trace=0,maxit=5000, + reltol=1e-3)) + X.st.otst[[1]] <- as.vector(X.st.otst[[1]]) + expect_error(prestogp_predict(pgp.mmodel1, X.st.otst, locs.list.otst), + "Each X must be a matrix") +}) + +test_that("nrow(X) != nrow(locs) for prediction", { + source("sim_multivariate_small_pred.R") + pgp.mmodel1 <- new("MultivariateVecchiaModel", n_neighbors=5) + pgp.mmodel1 <- prestogp_fit(pgp.mmodel1, y.list.otr, X.st.otr, + locs.list.otr, scaling=c(1,1), + apanasovich=TRUE, verbose=FALSE, + optim.control=list(trace=0,maxit=5000, + reltol=1e-3)) + locs.list.otst[[1]] <- rbind(locs.list.otst[[1]], rep(0, 2)) + expect_error(prestogp_predict(pgp.mmodel1, X.st.otst, locs.list.otst), + "Each X must have the same number of rows as locs") +}) + +test_that("ncol(X) != ncol(X_train) for prediction", { + source("sim_multivariate_small_pred.R") + pgp.mmodel1 <- new("MultivariateVecchiaModel", n_neighbors=5) + pgp.mmodel1 <- prestogp_fit(pgp.mmodel1, y.list.otr, X.st.otr, + locs.list.otr, scaling=c(1,1), + apanasovich=TRUE, verbose=FALSE, + optim.control=list(trace=0,maxit=5000, + reltol=1e-3)) + X.st.otst[[1]] <- cbind(X.st.otst[[1]], rep(0, nrow(X.st.otst[[1]]))) + expect_error(prestogp_predict(pgp.mmodel1, X.st.otst, locs.list.otst), + "X and X_train must have the same number of predictors") +}) + +test_that("m too small for prediction", { + source("sim_multivariate_small_pred.R") + pgp.mmodel1 <- new("MultivariateVecchiaModel", n_neighbors=5) + pgp.mmodel1 <- prestogp_fit(pgp.mmodel1, y.list.otr, X.st.otr, + locs.list.otr, scaling=c(1,1), + apanasovich=TRUE, verbose=FALSE, + optim.control=list(trace=0,maxit=5000, + reltol=1e-3)) + expect_error(prestogp_predict(pgp.mmodel1, X.st.otst, locs.list.otst, m=2), + "m must be at least 3") +}) + +test_that("m too large for prediction", { + source("sim_multivariate_small_pred.R") + pgp.mmodel1 <- new("MultivariateVecchiaModel", n_neighbors=5) + pgp.mmodel1 <- prestogp_fit(pgp.mmodel1, y.list.otr, X.st.otr, + locs.list.otr, scaling=c(1,1), + apanasovich=TRUE, verbose=FALSE, + optim.control=list(trace=0,maxit=5000, + reltol=1e-3)) + expect_warning(prestogp_predict(pgp.mmodel1, X.st.otst, locs.list.otst, + m=51), + "Conditioning set size m chosen to be >=n. Changing to m=n-1") +}) + test_that("Simulated dataset multivariate spatial prediction", { source("sim_multivariate_big_pred.R") pgp.mmodel1 <- new("MultivariateVecchiaModel", n_neighbors = 25) @@ -158,6 +299,8 @@ test_that("Simulated dataset multivariate spatial prediction", { pgp.mmodel1.pred <- prestogp_predict(pgp.mmodel1, X.st.otst, locs.list.otst) mse <- mean((pgp.mmodel1.pred$means - unlist(y.list.otst))^2) + me <- mean(pgp.mmodel1.pred$means - unlist(y.list.otst)) expect_equal(mse, 1.99, tolerance = 0.1) + expect_equal(me, -0.04, tolerance = 0.03) }) diff --git a/tests/testthat/test-PrestoGP_Univariate_Model.R b/tests/testthat/test-PrestoGP_Univariate.R similarity index 53% rename from tests/testthat/test-PrestoGP_Univariate_Model.R rename to tests/testthat/test-PrestoGP_Univariate.R index 53a56b1..3ee7435 100755 --- a/tests/testthat/test-PrestoGP_Univariate_Model.R +++ b/tests/testthat/test-PrestoGP_Univariate.R @@ -41,7 +41,7 @@ test_that("nrow(Y) != nrow(X)", { }) test_that("Simulated dataset spatial", { - source("sim_vecchia.R") + load("sim_vecchia.RData") pgp.model1 <- new("VecchiaModel", n_neighbors=25) pgp.model1 <- prestogp_fit(pgp.model1, y, X, locs, scaling=c(1,1), apanasovich=TRUE, verbose=FALSE, @@ -132,3 +132,111 @@ test_that("Simulated dataset spatiotemporal", { expect_equal(params.out[4], params.out2[4], tolerance=0.3) expect_equal(params.out[5], params.out2[5], tolerance=0.1) }) + +test_that("Invalid locs input for prediction", { + source("sim_vecchia_small_pred.R") + pgp.model1 <- new("VecchiaModel", n_neighbors=5) + pgp.model1 <- prestogp_fit(pgp.model1, y.otr, X.otr, + locs.otr, scaling=c(1,1), + apanasovich=TRUE, verbose=FALSE, + optim.control=list(trace=0,maxit=5000, + reltol=1e-3)) + expect_error(prestogp_predict(pgp.model1, X.otst, 1:3), + "locs must be a matrix") +}) + +test_that("Invalid X input for prediction", { + source("sim_vecchia_small_pred.R") + pgp.model1 <- new("VecchiaModel", n_neighbors=5) + pgp.model1 <- prestogp_fit(pgp.model1, y.otr, X.otr, + locs.otr, scaling=c(1,1), + apanasovich=TRUE, verbose=FALSE, + optim.control=list(trace=0,maxit=5000, + reltol=1e-3)) + expect_error(prestogp_predict(pgp.model1, 1:3, locs.otst), + "X must be a matrix") +}) + +test_that("ncol(locs) != ncol(locs_train)", { + source("sim_vecchia_small_pred.R") + pgp.model1 <- new("VecchiaModel", n_neighbors=5) + pgp.model1 <- prestogp_fit(pgp.model1, y.otr, X.otr, + locs.otr, scaling=c(1,1), + apanasovich=TRUE, verbose=FALSE, + optim.control=list(trace=0,maxit=5000, + reltol=1e-3)) + locs.otst <- cbind(locs.otst, rep(1, nrow(locs.otst))) + expect_error(prestogp_predict(pgp.model1, X.otst, locs.otst), + "locs must have the same number of columns as locs_train") +}) + +test_that("nrow(X) != nrow(locs) for prediction", { + source("sim_vecchia_small_pred.R") + pgp.model1 <- new("VecchiaModel", n_neighbors=5) + pgp.model1 <- prestogp_fit(pgp.model1, y.otr, X.otr, + locs.otr, scaling=c(1,1), + apanasovich=TRUE, verbose=FALSE, + optim.control=list(trace=0,maxit=5000, + reltol=1e-3)) + X.otst <- rbind(X.otst, rep(1, ncol(X.otst))) + expect_error(prestogp_predict(pgp.model1, X.otst, locs.otst), + "X must have the same number of rows as locs") +}) + +test_that("ncol(X) != ncol(X_train) for prediction", { + source("sim_vecchia_small_pred.R") + pgp.model1 <- new("VecchiaModel", n_neighbors=5) + pgp.model1 <- prestogp_fit(pgp.model1, y.otr, X.otr, + locs.otr, scaling=c(1,1), + apanasovich=TRUE, verbose=FALSE, + optim.control=list(trace=0,maxit=5000, + reltol=1e-3)) + X.otst <- cbind(X.otst, rep(1, nrow(X.otst))) + expect_error(prestogp_predict(pgp.model1, X.otst, locs.otst), + "X and X_train must have the same number of predictors") +}) + +test_that("m too small for prediction", { + source("sim_vecchia_small_pred.R") + pgp.model1 <- new("VecchiaModel", n_neighbors=5) + pgp.model1 <- prestogp_fit(pgp.model1, y.otr, X.otr, + locs.otr, scaling=c(1,1), + apanasovich=TRUE, verbose=FALSE, + optim.control=list(trace=0,maxit=5000, + reltol=1e-3)) + expect_error(prestogp_predict(pgp.model1, X.otst, locs.otst, m=2), + "m must be at least 3") +}) + +test_that("m too large for prediction", { + source("sim_vecchia_small_pred.R") + pgp.model1 <- new("VecchiaModel", n_neighbors=5) + pgp.model1 <- prestogp_fit(pgp.model1, y.otr, X.otr, + locs.otr, scaling=c(1,1), + apanasovich=TRUE, verbose=FALSE, + optim.control=list(trace=0,maxit=5000, + reltol=1e-3)) + expect_warning(prestogp_predict(pgp.model1, X.otst, locs.otst, m=201), + "Conditioning set size m chosen to be >=n. Changing to m=n-1") +}) + +test_that("Simulated spatial prediction", { + source("sim_vecchia_pred.R") + pgp.model1 <- new("VecchiaModel", n_neighbors = 25) + pgp.model1 <- prestogp_fit(pgp.model1, y.otr, X.otr, locs.otr, + scaling = c(1, 1), + apanasovich = TRUE, verbose = FALSE, + optim.control = list( + trace = 0, maxit = 5000, + reltol = 1e-3 + ) + ) + + pgp.model1.pred <- prestogp_predict(pgp.model1, X.otst, locs.otst) + + mse <- mean((pgp.model1.pred$means - y.otst)^2) + me <- mean(pgp.model1.pred$means - y.otst) + + expect_equal(mse, 2.24, tolerance = 0.07) + expect_equal(me, 0.03, tolerance = 0.05) +})