diff --git a/R/Log_Likelihood.R b/R/Log_Likelihood.R
index 7529cd9..d663715 100755
--- a/R/Log_Likelihood.R
+++ b/R/Log_Likelihood.R
@@ -12,7 +12,8 @@
 #'
 #' @examples
 #' @noRd
-negloglik_vecchia_ST <- function(logparms, res, vecchia.approx, param.seq, scaling, nscale) {
+negloglik_vecchia_ST <- function(logparms, res, vecchia.approx, param.seq,
+                                 scaling, nscale) {
   parms <- unlog.params(logparms, param.seq, 1)
   locs.scaled <- vecchia.approx$locsord
   for (j in 1:nscale) {
@@ -20,7 +21,9 @@ negloglik_vecchia_ST <- function(logparms, res, vecchia.approx, param.seq, scali
       parms[param.seq[2, 1] + j - 1]
   }
   vecchia.approx$locsord <- locs.scaled
-  -vecchia_likelihood(res, vecchia.approx, c(parms[1], 1, parms[param.seq[3, 1]]), parms[param.seq[4, 1]])
+  -vecchia_likelihood(res, vecchia.approx, c(parms[1], 1,
+                                             parms[param.seq[3, 1]]),
+                      parms[param.seq[4, 1]])
 }
 
 #' negloglik_vecchia
@@ -39,7 +42,8 @@ negloglik_vecchia_ST <- function(logparms, res, vecchia.approx, param.seq, scali
 #' @noRd
 negloglik_vecchia <- function(logparms, res, vecchia.approx, param.seq) {
   parms <- unlog.params(logparms, param.seq, 1)
-  -vecchia_likelihood(res, vecchia.approx, c(parms[1], parms[2], parms[3]), parms[4])
+  -vecchia_likelihood(res, vecchia.approx, c(parms[1], parms[2], parms[3]),
+                      parms[4])
 }
 
 #' negloglik_full_ST
@@ -91,7 +95,8 @@ negloglik.full <- function(logparams, d, y, param.seq) {
   params <- unlog.params(logparams, param.seq, 1)
   #    d <- fields::rdist(locs)
   N <- nrow(d)
-  cov.mat <- params[1] * fields::Matern(d, range = params[2], smoothness = params[3]) +
+  cov.mat <- params[1] * fields::Matern(d, range = params[2],
+                                        smoothness = params[3]) +
     params[4] * diag(N)
   return(-1 * mvtnorm::dmvnorm(y, rep(0, N), cov.mat, log = TRUE))
 }
@@ -128,7 +133,8 @@ mvnegloglik <- function(logparams, vecchia.approx, y, param.seq, P) {
 ##############################################################################
 ### Flexible Spatiotemporal Multivariate Matern Negative Loglikelihood Function ###########
 
-mvnegloglik_ST <- function(logparams, vecchia.approx, y, param.seq, P, scaling, nscale) {
+mvnegloglik_ST <- function(logparams, vecchia.approx, y, param.seq, P, scaling,
+                           nscale) {
   #  Input-
   #  logparams: A numeric vector of length (4*P)+(4*choose(P,2)).
   #             To construct these parameters we unlist a list of the 7 covariance
@@ -236,11 +242,15 @@ create.cov.upper.flex <- function(P, marg.var, marg.range, marg.smooth,
       j <- combs[iter, 2]
 
       smoothness.mat[i, j] <- (marg.smooth[i] + marg.smooth[j]) / 2
-      range.mat[i, j] <- 1 / sqrt(((1 / marg.range[i])^2 + (1 / marg.range[j])^2) / 2)
+      range.mat[i, j] <- 1 / sqrt(((1 / marg.range[i])^2 +
+                                     (1 / marg.range[j])^2) / 2)
 
       s1 <- sqrt(marg.var[i] * marg.var[j])
-      s2 <- ((1 / marg.range[i])^marg.smooth[i] * (1 / marg.range[j])^marg.smooth[j]) / ((1 / range.mat[i, j])^(2 * smoothness.mat[i, j]))
-      s3 <- gamma(smoothness.mat[i, j]) / (sqrt(gamma(marg.smooth[i])) * sqrt(gamma(marg.smooth[j])))
+      s2 <- ((1 / marg.range[i])^marg.smooth[i] *
+               (1 / marg.range[j])^marg.smooth[j]) /
+        ((1 / range.mat[i, j])^(2 * smoothness.mat[i, j]))
+      s3 <- gamma(smoothness.mat[i, j]) / (sqrt(gamma(marg.smooth[i])) *
+                                             sqrt(gamma(marg.smooth[j])))
       s4 <- R.corr[iter]
       sig2.mat[i, j] <- s1 * s2 * s3 * s4
     }
@@ -282,10 +292,12 @@ cat.covariances <- function(locs.list, sig2, range, smoothness, nugget) {
     # Calculate the covariance matrix - if/then based on its location in the super-matrix
     N <- nrow(d)
     if (i == j) { # To accomodate varying size outcomes- the nugget is not included on cross-covariances
-      cov.mat.ij <- sig2[i, j] * geoR::matern(d, phi = range[i, j], kappa = smoothness[i, j]) +
+      cov.mat.ij <- sig2[i, j] * geoR::matern(d, phi = range[i, j], kappa =
+                                                smoothness[i, j]) +
         nugget[i, j] * diag(N)
     } else {
-      cov.mat.ij <- sig2[i, j] * geoR::matern(d, phi = range[i, j], kappa = smoothness[i, j])
+      cov.mat.ij <- sig2[i, j] * geoR::matern(d, phi = range[i, j], kappa =
+                                                smoothness[i, j])
     }
 
 
diff --git a/R/PrestoGP_CreateU_Multivariate.R b/R/PrestoGP_CreateU_Multivariate.R
index fdbbe24..bb099f7 100755
--- a/R/PrestoGP_CreateU_Multivariate.R
+++ b/R/PrestoGP_CreateU_Multivariate.R
@@ -66,7 +66,8 @@ max_min_ordering <- function(locs, dist_func) {
 #' @param dist_func Any distance function with a signature of dist(query_location, locations_matrix)
 #'
 #' @return A vector containing the indices of the neighbors
-knn_indices <- function(ordered_locs, query, n_neighbors, dist_func, dist_func_code) {
+knn_indices <- function(ordered_locs, query, n_neighbors,
+                        dist_func, dist_func_code) {
   if (dist_func_code == "custom") {
     dists <- dist_func(query, ordered_locs)
     dists_order <- order(dists)
@@ -90,7 +91,8 @@ knn_indices <- function(ordered_locs, query, n_neighbors, dist_func, dist_func_c
 #' @param dist_func Any distance function with a signature of dist(query_location, locations_matrix)
 #'
 #' @return A list containing two matrices, each with one row per location: an indices matrix with the indices of nearest neighbors for each location, and a distance matrix with the associated distances
-sparseNN <- function(ordered_locs, n_neighbors, dist_func, dist_func_code, ordered_locs_pred = NULL) {
+sparseNN <- function(ordered_locs, n_neighbors,
+                     dist_func, dist_func_code, ordered_locs_pred = NULL) {
   ee <- min(apply(ordered_locs, 2, stats::sd))
   n <- nrow(ordered_locs)
   ordered_locs <- ordered_locs + matrix(
@@ -98,17 +100,25 @@ sparseNN <- function(ordered_locs, n_neighbors, dist_func, dist_func_code, order
       stats::rnorm(n * ncol(ordered_locs)),
     n, ncol(ordered_locs)
   )
-  indices_matrix <- matrix(data = NA, nrow = nrow(ordered_locs), ncol = n_neighbors)
-  distances_matrix <- matrix(data = NA, nrow = nrow(ordered_locs), ncol = n_neighbors)
+  indices_matrix <- matrix(data = NA, nrow = nrow(ordered_locs),
+                           ncol = n_neighbors)
+  distances_matrix <- matrix(data = NA, nrow = nrow(ordered_locs),
+                             ncol = n_neighbors)
   for (row in 1:n_neighbors) {
     # for the locations from 1 to n_neighbors, use the entire locs list to find the neighbors
-    nn <- knn_indices(ordered_locs[1:(n_neighbors + 1), , drop = FALSE][-row, , drop = FALSE], ordered_locs[row, , drop = FALSE], n_neighbors, dist_func, dist_func_code)
+    nn <- knn_indices(ordered_locs[1:
+                                     (n_neighbors + 1), , drop = FALSE][-row, ,
+                                                              drop = FALSE],
+                      ordered_locs[row, , drop = FALSE], n_neighbors,
+                      dist_func, dist_func_code)
     indices_matrix[row, 1:n_neighbors] <- nn$indices[1:n_neighbors]
     distances_matrix[row, 1:n_neighbors] <- nn$distances[1:n_neighbors]
   }
   for (row in (n_neighbors + 1):nrow(ordered_locs)) {
     # get the m nearest neighbors from the locs before this one in the max-min order
-    nn <- knn_indices(ordered_locs[1:(row - 1), , drop = FALSE], ordered_locs[row, , drop = FALSE], n_neighbors, dist_func, dist_func_code)
+    nn <- knn_indices(ordered_locs[1:(row - 1), , drop = FALSE],
+                      ordered_locs[row, , drop = FALSE], n_neighbors,
+                      dist_func, dist_func_code)
     indices_matrix[row, 1:n_neighbors] <- nn$indices[1:n_neighbors]
     distances_matrix[row, 1:n_neighbors] <- nn$distances[1:n_neighbors]
   }
diff --git a/R/PrestoGP_Model.R b/R/PrestoGP_Model.R
index 1e12d45..76f07cd 100755
--- a/R/PrestoGP_Model.R
+++ b/R/PrestoGP_Model.R
@@ -70,7 +70,7 @@ setMethod("initialize", "PrestoGPModel", function(.Object, ...) {
 setGeneric("show_theta", function(object, Y_names) standardGeneric("show_theta"))
 setGeneric("prestogp_fit", function(model, Y, X, locs, scaling = NULL, apanasovich = FALSE, covparams = NULL, beta.hat = NULL, tol = 0.999999, max_iters = 100, verbose = FALSE, optim.method = "Nelder-Mead", optim.control = list(trace = 0, reltol = 1e-3, maxit = 5000), parallel = FALSE, foldid = NULL) standardGeneric("prestogp_fit"))
 setGeneric("prestogp_predict", function(model, X = "matrix", locs = "matrix", m = "numeric", ordering.pred = c("obspred", "general"), pred.cond = c("independent", "general"), return.values = c("mean", "meanvar")) standardGeneric("prestogp_predict"))
-setGeneric("calc_covparams", function(model, locs, Y) standardGeneric("calc_covparams"))
+setGeneric("calc_covparams", function(model, locs, Y, covparams) standardGeneric("calc_covparams"))
 setGeneric("specify", function(model, ...) standardGeneric("specify"))
 setGeneric("compute_residuals", function(model, Y, Y.hat) standardGeneric("compute_residuals"))
 setGeneric("transform_data", function(model, Y, X) standardGeneric("transform_data"))
@@ -81,6 +81,7 @@ setGeneric("scale_locs", function(model, locs) standardGeneric("scale_locs"))
 setGeneric("theta_names", function(model) standardGeneric("theta_names"))
 setGeneric("transform_covariance_parameters", function(model) standardGeneric("transform_covariance_parameters"))
 setGeneric("check_input", function(model, Y, X, locs) standardGeneric("check_input"))
+setGeneric("check_input_pred", function(model, X, locs) standardGeneric("check_input_pred"))
 
 #' show
 #'
@@ -205,22 +206,35 @@ setMethod(
            optim.control = list(trace = 0, reltol = 1e-3, maxit = 5000),
            parallel = FALSE, foldid = NULL) {
     model <- check_input(model, Y, X, locs)
-    if (!is.double(beta.hat) && !is.null(beta.hat)) {
-      stop("The beta.hat parameter must be floating point number.")
+    if (!is.null(beta.hat)) {
+        if (!is.vector(beta.hat) | !is.numeric(beta.hat)) {
+            stop("beta.hat parameter must be a numeric vector")
+        }
+        if (length(beta.hat) != (ncol(model@X_train) + 1)) {
+            stop("Length of beta.hat must match the number of predictors")
+        }
+        beta.hat <- as.matrix(beta.hat)
     }
-    if (!is.double(tol)) {
-      stop("The tol parameter must be floating point number.")
+    if (!is.numeric(tol)) {
+      stop("tol must be numeric")
     }
-    if (is.null(scaling)) {
-      if (is.matrix(locs)) {
-        scaling <- rep(1, ncol(locs))
-      } else {
-        scaling <- rep(1, ncol(locs[[1]]))
-      }
+    if (length(tol) != 1) {
+      stop("tol must be a scalar")
     }
-    nscale <- length(unique(scaling))
-    if (sum(sort(unique(scaling)) == 1:nscale) < nscale) {
-      stop("scaling must consist of sequential integers between 1 and ncol(locs)")
+    if (tol<=0 | tol>1) {
+      stop("tol must satisfy 0<tol<=1")
+    }
+    if (is.null(scaling)) {
+        scaling <- rep(1, ncol(model@locs_train[[1]]))
+        nscale <- 1
+    } else {
+        if (length(scaling) != ncol(model@locs_train[[1]])) {
+            stop("Length of scaling must equal ncol of locs")
+        }
+        nscale <- length(unique(scaling))
+        if (sum(sort(unique(scaling)) == 1:nscale) < nscale) {
+            stop("scaling must consist of sequential integers starting at 1")
+        }
     }
     if (is.null(apanasovich)) {
       if (nscale == 1) {
@@ -235,14 +249,18 @@ setMethod(
     model@scaling <- scaling
     model@nscale <- nscale
     model@apanasovich <- apanasovich
-    if (is.null(covparams)) {
-      model <- calc_covparams(model, locs, Y)
-    }
-    if (!is.vector(model@covparams)) {
-      stop("The covparams paramter must be a numeric vector.")
+    if (!is.null(covparams)) {
+      if (!is.vector(covparams) | !is.numeric(covparams)) {
+        stop("covparams must be a numeric vector")
+      }
     }
+    model <- calc_covparams(model, locs, Y, covparams)
     if (model@n_neighbors < model@min_m) {
-      stop(paste("M must be at least ", model@min_m, ".", sep = ""))
+      stop(paste("m must be at least ", model@min_m, sep = ""))
+    }
+    if (model@n_neighbors >= nrow(model@Y_train)) {
+      warning("Conditioning set size m chosen to be >=n. Changing to m=n-1")
+      model@n_neighbors <- nrow(model@Y_train) - 1
     }
 
     model <- specify(model)
@@ -390,7 +408,7 @@ setMethod("compute_error", "PrestoGPModel", function(model) {
 #' @param Y the dependent variable matrix
 #'
 #' @return a model with initial covariance parameters
-setMethod("calc_covparams", "PrestoGPModel", function(model, locs, Y) {
+setMethod("calc_covparams", "PrestoGPModel", function(model, locs, Y, covparams) {
   if (!is.list(locs)) {
     P <- 1
     locs <- list(locs)
@@ -398,27 +416,68 @@ setMethod("calc_covparams", "PrestoGPModel", function(model, locs, Y) {
   } else {
     P <- length(locs)
   }
-  col.vars <- rep(NA, P)
-  D.sample.bar <- rep(NA, model@nscale * P)
-  for (i in 1:P) {
-    col.vars[i] <- var(Y[[i]])
-    N <- length(Y[[i]])
-    # TODO find a better way to compute initial spatial range
-    for (j in 1:model@nscale) {
-      d.sample <- sample(1:N, max(2, ceiling(N / 50)), replace = FALSE)
-      D.sample <- rdist(locs[[i]][d.sample, model@scaling == j])
-      D.sample.bar[(i - 1) * model@nscale + j] <- mean(D.sample) / 4
-    }
+  pseq <- create.param.sequence(P, model@nscale)
+  if (is.null(covparams)) {
+      col.vars <- rep(NA, P)
+      D.sample.bar <- rep(NA, model@nscale * P)
+      for (i in 1:P) {
+          col.vars[i] <- var(Y[[i]])
+          N <- length(Y[[i]])
+          # TODO find a better way to compute initial spatial range
+          for (j in 1:model@nscale) {
+              d.sample <- sample(1:N, max(2, ceiling(N / 50)), replace = FALSE)
+              D.sample <- rdist(locs[[i]][d.sample, model@scaling == j])
+              D.sample.bar[(i - 1) * model@nscale + j] <- mean(D.sample) / 4
+          }
+      }
+      model@logparams <- create.initial.values.flex(
+          c(0.9 * col.vars), # marginal variance
+          D.sample.bar, # range
+          rep(0.5, P), # smoothness
+          c(.1 * col.vars), # nuggets
+          rep(0, choose(P, 2)),
+          P
+      )
+  } else {
+      if (P==1) {
+          if (length(covparams) != pseq[4,2]) {
+              stop("Incorrect number of parameters in covparams")
+          }
+      } else {
+          if (length(covparams) != pseq[5,2]) {
+              stop("Incorrect number of parameters in covparams")
+          }
+      }
+      init.var <- covparams[pseq[1,1]:pseq[1,2]]
+      init.range <- covparams[pseq[2,1]:pseq[2,2]]
+      init.smooth <- covparams[pseq[3,1]:pseq[3,2]]
+      init.nugget <- covparams[pseq[4,1]:pseq[4,2]]
+      if (P>1) {
+          init.corr <- covparams[pseq[5,1]:pseq[5,2]]
+      }
+      else {
+          init.corr <- 0
+      }
+      if (sum(init.var<=0)>0) {
+          stop("Initial variance estimates must be positive")
+      }
+      if (sum(init.range<=0)>0) {
+          stop("Initial range estimates must be positive")
+      }
+      if (sum(init.nugget<=0)>0) {
+          stop("Initial nugget estimates must be positive")
+      }
+      if (sum(init.smooth<=0)>0 | sum(init.smooth>=2.5)>0) {
+          stop("Initial smoothness estimates must be between 0 and 2.5")
+      }
+      if (sum(init.corr < -1)>0 | sum(init.corr > 1)>0) {
+          stop("Initial correlation estimates must be between -1 and 1")
+      }
+      model@logparams <- create.initial.values.flex(init.var, init.range,
+                                                    init.smooth, init.nugget,
+                                                    init.corr, P)
   }
-  model@logparams <- create.initial.values.flex(
-    c(0.9 * col.vars), # marginal variance
-    D.sample.bar, # range
-    rep(0.5, P), # smoothness
-    c(.1 * col.vars), # nuggets
-    rep(0, choose(P, 2)),
-    P
-  )
-  model@param_sequence <- create.param.sequence(P, model@nscale)
+  model@param_sequence <- pseq
   model <- transform_covariance_parameters(model)
   invisible(model)
 })
diff --git a/R/PrestoGP_Multivariate_Vecchia.R b/R/PrestoGP_Multivariate_Vecchia.R
index d4e2959..08dab5b 100755
--- a/R/PrestoGP_Multivariate_Vecchia.R
+++ b/R/PrestoGP_Multivariate_Vecchia.R
@@ -50,27 +50,17 @@ setMethod("prestogp_predict", "MultivariateVecchiaModel", function(model, X, loc
   ordering.pred <- match.arg(ordering.pred)
   pred.cond <- match.arg(pred.cond)
   return.values <- match.arg(return.values)
-  if (!is.list(X)) {
-    stop("X parameter must be a list.")
-  }
-  if (!is.list(locs)) {
-    stop("locs parameter must be a list.")
-  }
-  ndx.out <- NULL
-  for (i in 1:length(locs)) {
-    if (nrow(X[[i]]) != nrow(locs[[i]])) {
-      stop("The number of locations must match the number of X observations.")
-    }
-    ndx.out <- c(ndx.out, rep(i, nrow(locs[[i]])))
-  }
-  X <- psych::superMatrix(X)
-  if (ncol(X) != ncol(model@X_train)) {
-    stop("The number of predictors in X must match the training data")
-  }
+  X <- check_input_pred(model, X, locs)
   if (is.null(m)) { # m defaults to the value used for training
     m <- model@n_neighbors
   }
-  stopifnot((m > 0)) # FIXME m is not required by full model
+  if (m < model@min_m) {
+    stop(paste("m must be at least ", model@min_m, sep = ""))
+  }
+  if (m >= nrow(model@X_train)) {
+    warning("Conditioning set size m chosen to be >=n. Changing to m=n-1")
+    m <- nrow(model@X_train) - 1
+  }
 
   # Vecchia prediction at new locations
   Vecchia.Pred <- predict(model@linear_model, newx = X, s = model@linear_model$lambda[model@lambda_1se_idx])
@@ -114,15 +104,13 @@ setMethod("prestogp_predict", "MultivariateVecchiaModel", function(model, X, loc
     return.list <- list(means = Vec.mean)
   } else {
     warning("Variance estimates do not include model fitting variance and are anticonservative. Use with caution.")
-    vec.sds <- sqrt(pred$var.pred)
+    vec.var <- pred$var.pred
     for (i in 1:length(locs)) {
-      vec.sds[ndx.out == i] <- sqrt(vec.sds[ndx.out == i] +
+      vec.sds[ndx.out == i] <- sqrt(vec.var[ndx.out == i] +
         model@covparams[model@param_sequence[4, i]])
     }
-    return.list$sds <- vec.sds
+    return.list <- list(means = Vec.mean, sds = vec.sds)
   }
-  # option to include or exclude theta below
-  #  Vec.sds = sqrt(pred$var.pred + model@covparams[4]) #standard deviation
 
   return(return.list)
 })
@@ -181,6 +169,47 @@ setMethod("check_input", "MultivariateVecchiaModel", function(model, Y, X, locs)
   invisible(model)
 })
 
+setMethod("check_input_pred", "MultivariateVecchiaModel", function(model, X, locs) {
+  if (!is.list(locs)) {
+    stop("locs must be a list for multivariate models")
+  }
+  if (!is.list(X)) {
+    stop("X must be a list for multivariate models")
+  }
+  if (length(locs) != length(X)) {
+    stop("locs and X must have the same length")
+  }
+  if (length(locs) != length(model@locs_train)) {
+    stop("Training and test set locs must have the same length")
+  }
+  for (i in 1:length(locs)) {
+    if (!is.matrix(locs[[i]])) {
+      stop("Each locs must be a matrix")
+    }
+    if (i > 1) {
+      if (ncol(locs[[i]]) != ncol(model@locs_train[[1]])) {
+        stop("All locs must have the same number of columns as locs_train")
+      }
+    }
+    if (!is.matrix(X[[i]])) {
+      stop("Each X must be a matrix")
+    }
+    if (nrow(X[[i]]) != nrow(locs[[i]])) {
+      stop("Each X must have the same number of rows as locs")
+    }
+  }
+  if (length(X) == 1) {
+      X <- X[[1]]
+  }
+  else {
+      X <- psych::superMatrix(X)
+  }
+  if (ncol(X) != ncol(model@X_train)) {
+      stop("X and X_train must have the same number of predictors")
+  }
+  return(X)
+})
+
 setMethod("specify", "MultivariateVecchiaModel", function(model) {
   locs <- model@locs_train
   locs.scaled <- scale_locs(model, locs)
diff --git a/R/PrestoGP_Vecchia.R b/R/PrestoGP_Vecchia.R
index fa6dfb7..ebd28ec 100755
--- a/R/PrestoGP_Vecchia.R
+++ b/R/PrestoGP_Vecchia.R
@@ -42,24 +42,20 @@ setMethod("initialize", "VecchiaModel", function(.Object, n_neighbors = 25, ...)
 #' prediction <- prestogp_predict(model, X.test, locs.test)
 #' Vec.mean <- prediction[[1]]
 #' Vec.sds <- prediction[[2]]
-setMethod("prestogp_predict", "VecchiaModel", function(model, X, locs, m = NULL) {
+setMethod("prestogp_predict", "VecchiaModel", function(model, X, locs, m = NULL, ordering.pred = c("obspred", "general"), pred.cond = c("independent", "general"), return.values = c("mean", "meanvar")) {
   # validate parameters
-  if (!is.matrix(X)) {
-    stop("X parameter must be a matrix.")
-  }
-  if (!is.matrix(locs)) {
-    stop("The locs parameter must be a matrix.")
-  }
-  if (nrow(X) != nrow(locs)) {
-    stop("The number of locations must match the number of X observations.")
-  }
-  if (ncol(locs) != 3) {
-    stop("The locs parameter must have 3 columns.")
-  }
+  ordering.pred <- match.arg(ordering.pred)
+  pred.cond <- match.arg(pred.cond)
+  return.values <- match.arg(return.values)
+  model <- check_input_pred(model, X, locs)
   if (is.null(m)) { # m defaults to the value used for training
     m <- model@n_neighbors
   }
-  if (m == 0) {
+  if (m < model@min_m) {
+    stop(paste("m must be at least ", model@min_m, sep = ""))
+  }
+  if (m >= nrow(model@X_train)) {
+    warning("Conditioning set size m chosen to be >=n. Changing to m=n-1")
     m <- nrow(model@X_train) - 1
   }
 
@@ -73,20 +69,29 @@ setMethod("prestogp_predict", "VecchiaModel", function(model, X, locs, m = NULL)
   # Test set prediction
   res <- model@Y_train - Vecchia.hat
 
-  locs.train.scaled <- scale_locs(model, model@locs_train)
-  locs.scaled <- scale_locs(model, locs)
-  vec.approx.test <- vecchia_specify(locs.train.scaled, m, locs.pred = locs.scaled)
+  locs.train.scaled <- scale_locs(model, model@locs_train)[[1]]
+  locs.scaled <- scale_locs(model, list(locs))[[1]]
+  vec.approx.test <- vecchia_specify(locs.train.scaled, m, locs.pred = locs.scaled, ordering.pred=ordering.pred, pred.cond=pred.cond)
 
-  ## carry out prediction
-  pred <- vecchia_prediction(res, vec.approx.test, c(model@covparams[1], 1, 0.5), model@covparams[4])
+    ## carry out prediction
+    if (!model@apanasovich) {
+        pred <- vecchia_prediction(res, vec.approx.test, c(model@covparams[1], 1, model@covparams[3]), model@covparams[4], return.values=return.values)
+    } else {
+        pred <- vecchia_prediction(res, vec.approx.test, c(model@covparams[1], model@covparams[2], model@covparams[3]), model@covparams[4], return.values=return.values)
+    }
 
   # prediction function can return both mean and sds
   # returns a list with elements mu.pred,mu.obs,var.pred,var.obs,V.ord
   Vec.mean <- pred$mu.pred + Vecchia.Pred # residual + mean trend
-  # option to include or exclude theta below
-  Vec.sds <- sqrt(pred$var.pred + model@covparams[4]) # standard deviation
+  if (return.values == "mean") {
+      return.list <- list(means = Vec.mean)
+  } else {
+      warning("Variance estimates do not include model fitting variance and are anticonservative. Use with caution.")
+      Vec.sds <- sqrt(pred$var.pred + model@covparams[4])
+      return.list <- list(means = Vec.mean, sds = vec.sds)
+  }
 
-  return(list("means" = Vec.mean, "standard deviations" = Vec.sds))
+  return(return.list)
 })
 
 setMethod("check_input", "VecchiaModel", function(model, Y, X, locs) {
@@ -117,6 +122,25 @@ setMethod("check_input", "VecchiaModel", function(model, Y, X, locs) {
   invisible(model)
 })
 
+setMethod("check_input_pred", "VecchiaModel", function(model, X, locs) {
+  if (!is.matrix(locs)) {
+    stop("locs must be a matrix")
+  }
+  if (!is.matrix(X)) {
+    stop("X must be a matrix")
+  }
+  if (ncol(locs) != ncol(model@locs_train[[1]])) {
+      stop("locs must have the same number of columns as locs_train")
+  }
+  if (nrow(X) != nrow(locs)) {
+      stop("X must have the same number of rows as locs")
+  }
+  if (ncol(X) != ncol(model@X_train)) {
+      stop("X and X_train must have the same number of predictors")
+  }
+  invisible(model)
+})
+
 #' specify
 #'
 #' Specify the conditioning set using m nearest neighbors.
diff --git a/R/RcppExports.R b/R/RcppExports.R
old mode 100755
new mode 100644
index 0e90b6b..dd16aaf
--- a/R/RcppExports.R
+++ b/R/RcppExports.R
@@ -2,9 +2,10 @@
 # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
 
 na_omit_c <- function(x) {
-    .Call("_PrestoGP_na_omit_c", PACKAGE = "PrestoGP", x)
+    .Call('_PrestoGP_na_omit_c', PACKAGE = 'PrestoGP', x)
 }
 
 createU_helper_mat <- function(olocs, ondx, curqys, curqzs, vijs, aijs, full_const, nugget, sig2, U_beginning) {
-    .Call("_PrestoGP_createU_helper_mat", PACKAGE = "PrestoGP", olocs, ondx, curqys, curqzs, vijs, aijs, full_const, nugget, sig2, U_beginning)
+    .Call('_PrestoGP_createU_helper_mat', PACKAGE = 'PrestoGP', olocs, ondx, curqys, curqzs, vijs, aijs, full_const, nugget, sig2, U_beginning)
 }
+
diff --git a/man/calc_covparams-PrestoGPModel-method.Rd b/man/calc_covparams-PrestoGPModel-method.Rd
old mode 100644
new mode 100755
index 0384b81..0d49a69
--- a/man/calc_covparams-PrestoGPModel-method.Rd
+++ b/man/calc_covparams-PrestoGPModel-method.Rd
@@ -4,7 +4,7 @@
 \alias{calc_covparams,PrestoGPModel-method}
 \title{calc_covparams}
 \usage{
-\S4method{calc_covparams}{PrestoGPModel}(model, locs, Y)
+\S4method{calc_covparams}{PrestoGPModel}(model, locs, Y, covparams)
 }
 \arguments{
 \item{model}{The model to set the covariance parameters of}
diff --git a/man/prestogp_fit-PrestoGPModel-method.Rd b/man/prestogp_fit-PrestoGPModel-method.Rd
index fdacf59..96f695e 100755
--- a/man/prestogp_fit-PrestoGPModel-method.Rd
+++ b/man/prestogp_fit-PrestoGPModel-method.Rd
@@ -49,18 +49,18 @@ This method fits any PrestoGP model given a matrix of locations, a matrix of ind
 }
 \examples{
 
-US_NO2_Data<- read.csv("data/US_ST_NO2_Data.csv")
-lat       <- US_NO2_Data$Latitude  # Latitude
-lon       <- US_NO2_Data$Longitude # Longitude
-logNO2    <-  log(US_NO2_Data$Y)   # ozone data
-logNO2 <- logNO2[1:1000,]
-time      <- US_NO2_Data$YearFrac  # time in fractional years
-N         <- length(logNO2)
-locs = cbind(lon,lat,time)         # Coordinates in R3 (x,y,t)
-locs <- locs[1:1000,]
-X.Design <- US_NO2_Data[,7:145]    #Design matrix
+US_NO2_Data <- read.csv("data/US_ST_NO2_Data.csv")
+lat <- US_NO2_Data$Latitude # Latitude
+lon <- US_NO2_Data$Longitude # Longitude
+logNO2 <- log(US_NO2_Data$Y) # ozone data
+logNO2 <- logNO2[1:1000, ]
+time <- US_NO2_Data$YearFrac # time in fractional years
+N <- length(logNO2)
+locs <- cbind(lon, lat, time) # Coordinates in R3 (x,y,t)
+locs <- locs[1:1000, ]
+X.Design <- US_NO2_Data[, 7:145] # Design matrix
 X.Design <- scale(X.Design)
-X <- X.Design[1:1000,]
+X <- X.Design[1:1000, ]
 model <- PrestoGPSpatiotemporalModel()
 model <- prestogp_fit(model, logNO2, X, locs)
 ...
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
index ce7b2b9..3f445f7 100644
--- a/src/RcppExports.cpp
+++ b/src/RcppExports.cpp
@@ -7,51 +7,49 @@
 using namespace Rcpp;
 
 #ifdef RCPP_USE_GLOBAL_ROSTREAM
-Rcpp::Rostream<true> &Rcpp::Rcout = Rcpp::Rcpp_cout_get();
-Rcpp::Rostream<false> &Rcpp::Rcerr = Rcpp::Rcpp_cerr_get();
+Rcpp::Rostream<true>&  Rcpp::Rcout = Rcpp::Rcpp_cout_get();
+Rcpp::Rostream<false>& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get();
 #endif
 
 // na_omit_c
 arma::vec na_omit_c(arma::vec x);
-RcppExport SEXP _PrestoGP_na_omit_c(SEXP xSEXP)
-{
-    BEGIN_RCPP
+RcppExport SEXP _PrestoGP_na_omit_c(SEXP xSEXP) {
+BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
-    Rcpp::traits::input_parameter<arma::vec>::type x(xSEXP);
+    Rcpp::traits::input_parameter< arma::vec >::type x(xSEXP);
     rcpp_result_gen = Rcpp::wrap(na_omit_c(x));
     return rcpp_result_gen;
-    END_RCPP
+END_RCPP
 }
 // createU_helper_mat
-arma::sp_mat createU_helper_mat(const arma::mat &olocs, const arma::vec &ondx, const arma::mat &curqys, const arma::mat &curqzs, const arma::mat &vijs, const arma::mat &aijs, const arma::mat &full_const, const arma::vec &nugget, const arma::vec &sig2, const arma::vec &U_beginning);
-RcppExport SEXP _PrestoGP_createU_helper_mat(SEXP olocsSEXP, SEXP ondxSEXP, SEXP curqysSEXP, SEXP curqzsSEXP, SEXP vijsSEXP, SEXP aijsSEXP, SEXP full_constSEXP, SEXP nuggetSEXP, SEXP sig2SEXP, SEXP U_beginningSEXP)
-{
-    BEGIN_RCPP
+arma::sp_mat createU_helper_mat(const arma::mat& olocs, const arma::vec& ondx, const arma::mat& curqys, const arma::mat& curqzs, const arma::mat& vijs, const arma::mat& aijs, const arma::mat& full_const, const arma::vec& nugget, const arma::vec& sig2, const arma::vec& U_beginning);
+RcppExport SEXP _PrestoGP_createU_helper_mat(SEXP olocsSEXP, SEXP ondxSEXP, SEXP curqysSEXP, SEXP curqzsSEXP, SEXP vijsSEXP, SEXP aijsSEXP, SEXP full_constSEXP, SEXP nuggetSEXP, SEXP sig2SEXP, SEXP U_beginningSEXP) {
+BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
-    Rcpp::traits::input_parameter<const arma::mat &>::type olocs(olocsSEXP);
-    Rcpp::traits::input_parameter<const arma::vec &>::type ondx(ondxSEXP);
-    Rcpp::traits::input_parameter<const arma::mat &>::type curqys(curqysSEXP);
-    Rcpp::traits::input_parameter<const arma::mat &>::type curqzs(curqzsSEXP);
-    Rcpp::traits::input_parameter<const arma::mat &>::type vijs(vijsSEXP);
-    Rcpp::traits::input_parameter<const arma::mat &>::type aijs(aijsSEXP);
-    Rcpp::traits::input_parameter<const arma::mat &>::type full_const(full_constSEXP);
-    Rcpp::traits::input_parameter<const arma::vec &>::type nugget(nuggetSEXP);
-    Rcpp::traits::input_parameter<const arma::vec &>::type sig2(sig2SEXP);
-    Rcpp::traits::input_parameter<const arma::vec &>::type U_beginning(U_beginningSEXP);
+    Rcpp::traits::input_parameter< const arma::mat& >::type olocs(olocsSEXP);
+    Rcpp::traits::input_parameter< const arma::vec& >::type ondx(ondxSEXP);
+    Rcpp::traits::input_parameter< const arma::mat& >::type curqys(curqysSEXP);
+    Rcpp::traits::input_parameter< const arma::mat& >::type curqzs(curqzsSEXP);
+    Rcpp::traits::input_parameter< const arma::mat& >::type vijs(vijsSEXP);
+    Rcpp::traits::input_parameter< const arma::mat& >::type aijs(aijsSEXP);
+    Rcpp::traits::input_parameter< const arma::mat& >::type full_const(full_constSEXP);
+    Rcpp::traits::input_parameter< const arma::vec& >::type nugget(nuggetSEXP);
+    Rcpp::traits::input_parameter< const arma::vec& >::type sig2(sig2SEXP);
+    Rcpp::traits::input_parameter< const arma::vec& >::type U_beginning(U_beginningSEXP);
     rcpp_result_gen = Rcpp::wrap(createU_helper_mat(olocs, ondx, curqys, curqzs, vijs, aijs, full_const, nugget, sig2, U_beginning));
     return rcpp_result_gen;
-    END_RCPP
+END_RCPP
 }
 
 static const R_CallMethodDef CallEntries[] = {
-    {"_PrestoGP_na_omit_c", (DL_FUNC)&_PrestoGP_na_omit_c, 1},
-    {"_PrestoGP_createU_helper_mat", (DL_FUNC)&_PrestoGP_createU_helper_mat, 10},
-    {NULL, NULL, 0}};
+    {"_PrestoGP_na_omit_c", (DL_FUNC) &_PrestoGP_na_omit_c, 1},
+    {"_PrestoGP_createU_helper_mat", (DL_FUNC) &_PrestoGP_createU_helper_mat, 10},
+    {NULL, NULL, 0}
+};
 
-RcppExport void R_init_PrestoGP(DllInfo *dll)
-{
+RcppExport void R_init_PrestoGP(DllInfo *dll) {
     R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
     R_useDynamicSymbols(dll, FALSE);
 }
diff --git a/tests/testthat/sim_multivariate_small.R b/tests/testthat/sim_multivariate_small.R
new file mode 100755
index 0000000..0fbcd93
--- /dev/null
+++ b/tests/testthat/sim_multivariate_small.R
@@ -0,0 +1,93 @@
+ny <- 2 # number of response variables
+p <- 10 # number of predictors for each response
+p.nz <- 4 # number of nonzero predictors for each y
+n.spatial.xy <- 10 # number of spatial coordinates per dimension
+
+library(MASS)
+library(fields)
+library(psych)
+
+beta1 <- c(rep(1, p.nz), rep(0, p - p.nz))
+beta.all <- rep(beta1, ny)
+
+X.st <- list()
+for (i in 1:ny) {
+    set.seed(1212 + i)
+    Sigma.X <- exp(-rdist(sample(1:p)) / 3)
+    X.st[[i]] <- mvrnorm(n.spatial.xy^2, rep(0, p), Sigma.X)
+}
+X.all <- superMatrix(X.st)
+mean.trend.st <- X.all %*% beta.all
+
+n.rho <- choose(ny, 2)
+rho.vec <- runif(n.rho, 0.2, 0.8)
+rho <- matrix(0, nrow = ny, ncol = ny)
+rho[upper.tri(rho)] <- rho.vec
+rho <- rho + t(rho) + diag(1, ny)
+
+marg.smoothness <- 0.5 + rnorm(ny, 0.1, 0.05)
+nuggets <- runif(ny, 0.5, 2)
+x.variance <- runif(ny, 1.5, 4)
+marg.var <- nuggets + x.variance
+ranges <- runif(ny, 0.5, 1.2)
+
+params.all <- c(x.variance, ranges, marg.smoothness, nuggets, rho.vec)
+
+locs.list <- list()
+for (i in 1:ny) {
+    loc1 <- seq(0, 1, length.out = n.spatial.xy) + rnorm(n.spatial.xy, 0, 0.001)
+    loc2 <- seq(0, 1, length.out = n.spatial.xy) + rnorm(n.spatial.xy, 0, 0.001)
+    locs.list[[i]] <- as.matrix(expand.grid(loc1, loc2))
+}
+
+Sigma.All <- matrix(nrow = ny * n.spatial.xy^2, ncol = ny * n.spatial.xy^2)
+for (i in 1:ny) {
+    for (j in i:ny) {
+        if (i == j) {
+            ndx1 <- ((i - 1) * n.spatial.xy^2 + 1):(n.spatial.xy^2 * i)
+            dij <- fields::rdist(locs.list[[i]])
+            Sigma.All[ndx1, ndx1] <- marg.var[i] *
+                fields::Matern(dij,
+                    range = ranges[i],
+                    smoothness = marg.smoothness[i]
+                )
+        } else {
+            ndx1 <- ((i - 1) * n.spatial.xy^2 + 1):(n.spatial.xy^2 * i)
+            ndx2 <- ((j - 1) * n.spatial.xy^2 + 1):(n.spatial.xy^2 * j)
+            dij <- fields::rdist(locs.list[[i]], locs.list[[j]])
+            vii <- marg.smoothness[i]
+            vjj <- marg.smoothness[j]
+            vij <- (vii + vjj) / 2
+            aii <- 1 / ranges[i]
+            ajj <- 1 / ranges[j]
+            aij <- sqrt((aii^2 + ajj^2) / 2)
+            Sigma.All[ndx1, ndx2] <- rho[i, j] * sqrt(marg.var[i]) *
+                sqrt(marg.var[j]) * aii^vii * ajj^vjj * gamma(vij) /
+                (aij^(2 * vij) * sqrt(gamma(vii) * gamma(vjj))) *
+                fields::Matern(dij, smoothness = vij, alpha = aij)
+            Sigma.All[ndx2, ndx1] <- t(Sigma.All[ndx1, ndx2])
+        }
+    }
+}
+
+L.C <- chol(Sigma.All)
+
+st.error <- rnorm(n.spatial.xy^2 * ny) %*% L.C
+
+nug.error <- NULL
+for (i in 1:ny) {
+    nug.error <- c(nug.error, nuggets[i] * rnorm(n.spatial.xy^2))
+}
+
+y.list <- list()
+for (i in 1:ny) {
+    ndx1 <- ((i - 1) * n.spatial.xy^2 + 1):(n.spatial.xy^2 * i)
+    y.list[[i]] <- mean.trend.st[ndx1] + st.error[ndx1] + nug.error[ndx1]
+}
+
+rm(
+    ny, p, p.nz, n.spatial.xy, beta1, i, j, Sigma.X, mean.trend.st, n.rho,
+    loc1, loc2, ndx1, ndx2, dij, vii, vjj, vij, aii, ajj, aij, L.C,
+    st.error, nug.error, X.all, rho, rho.vec, ranges, Sigma.All, nuggets,
+    marg.smoothness, marg.var
+)
diff --git a/tests/testthat/sim_multivariate_small_pred.R b/tests/testthat/sim_multivariate_small_pred.R
new file mode 100755
index 0000000..0b5101e
--- /dev/null
+++ b/tests/testthat/sim_multivariate_small_pred.R
@@ -0,0 +1,106 @@
+set.seed(1234)
+
+ny <- 2 # number of response variables
+p <- 10 # number of predictors for each response
+p.nz <- 4 # number of nonzero predictors for each y
+n.spatial.xy <- 7 # number of spatial coordinates per dimension
+
+library(MASS)
+library(fields)
+library(psych)
+
+beta1 <- c(rep(1,p.nz), rep(0,p-p.nz))
+beta.all <- rep(beta1, ny)
+
+X.st <- list()
+for (i in 1:ny) {
+    Sigma.X <- exp(-rdist(sample(1:p))/3)
+    X.st[[i]] <- mvrnorm(n.spatial.xy^2,rep(0,p),Sigma.X)
+}
+X.all <- superMatrix(X.st)
+mean.trend.st <- X.all %*% beta.all
+
+n.rho <- choose(ny, 2)
+rho.vec <- runif(n.rho, 0.2, 0.8)
+rho <- matrix(0, nrow=ny, ncol=ny)
+rho[upper.tri(rho)] <- rho.vec
+rho <- rho + t(rho) + diag(1, ny)
+
+marg.smoothness <- 0.5 + rnorm(ny, 0.1, 0.05)
+nuggets <- runif(ny, 0.5, 2)
+x.variance <- runif(ny, 1.5, 4)
+marg.var <- nuggets + x.variance
+ranges <- runif(ny, 0.5, 1.2)
+
+params.all <- c(x.variance, ranges, marg.smoothness, nuggets, rho.vec)
+
+locs.list <- list()
+for (i in 1:ny) {
+    loc1 <- seq(0,1,length.out = n.spatial.xy)+rnorm(n.spatial.xy,0,0.001)
+    loc2 <- seq(0,1,length.out = n.spatial.xy)+rnorm(n.spatial.xy,0,0.001)
+    locs.list[[i]] <- as.matrix(expand.grid(loc1, loc2))
+}
+
+Sigma.All <- matrix(nrow=ny*n.spatial.xy^2, ncol=ny*n.spatial.xy^2)
+for (i in 1:ny) {
+    for (j in i:ny) {
+        if (i==j) {
+            ndx1 <- ((i-1)*n.spatial.xy^2+1):(n.spatial.xy^2*i)
+            dij <- fields::rdist(locs.list[[i]])
+            Sigma.All[ndx1,ndx1] <- marg.var[i] *
+                fields::Matern(dij, range = ranges[i],
+                               smoothness = marg.smoothness[i])
+        }
+        else {
+            ndx1 <- ((i-1)*n.spatial.xy^2+1):(n.spatial.xy^2*i)
+            ndx2 <- ((j-1)*n.spatial.xy^2+1):(n.spatial.xy^2*j)
+            dij <- fields::rdist(locs.list[[i]], locs.list[[j]])
+            vii <- marg.smoothness[i]
+            vjj <- marg.smoothness[j]
+            vij <- (vii+vjj)/2
+            aii <- 1/ranges[i]
+            ajj <- 1/ranges[j]
+            aij <- sqrt((aii^2+ajj^2)/2)
+            Sigma.All[ndx1,ndx2] <- rho[i,j] * sqrt(marg.var[i]) *
+                sqrt(marg.var[j]) * aii^vii * ajj^vjj * gamma(vij) /
+                (aij^(2*vij) * sqrt(gamma(vii) * gamma(vjj))) *
+                fields::Matern(dij, smoothness=vij, alpha=aij)
+            Sigma.All[ndx2,ndx1] <- t(Sigma.All[ndx1,ndx2])
+        }
+    }
+}
+
+L.C <- chol(Sigma.All)
+
+st.error <- rnorm(n.spatial.xy^2 * ny) %*% L.C
+
+nug.error <- NULL
+for (i in 1:ny) {
+    nug.error <- c(nug.error, nuggets[i]*rnorm(n.spatial.xy^2))
+}
+
+y.list <- list()
+locs.list.otr <- list()
+locs.list.otst <- list()
+y.list.otr <- list()
+y.list.otst <- list()
+X.st.otr <- list()
+X.st.otst <- list()
+for (i in 1:ny) {
+    ndx1 <- ((i-1)*n.spatial.xy^2+1):(n.spatial.xy^2*i)
+    y.list[[i]] <- mean.trend.st[ndx1] + st.error[ndx1] + nug.error[ndx1]
+    nn <- n.spatial.xy^2
+    otr <- rep(FALSE, nn)
+    otr[sample(1:nn, size=floor(nn/2))] <- TRUE
+    locs.list.otr[[i]] <- locs.list[[i]][otr,]
+    locs.list.otst[[i]] <- locs.list[[i]][!otr,]
+    y.list.otr[[i]] <- y.list[[i]][otr]
+    y.list.otst[[i]] <- y.list[[i]][!otr]
+    X.st.otr[[i]] <- X.st[[i]][otr,]
+    X.st.otst[[i]] <- X.st[[i]][!otr,]
+}
+
+rm(ny, p, p.nz, n.spatial.xy, beta1, i, j, Sigma.X, mean.trend.st, n.rho,
+   loc1, loc2, ndx1, ndx2, dij, vii, vjj, vij, aii, ajj, aij, L.C,
+   st.error, nug.error, X.all, rho, rho.vec, ranges, Sigma.All, nuggets,
+   marg.smoothness, marg.var, x.variance, nn, otr)
diff --git a/tests/testthat/sim_vecchia.RData b/tests/testthat/sim_vecchia.RData
new file mode 100755
index 0000000..0dfbff0
Binary files /dev/null and b/tests/testthat/sim_vecchia.RData differ
diff --git a/tests/testthat/sim_vecchia_pred.R b/tests/testthat/sim_vecchia_pred.R
new file mode 100755
index 0000000..ffcb97e
--- /dev/null
+++ b/tests/testthat/sim_vecchia_pred.R
@@ -0,0 +1,50 @@
+set.seed(1212)
+
+p <- 10 # number of predictors for each response
+p.nz <- 4 # number of nonzero predictors for each y
+n.spatial.xy <- 50 # number of spatial coordinates per dimension
+
+library(MASS)
+library(fields)
+
+beta1 <- c(rep(1,p.nz), rep(0,p-p.nz))
+
+Sigma.X <- exp(-rdist(sample(1:p))/3)
+X <- mvrnorm(n.spatial.xy^2,rep(0,p),Sigma.X)
+mean.trend.st <- as.vector(X %*% beta1)
+
+marg.smoothness <- 0.5 + rnorm(1, 0.1, 0.05)
+nuggets <- runif(1, 0.5, 2)
+x.variance <- runif(1, 1.5, 4)
+marg.var <- nuggets + x.variance
+ranges <- runif(1, 0.5, 1.2)
+
+params.all <- c(x.variance, ranges, marg.smoothness, nuggets)
+
+
+loc1 <- seq(0,1,length.out = n.spatial.xy)+rnorm(n.spatial.xy,0,0.001)
+loc2 <- seq(0,1,length.out = n.spatial.xy)+rnorm(n.spatial.xy,0,0.001)
+locs <- as.matrix(expand.grid(loc1, loc2))
+
+Sigma.All <- marg.var*Matern(rdist(locs), range=ranges,
+                             smoothness=marg.smoothness)
+L.C <- chol(Sigma.All)
+
+st.error <- as.vector(rnorm(n.spatial.xy^2) %*% L.C)
+nug.error <- nuggets*rnorm(n.spatial.xy^2)
+y <- mean.trend.st + st.error + nug.error
+
+nn <- n.spatial.xy^2
+otr <- rep(FALSE, nn)
+otr[sample(1:nn, size=floor(nn/2))] <- TRUE
+
+locs.otr <- locs[otr,]
+locs.otst <- locs[!otr,]
+y.otr <- y[otr]
+y.otst <- y[!otr]
+X.otr <- X[otr,]
+X.otst <- X[!otr,]
+
+rm(p, p.nz, n.spatial.xy, beta1, Sigma.X, mean.trend.st, loc1, loc2, L.C,
+   st.error, nug.error, ranges, Sigma.All, nuggets, marg.smoothness, marg.var,
+   x.variance, nn, otr)
diff --git a/tests/testthat/sim_vecchia_small.R b/tests/testthat/sim_vecchia_small.R
new file mode 100755
index 0000000..9c884eb
--- /dev/null
+++ b/tests/testthat/sim_vecchia_small.R
@@ -0,0 +1,39 @@
+set.seed(1212)
+
+p <- 10 # number of predictors for each response
+p.nz <- 4 # number of nonzero predictors for each y
+n.spatial.xy <- 10 # number of spatial coordinates per dimension
+
+library(MASS)
+library(fields)
+
+beta1 <- c(rep(1,p.nz), rep(0,p-p.nz))
+
+Sigma.X <- exp(-rdist(sample(1:p))/3)
+X <- mvrnorm(n.spatial.xy^2,rep(0,p),Sigma.X)
+mean.trend.st <- as.vector(X %*% beta1)
+
+marg.smoothness <- 0.5 + rnorm(1, 0.1, 0.05)
+nuggets <- runif(1, 0.5, 2)
+x.variance <- runif(1, 1.5, 4)
+marg.var <- nuggets + x.variance
+ranges <- runif(1, 0.5, 1.2)
+
+params.all <- c(x.variance, ranges, marg.smoothness, nuggets)
+
+
+loc1 <- seq(0,1,length.out = n.spatial.xy)+rnorm(n.spatial.xy,0,0.001)
+loc2 <- seq(0,1,length.out = n.spatial.xy)+rnorm(n.spatial.xy,0,0.001)
+locs <- as.matrix(expand.grid(loc1, loc2))
+
+Sigma.All <- marg.var*Matern(rdist(locs), range=ranges,
+                             smoothness=marg.smoothness)
+L.C <- chol(Sigma.All)
+
+st.error <- as.vector(rnorm(n.spatial.xy^2) %*% L.C)
+nug.error <- nuggets*rnorm(n.spatial.xy^2)
+y <- mean.trend.st + st.error + nug.error
+
+rm(p, p.nz, n.spatial.xy, beta1, Sigma.X, mean.trend.st, loc1, loc2, L.C,
+   st.error, nug.error, ranges, Sigma.All, nuggets, marg.smoothness, marg.var,
+   x.variance)
diff --git a/tests/testthat/sim_vecchia_small_pred.R b/tests/testthat/sim_vecchia_small_pred.R
new file mode 100755
index 0000000..7f25507
--- /dev/null
+++ b/tests/testthat/sim_vecchia_small_pred.R
@@ -0,0 +1,50 @@
+set.seed(1212)
+
+p <- 10 # number of predictors for each response
+p.nz <- 4 # number of nonzero predictors for each y
+n.spatial.xy <- 20 # number of spatial coordinates per dimension
+
+library(MASS)
+library(fields)
+
+beta1 <- c(rep(1,p.nz), rep(0,p-p.nz))
+
+Sigma.X <- exp(-rdist(sample(1:p))/3)
+X <- mvrnorm(n.spatial.xy^2,rep(0,p),Sigma.X)
+mean.trend.st <- as.vector(X %*% beta1)
+
+marg.smoothness <- 0.5 + rnorm(1, 0.1, 0.05)
+nuggets <- runif(1, 0.5, 2)
+x.variance <- runif(1, 1.5, 4)
+marg.var <- nuggets + x.variance
+ranges <- runif(1, 0.5, 1.2)
+
+params.all <- c(x.variance, ranges, marg.smoothness, nuggets)
+
+
+loc1 <- seq(0,1,length.out = n.spatial.xy)+rnorm(n.spatial.xy,0,0.001)
+loc2 <- seq(0,1,length.out = n.spatial.xy)+rnorm(n.spatial.xy,0,0.001)
+locs <- as.matrix(expand.grid(loc1, loc2))
+
+Sigma.All <- marg.var*Matern(rdist(locs), range=ranges,
+                             smoothness=marg.smoothness)
+L.C <- chol(Sigma.All)
+
+st.error <- as.vector(rnorm(n.spatial.xy^2) %*% L.C)
+nug.error <- nuggets*rnorm(n.spatial.xy^2)
+y <- mean.trend.st + st.error + nug.error
+
+nn <- n.spatial.xy^2
+otr <- rep(FALSE, nn)
+otr[sample(1:nn, size=floor(nn/2))] <- TRUE
+
+locs.otr <- locs[otr,]
+locs.otst <- locs[!otr,]
+y.otr <- y[otr]
+y.otst <- y[!otr]
+X.otr <- X[otr,]
+X.otst <- X[!otr,]
+
+rm(p, p.nz, n.spatial.xy, beta1, Sigma.X, mean.trend.st, loc1, loc2, L.C,
+   st.error, nug.error, ranges, Sigma.All, nuggets, marg.smoothness, marg.var,
+   x.variance, nn, otr)
diff --git a/tests/testthat/test-Log_Likelihood.R b/tests/testthat/test-Log_Likelihood.R
index 2933641..d1ddb03 100755
--- a/tests/testthat/test-Log_Likelihood.R
+++ b/tests/testthat/test-Log_Likelihood.R
@@ -1,28 +1,72 @@
+test_that("negloglik_vecchia", {
+    set.seed(1234)
+
+    y.orig <- rnorm(100)
+
+    locs.dim1 <- seq(1, 10, length = 10) + rnorm(10, 0, 0.1)
+    locs.dim2 <- seq(1, 10, length = 10) + rnorm(10, 0, 0.1)
+    locs <- as.matrix(expand.grid(locs.dim1, locs.dim2))
+
+    covmat.true <- Matern(rdist(locs), range = 1, smoothness = 0.5) + diag(100)
+
+    y <- y.orig %*% chol(covmat.true)
+    y <- as.vector(y)
+
+    logparams <- create.initial.values.flex(0.9, 0.5, 0.5, 0.1, 1, 1)
+    pseq <- create.param.sequence(1)
+    vec.approx <- vecchia_specify(locs, 5)
+    LL.vecchia <- negloglik_vecchia(logparams, y, vec.approx, pseq)
+    expect_equal(194.75, LL.vecchia, tolerance=1e-2)
+
+    vec.mapprox <- vecchia_Mspecify(list(locs), 5)
+    LL.mvecchia <- mvnegloglik(logparams, vec.mapprox, y, pseq, 1)
+    # Univariate likelihood should equal multivariate likelihood
+    expect_equal(LL.vecchia, LL.mvecchia, tolerance=1e-1)
+
+    logparams2 <- create.initial.values.flex(0.9, 1, 0.5, 0.1, 1, 1)
+    vec.approx2 <- vec.approx
+    vec.approx2$locsord <- vec.approx$locsord / 0.5
+    LL.vecchia2 <- negloglik_vecchia(logparams2, y, vec.approx2, pseq)
+    LL.vecchia.st <- negloglik_vecchia_ST(logparams, y, vec.approx, pseq, 1, 1)
+    vec.mapprox2 <- vec.mapprox
+    vec.mapprox2$locsord <- vec.mapprox$locsord / 0.5
+    LL.mvecchia2 <- mvnegloglik_ST(logparams2, vec.mapprox2, y, pseq, 1, 1, 1)
+
+    # Likelihood should equal spatiotemporal likelihood after scaling locs
+    expect_equal(LL.vecchia, LL.vecchia2, tolerance=1e-3)
+    expect_equal(LL.vecchia, LL.vecchia.st, tolerance=1e-3)
+    expect_equal(LL.vecchia, LL.mvecchia2, tolerance=1e-1)
+})
+
 test_that("negloglik_vecchia_ST", {
-    set.seed(7919)
-    load("small_sim.Rdata")
-    return(1) # this test isn't useful at the moment
-    params <- c(0.5, 0.5, 0.5, 0.5)
-    locs <- locs_train
-    X <- X_train
-    Y <- Y_train
-    locs.scaled <- locs / c(params[2], params[2], params[3])
-    vecchia_approx <- vecchia_specify(locs.scaled, 25)
-    beta.hat <- rep(0, ncol(X))
-    Y.hat <- as.matrix(X) %*% beta.hat
-    res <- as.double(Y - Y.hat)
-    ord_locs <- locs[vecchia_approx$ord, ]
-    locs_mat <- cbind(ord_locs[, 1], ord_locs[, 2], ord_locs[, 3])
-    result <- negloglik_vecchia_ST(log(params), locs_mat, res, vecchia_approx)
-    expect_equal(1597.9808688, result, tolerance = 10e-5)
-    vecchia.result <- optim(
-        par = log(params), fn = negloglik_vecchia_ST,
-        locs = locs_mat, res = res, vecchia.approx = vecchia_approx, method = "Nelder-Mead",
-        control = list(trace = 0)
-    )
-    params_optim <- exp(vecchia.result$par)
-    result_optim <- negloglik_vecchia_ST(log(params_optim), locs_mat, res, vecchia_approx)
-    expect_equal(-6861.65048095, result_optim, tolerance = 1e-5)
+    set.seed(1234)
+
+    y.orig <- rnorm(125)
+
+    locs.dim1 <- seq(1, 5, length = 5) + rnorm(5, 0, 0.1)
+    locs.dim2 <- seq(1, 5, length = 5) + rnorm(5, 0, 0.1)
+    locs.dim3 <- seq(1, 5, length = 5) + rnorm(5, 0, 0.1)
+    locs <- as.matrix(expand.grid(locs.dim1, locs.dim2, locs.dim3))
+
+    covmat.true <- Matern(rdist(locs), range = 1, smoothness = 0.5) + diag(125)
+
+    y <- y.orig %*% chol(covmat.true)
+    y <- as.vector(y)
+
+    logparams <- create.initial.values.flex(0.9, c(2, 3), 0.5, 0.1, 1, 1)
+    pseq <- create.param.sequence(1, 2)
+    vec.approx <- vecchia_specify(locs, 5)
+    LL.vecchia <- negloglik_vecchia_ST(logparams, y, vec.approx, pseq,
+                                       c(1, 1, 2), 2)
+    expect_equal(284.73, LL.vecchia, tolerance=1e-2)
+
+    logparams2 <- create.initial.values.flex(0.9, 1, 0.5, 0.1, 1, 1)
+    pseq2 <- create.param.sequence(1)
+    vec.approx2 <- vec.approx
+    vec.approx2$locsord[,1:2] <- vec.approx$locsord[,1:2] / 2
+    vec.approx2$locsord[,3] <- vec.approx$locsord[,3] / 3
+    LL.vecchia2 <- negloglik_vecchia(logparams2, y, vec.approx2, pseq2)
+    expect_equal(LL.vecchia, LL.vecchia2, tolerance=1e-3)
 })
 
 test_that("negloglik.full", {
@@ -93,6 +137,36 @@ test_that("negloglik.full", {
     expect_equal(LL.full, LL.pgp, tolerance = 1e-3)
 })
 
+test_that("negloglik_full_ST", {
+    set.seed(1234)
+
+    y.orig <- rnorm(125)
+
+    locs.dim1 <- seq(1, 5, length = 5) + rnorm(5, 0, 0.1)
+    locs.dim2 <- seq(1, 5, length = 5) + rnorm(5, 0, 0.1)
+    locs.dim3 <- seq(1, 5, length = 5) + rnorm(5, 0, 0.1)
+    locs <- as.matrix(expand.grid(locs.dim1, locs.dim2, locs.dim3))
+
+    covmat.true <- Matern(rdist(locs), range = 1, smoothness = 0.5) + diag(125)
+
+    y <- y.orig %*% chol(covmat.true)
+    y <- as.vector(y)
+
+    logparams <- create.initial.values.flex(0.9, c(2, 3), 0.5, 0.1, 1, 1)
+    pseq <- create.param.sequence(1, 2)
+    LL.full <- negloglik_full_ST(logparams, locs, y, pseq,
+                                 c(1, 1, 2), 2)
+    expect_equal(286.84, LL.full, tolerance=1e-2)
+
+    logparams2 <- create.initial.values.flex(0.9, 1, 0.5, 0.1, 1, 1)
+    pseq2 <- create.param.sequence(1)
+    locs2 <- locs
+    locs2[,1:2] <- locs[,1:2] / 2
+    locs2[,3] <- locs[,3] / 3
+    LL.full2 <- negloglik.full(logparams2, rdist(locs2), y, pseq2)
+    expect_equal(LL.full, LL.full2, tolerance=1e-3)
+})
+
 test_that("mvnegloglik", {
     load("sim_multivariate_big.RData")
     set.seed(1234)
diff --git a/tests/testthat/test-PrestoGP_Model.R b/tests/testthat/test-PrestoGP_Model.R
new file mode 100755
index 0000000..a13cc6c
--- /dev/null
+++ b/tests/testthat/test-PrestoGP_Model.R
@@ -0,0 +1,155 @@
+test_that("beta.hat not numeric", {
+  source("sim_vecchia_small.R")
+  pgp.model1 <- new("VecchiaModel")
+  expect_error(prestogp_fit(pgp.model1, y, X, locs, beta.hat="foo"),
+               "beta.hat parameter must be a numeric vector")
+})
+
+test_that("beta.hat not a vector", {
+  source("sim_vecchia_small.R")
+  pgp.model1 <- new("VecchiaModel")
+  expect_error(prestogp_fit(pgp.model1, y, X, locs, beta.hat=matrix(1:3)),
+               "beta.hat parameter must be a numeric vector")
+})
+
+test_that("beta.hat incorrect length", {
+  source("sim_vecchia_small.R")
+  pgp.model1 <- new("VecchiaModel")
+  expect_error(prestogp_fit(pgp.model1, y, X, locs, beta.hat=1:3),
+               "Length of beta.hat must match the number of predictors")
+})
+
+test_that("tol not numeric", {
+  source("sim_vecchia_small.R")
+  pgp.model1 <- new("VecchiaModel")
+  expect_error(prestogp_fit(pgp.model1, y, X, locs, tol="foo"),
+               "tol must be numeric")
+})
+
+test_that("tol not a scalar", {
+  source("sim_vecchia_small.R")
+  pgp.model1 <- new("VecchiaModel")
+  expect_error(prestogp_fit(pgp.model1, y, X, locs, tol=1:2),
+               "tol must be a scalar")
+})
+
+test_that("tol out of range", {
+  source("sim_vecchia_small.R")
+  pgp.model1 <- new("VecchiaModel")
+  expect_error(prestogp_fit(pgp.model1, y, X, locs, tol=1.1),
+               "tol must satisfy 0<tol<=1")
+})
+
+test_that("length(scaling) != ncol(locs)", {
+  source("sim_vecchia_small.R")
+  pgp.model1 <- new("VecchiaModel")
+  expect_error(prestogp_fit(pgp.model1, y, X, locs, scaling=1:5),
+               "Length of scaling must equal ncol of locs")
+})
+
+test_that("scaling not sequential", {
+  source("sim_vecchia_small.R")
+  pgp.model1 <- new("VecchiaModel")
+  expect_error(prestogp_fit(pgp.model1, y, X, locs, scaling=c(1,3)),
+               "scaling must consist of sequential integers starting at 1")
+})
+
+test_that("Invalid Apanasovich model", {
+  source("sim_vecchia_small.R")
+  pgp.model1 <- new("VecchiaModel")
+  expect_error(prestogp_fit(pgp.model1, y, X, locs, scaling=1:2,
+                            apanasovich=TRUE),
+               "Apanasovich models require a common scale parameter")
+})
+
+test_that("covparams not numeric", {
+  source("sim_vecchia_small.R")
+  pgp.model1 <- new("VecchiaModel")
+  expect_error(prestogp_fit(pgp.model1, y, X, locs, covparams="foo"),
+               "covparams must be a numeric vector")
+})
+
+test_that("covparams not numeric", {
+  source("sim_vecchia_small.R")
+  pgp.model1 <- new("VecchiaModel")
+  expect_error(prestogp_fit(pgp.model1, y, X, locs, covparams="foo"),
+               "covparams must be a numeric vector")
+})
+
+test_that("covparams not a vector", {
+  source("sim_vecchia_small.R")
+  pgp.model1 <- new("VecchiaModel")
+  expect_error(prestogp_fit(pgp.model1, y, X, locs, covparams=matrix(1:3)),
+               "covparams must be a numeric vector")
+})
+
+test_that("covparams incorrect length", {
+  source("sim_vecchia_small.R")
+  pgp.model1 <- new("VecchiaModel")
+  expect_error(prestogp_fit(pgp.model1, y, X, locs, covparams=1:3),
+               "Incorrect number of parameters in covparams")
+})
+
+test_that("covparams incorrect length - multivariate", {
+  source("sim_multivariate_small.R")
+  pgp.mmodel1 <- new("MultivariateVecchiaModel")
+  expect_error(prestogp_fit(pgp.mmodel1, y.list, X.st, locs.list,
+                            covparams=c(0.9, 0.9, 0.5, 0.5, 0.5, 0.5,
+                                        0.1, 0.1)),
+               "Incorrect number of parameters in covparams")
+})
+
+test_that("Invalid initial variance estimate", {
+  source("sim_vecchia_small.R")
+  pgp.model1 <- new("VecchiaModel")
+  expect_error(prestogp_fit(pgp.model1, y, X, locs,
+                            covparams=c(-0.9, 0.5, 0.5, 0.1)),
+               "Initial variance estimates must be positive")
+})
+
+test_that("Invalid initial range estimate", {
+  source("sim_vecchia_small.R")
+  pgp.model1 <- new("VecchiaModel")
+  expect_error(prestogp_fit(pgp.model1, y, X, locs,
+                            covparams=c(0.9, -0.5, 0.5, 0.1)),
+               "Initial range estimates must be positive")
+})
+
+test_that("Invalid initial nugget estimate", {
+  source("sim_vecchia_small.R")
+  pgp.model1 <- new("VecchiaModel")
+  expect_error(prestogp_fit(pgp.model1, y, X, locs,
+                            covparams=c(0.9, 0.5, 0.5, -0.1)),
+               "Initial nugget estimates must be positive")
+})
+
+test_that("Invalid initial smoothness estimate", {
+  source("sim_vecchia_small.R")
+  pgp.model1 <- new("VecchiaModel")
+  expect_error(prestogp_fit(pgp.model1, y, X, locs,
+                            covparams=c(0.9, 0.5, -0.5, 0.1)),
+               "Initial smoothness estimates must be between 0 and 2.5")
+})
+
+test_that("Invalid initial correlation estimate", {
+  source("sim_multivariate_small.R")
+  pgp.mmodel1 <- new("MultivariateVecchiaModel")
+  expect_error(prestogp_fit(pgp.mmodel1, y.list, X.st, locs.list,
+                            covparams=c(0.9, 0.9, 0.5, 0.5, 0.5, 0.5,
+                                        0.1, 0.1, -1.2)),
+               "Initial correlation estimates must be between -1 and 1")
+})
+
+test_that("m too small", {
+  source("sim_multivariate_small.R")
+  pgp.mmodel1 <- new("MultivariateVecchiaModel", n_neighbors=2)
+  expect_error(prestogp_fit(pgp.mmodel1, y.list, X.st, locs.list),
+               "m must be at least 3")
+})
+
+test_that("m too large", {
+  source("sim_vecchia_small.R")
+  pgp.model1 <- new("VecchiaModel", n_neighbors=101)
+  expect_warning(prestogp_fit(pgp.model1, y, X, locs),
+                 "Conditioning set size m chosen to be >=n. Changing to m=n-1")
+})
diff --git a/tests/testthat/test-PrestoGP_Multivariate_Vecchia_Model.R b/tests/testthat/test-PrestoGP_Multivariate_Vecchia_Model.R
index 5f1a64e..7a2a58a 100755
--- a/tests/testthat/test-PrestoGP_Multivariate_Vecchia_Model.R
+++ b/tests/testthat/test-PrestoGP_Multivariate_Vecchia_Model.R
@@ -142,6 +142,147 @@ test_that("Simulated dataset multivariate spatiotemporal", {
     ), tolerance = 1.1)
 })
 
+test_that("Invalid locs input for prediction", {
+    source("sim_multivariate_small_pred.R")
+    pgp.mmodel1 <- new("MultivariateVecchiaModel", n_neighbors=5)
+    pgp.mmodel1 <- prestogp_fit(pgp.mmodel1, y.list.otr, X.st.otr,
+                                locs.list.otr, scaling=c(1,1),
+                                apanasovich=TRUE, verbose=FALSE,
+                                optim.control=list(trace=0,maxit=5000,
+                                                   reltol=1e-3))
+    expect_error(prestogp_predict(pgp.mmodel1, X.st.otst, as.matrix(1:3)),
+                 "locs must be a list for multivariate models")
+})
+
+test_that("Invalid X input for prediction", {
+    source("sim_multivariate_small_pred.R")
+    pgp.mmodel1 <- new("MultivariateVecchiaModel", n_neighbors=5)
+    pgp.mmodel1 <- prestogp_fit(pgp.mmodel1, y.list.otr, X.st.otr,
+                                locs.list.otr, scaling=c(1,1),
+                                apanasovich=TRUE, verbose=FALSE,
+                                optim.control=list(trace=0,maxit=5000,
+                                                   reltol=1e-3))
+    expect_error(prestogp_predict(pgp.mmodel1, as.matrix(1:3), locs.list.otst),
+                 "X must be a list for multivariate models")
+})
+
+test_that("locs/X length mismatch for prediction", {
+    source("sim_multivariate_small_pred.R")
+    pgp.mmodel1 <- new("MultivariateVecchiaModel", n_neighbors=5)
+    pgp.mmodel1 <- prestogp_fit(pgp.mmodel1, y.list.otr, X.st.otr,
+                                locs.list.otr, scaling=c(1,1),
+                                apanasovich=TRUE, verbose=FALSE,
+                                optim.control=list(trace=0,maxit=5000,
+                                                   reltol=1e-3))
+    expect_error(prestogp_predict(pgp.mmodel1, X.st.otst, list(as.matrix(1:3))),
+                 "locs and X must have the same length")
+})
+
+test_that("locs/locs_train length mismatch", {
+    source("sim_multivariate_small_pred.R")
+    pgp.mmodel1 <- new("MultivariateVecchiaModel", n_neighbors=5)
+    pgp.mmodel1 <- prestogp_fit(pgp.mmodel1, y.list.otr, X.st.otr,
+                                locs.list.otr, scaling=c(1,1),
+                                apanasovich=TRUE, verbose=FALSE,
+                                optim.control=list(trace=0,maxit=5000,
+                                                   reltol=1e-3))
+    expect_error(prestogp_predict(pgp.mmodel1, list(1:3, 1:3, 1:3),
+                                  list(1:3, 1:3, 1:3)),
+                 "Training and test set locs must have the same length")
+})
+
+test_that("locs not a matrix for prediction", {
+    source("sim_multivariate_small_pred.R")
+    pgp.mmodel1 <- new("MultivariateVecchiaModel", n_neighbors=5)
+    pgp.mmodel1 <- prestogp_fit(pgp.mmodel1, y.list.otr, X.st.otr,
+                                locs.list.otr, scaling=c(1,1),
+                                apanasovich=TRUE, verbose=FALSE,
+                                optim.control=list(trace=0,maxit=5000,
+                                                   reltol=1e-3))
+    expect_error(prestogp_predict(pgp.mmodel1, list(as.matrix(1:3),
+                                                    as.matrix(1:3)),
+                                  list(1:3, as.matrix(1:3))),
+                 "Each locs must be a matrix")
+})
+
+test_that("ncol(locs) != ncol(locs_train)", {
+    source("sim_multivariate_small_pred.R")
+    pgp.mmodel1 <- new("MultivariateVecchiaModel", n_neighbors=5)
+    pgp.mmodel1 <- prestogp_fit(pgp.mmodel1, y.list.otr, X.st.otr,
+                                locs.list.otr, scaling=c(1,1),
+                                apanasovich=TRUE, verbose=FALSE,
+                                optim.control=list(trace=0,maxit=5000,
+                                                   reltol=1e-3))
+    expect_error(prestogp_predict(pgp.mmodel1, list(as.matrix(1:3),
+                                                    as.matrix(1:3)),
+                                  list(as.matrix(1:3), as.matrix(1:3))),
+                 "All locs must have the same number of columns as locs_train")
+})
+
+test_that("X not a matrix for prediction", {
+    source("sim_multivariate_small_pred.R")
+    pgp.mmodel1 <- new("MultivariateVecchiaModel", n_neighbors=5)
+    pgp.mmodel1 <- prestogp_fit(pgp.mmodel1, y.list.otr, X.st.otr,
+                                locs.list.otr, scaling=c(1,1),
+                                apanasovich=TRUE, verbose=FALSE,
+                                optim.control=list(trace=0,maxit=5000,
+                                                   reltol=1e-3))
+    X.st.otst[[1]] <- as.vector(X.st.otst[[1]])
+    expect_error(prestogp_predict(pgp.mmodel1, X.st.otst, locs.list.otst),
+                 "Each X must be a matrix")
+})
+
+test_that("nrow(X) != nrow(locs) for prediction", {
+    source("sim_multivariate_small_pred.R")
+    pgp.mmodel1 <- new("MultivariateVecchiaModel", n_neighbors=5)
+    pgp.mmodel1 <- prestogp_fit(pgp.mmodel1, y.list.otr, X.st.otr,
+                                locs.list.otr, scaling=c(1,1),
+                                apanasovich=TRUE, verbose=FALSE,
+                                optim.control=list(trace=0,maxit=5000,
+                                                   reltol=1e-3))
+    locs.list.otst[[1]] <- rbind(locs.list.otst[[1]], rep(0, 2))
+    expect_error(prestogp_predict(pgp.mmodel1, X.st.otst, locs.list.otst),
+                 "Each X must have the same number of rows as locs")
+})
+
+test_that("ncol(X) != ncol(X_train) for prediction", {
+    source("sim_multivariate_small_pred.R")
+    pgp.mmodel1 <- new("MultivariateVecchiaModel", n_neighbors=5)
+    pgp.mmodel1 <- prestogp_fit(pgp.mmodel1, y.list.otr, X.st.otr,
+                                locs.list.otr, scaling=c(1,1),
+                                apanasovich=TRUE, verbose=FALSE,
+                                optim.control=list(trace=0,maxit=5000,
+                                                   reltol=1e-3))
+    X.st.otst[[1]] <- cbind(X.st.otst[[1]], rep(0, nrow(X.st.otst[[1]])))
+    expect_error(prestogp_predict(pgp.mmodel1, X.st.otst, locs.list.otst),
+                 "X and X_train must have the same number of predictors")
+})
+
+test_that("m too small for prediction", {
+    source("sim_multivariate_small_pred.R")
+    pgp.mmodel1 <- new("MultivariateVecchiaModel", n_neighbors=5)
+    pgp.mmodel1 <- prestogp_fit(pgp.mmodel1, y.list.otr, X.st.otr,
+                                locs.list.otr, scaling=c(1,1),
+                                apanasovich=TRUE, verbose=FALSE,
+                                optim.control=list(trace=0,maxit=5000,
+                                                   reltol=1e-3))
+    expect_error(prestogp_predict(pgp.mmodel1, X.st.otst, locs.list.otst, m=2),
+                 "m must be at least 3")
+})
+
+test_that("m too large for prediction", {
+    source("sim_multivariate_small_pred.R")
+    pgp.mmodel1 <- new("MultivariateVecchiaModel", n_neighbors=5)
+    pgp.mmodel1 <- prestogp_fit(pgp.mmodel1, y.list.otr, X.st.otr,
+                                locs.list.otr, scaling=c(1,1),
+                                apanasovich=TRUE, verbose=FALSE,
+                                optim.control=list(trace=0,maxit=5000,
+                                                   reltol=1e-3))
+    expect_warning(prestogp_predict(pgp.mmodel1, X.st.otst, locs.list.otst,
+                                    m=51),
+                   "Conditioning set size m chosen to be >=n. Changing to m=n-1")
+})
+
 test_that("Simulated dataset multivariate spatial prediction", {
     source("sim_multivariate_big_pred.R")
     pgp.mmodel1 <- new("MultivariateVecchiaModel", n_neighbors = 25)
@@ -158,6 +299,8 @@ test_that("Simulated dataset multivariate spatial prediction", {
     pgp.mmodel1.pred <- prestogp_predict(pgp.mmodel1, X.st.otst, locs.list.otst)
 
     mse <- mean((pgp.mmodel1.pred$means - unlist(y.list.otst))^2)
+    me <- mean(pgp.mmodel1.pred$means - unlist(y.list.otst))
 
     expect_equal(mse, 1.99, tolerance = 0.1)
+    expect_equal(me, -0.04, tolerance = 0.03)
 })
diff --git a/tests/testthat/test-PrestoGP_Univariate_Model.R b/tests/testthat/test-PrestoGP_Univariate.R
similarity index 53%
rename from tests/testthat/test-PrestoGP_Univariate_Model.R
rename to tests/testthat/test-PrestoGP_Univariate.R
index 53a56b1..3ee7435 100755
--- a/tests/testthat/test-PrestoGP_Univariate_Model.R
+++ b/tests/testthat/test-PrestoGP_Univariate.R
@@ -41,7 +41,7 @@ test_that("nrow(Y) != nrow(X)", {
 })
 
 test_that("Simulated dataset spatial", {
-    source("sim_vecchia.R")
+    load("sim_vecchia.RData")
     pgp.model1 <- new("VecchiaModel", n_neighbors=25)
     pgp.model1 <- prestogp_fit(pgp.model1, y, X, locs,
                                scaling=c(1,1), apanasovich=TRUE, verbose=FALSE,
@@ -132,3 +132,111 @@ test_that("Simulated dataset spatiotemporal", {
     expect_equal(params.out[4], params.out2[4], tolerance=0.3)
     expect_equal(params.out[5], params.out2[5], tolerance=0.1)
 })
+
+test_that("Invalid locs input for prediction", {
+    source("sim_vecchia_small_pred.R")
+    pgp.model1 <- new("VecchiaModel", n_neighbors=5)
+    pgp.model1 <- prestogp_fit(pgp.model1, y.otr, X.otr,
+                               locs.otr, scaling=c(1,1),
+                               apanasovich=TRUE, verbose=FALSE,
+                               optim.control=list(trace=0,maxit=5000,
+                                                  reltol=1e-3))
+    expect_error(prestogp_predict(pgp.model1, X.otst, 1:3),
+                 "locs must be a matrix")
+})
+
+test_that("Invalid X input for prediction", {
+    source("sim_vecchia_small_pred.R")
+    pgp.model1 <- new("VecchiaModel", n_neighbors=5)
+    pgp.model1 <- prestogp_fit(pgp.model1, y.otr, X.otr,
+                               locs.otr, scaling=c(1,1),
+                               apanasovich=TRUE, verbose=FALSE,
+                               optim.control=list(trace=0,maxit=5000,
+                                                  reltol=1e-3))
+    expect_error(prestogp_predict(pgp.model1, 1:3, locs.otst),
+                 "X must be a matrix")
+})
+
+test_that("ncol(locs) != ncol(locs_train)", {
+    source("sim_vecchia_small_pred.R")
+    pgp.model1 <- new("VecchiaModel", n_neighbors=5)
+    pgp.model1 <- prestogp_fit(pgp.model1, y.otr, X.otr,
+                               locs.otr, scaling=c(1,1),
+                               apanasovich=TRUE, verbose=FALSE,
+                               optim.control=list(trace=0,maxit=5000,
+                                                  reltol=1e-3))
+    locs.otst <- cbind(locs.otst, rep(1, nrow(locs.otst)))
+    expect_error(prestogp_predict(pgp.model1, X.otst, locs.otst),
+                 "locs must have the same number of columns as locs_train")
+})
+
+test_that("nrow(X) != nrow(locs) for prediction", {
+    source("sim_vecchia_small_pred.R")
+    pgp.model1 <- new("VecchiaModel", n_neighbors=5)
+    pgp.model1 <- prestogp_fit(pgp.model1, y.otr, X.otr,
+                               locs.otr, scaling=c(1,1),
+                               apanasovich=TRUE, verbose=FALSE,
+                               optim.control=list(trace=0,maxit=5000,
+                                                  reltol=1e-3))
+    X.otst <- rbind(X.otst, rep(1, ncol(X.otst)))
+    expect_error(prestogp_predict(pgp.model1, X.otst, locs.otst),
+                 "X must have the same number of rows as locs")
+})
+
+test_that("ncol(X) != ncol(X_train) for prediction", {
+    source("sim_vecchia_small_pred.R")
+    pgp.model1 <- new("VecchiaModel", n_neighbors=5)
+    pgp.model1 <- prestogp_fit(pgp.model1, y.otr, X.otr,
+                               locs.otr, scaling=c(1,1),
+                               apanasovich=TRUE, verbose=FALSE,
+                               optim.control=list(trace=0,maxit=5000,
+                                                  reltol=1e-3))
+    X.otst <- cbind(X.otst, rep(1, nrow(X.otst)))
+    expect_error(prestogp_predict(pgp.model1, X.otst, locs.otst),
+                 "X and X_train must have the same number of predictors")
+})
+
+test_that("m too small for prediction", {
+    source("sim_vecchia_small_pred.R")
+    pgp.model1 <- new("VecchiaModel", n_neighbors=5)
+    pgp.model1 <- prestogp_fit(pgp.model1, y.otr, X.otr,
+                               locs.otr, scaling=c(1,1),
+                               apanasovich=TRUE, verbose=FALSE,
+                               optim.control=list(trace=0,maxit=5000,
+                                                  reltol=1e-3))
+    expect_error(prestogp_predict(pgp.model1, X.otst, locs.otst, m=2),
+                 "m must be at least 3")
+})
+
+test_that("m too large for prediction", {
+    source("sim_vecchia_small_pred.R")
+    pgp.model1 <- new("VecchiaModel", n_neighbors=5)
+    pgp.model1 <- prestogp_fit(pgp.model1, y.otr, X.otr,
+                               locs.otr, scaling=c(1,1),
+                               apanasovich=TRUE, verbose=FALSE,
+                               optim.control=list(trace=0,maxit=5000,
+                                                  reltol=1e-3))
+    expect_warning(prestogp_predict(pgp.model1, X.otst, locs.otst, m=201),
+                   "Conditioning set size m chosen to be >=n. Changing to m=n-1")
+})
+
+test_that("Simulated spatial prediction", {
+    source("sim_vecchia_pred.R")
+    pgp.model1 <- new("VecchiaModel", n_neighbors = 25)
+    pgp.model1 <- prestogp_fit(pgp.model1, y.otr, X.otr, locs.otr,
+        scaling = c(1, 1),
+        apanasovich = TRUE, verbose = FALSE,
+        optim.control = list(
+            trace = 0, maxit = 5000,
+            reltol = 1e-3
+        )
+    )
+
+    pgp.model1.pred <- prestogp_predict(pgp.model1, X.otst, locs.otst)
+
+    mse <- mean((pgp.model1.pred$means - y.otst)^2)
+    me <- mean(pgp.model1.pred$means - y.otst)
+
+    expect_equal(mse, 2.24, tolerance = 0.07)
+    expect_equal(me, 0.03, tolerance = 0.05)
+})