
Commit 245eaa3

Merge branch 'main' of sciome-bot-git:Spatiotemporal-Exposures-and-Toxicology/PrestoGP into build-workflow
sciome-bot committed Jan 12, 2024
2 parents 5355e24 + c060815 commit 245eaa3
Showing 26 changed files with 1,149 additions and 183 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
@@ -5,3 +5,4 @@
^\.Rproj\.user$
^doc$
^Meta$
^\.github/
1 change: 1 addition & 0 deletions .gitignore
@@ -31,6 +31,7 @@ manuscript/

# Output files from R CMD build
/*.tar.gz
*.dll

# Output files from R CMD check
/*.Rcheck/
15 changes: 11 additions & 4 deletions DESCRIPTION
@@ -36,15 +36,18 @@ Imports:
knitr,
dplyr,
glmnet,
rmarkdown,
markdown,
gtools,
geoR,
doParallel,
RANN,
stats,
methods,
foreach,
rlang,
methods
mvtnorm,
spam,
psych,
doParallel,
covr
License: GPL-3
Encoding: UTF-8
VignetteBuilder: knitr
@@ -62,3 +65,7 @@ Collate:
'Visualization.R'
'package.R'
'utils-tidy-eval.R'
Suggests:
roxygen2,
testthat (>= 3.0.0)
Config/testthat/edition: 3
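
A quick local-check sketch (not part of this commit) of how the new testthat and covr entries are typically exercised; it assumes devtools is installed, which the DESCRIPTION shown here does not declare:

# Assumes devtools is available locally; it is not declared in this DESCRIPTION.
library(devtools)
library(covr)
devtools::test()                 # run tests/testthat/ under testthat edition 3
cov <- covr::package_coverage()  # line coverage for the package (covr is now in Imports)
covr::report(cov)                # open an HTML coverage report
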
1 change: 1 addition & 0 deletions NAMESPACE
@@ -29,6 +29,7 @@ import(glmnet)
import(ncvreg)
import(readxl)
import(scoringRules)
import(covr)
importFrom(aod,wald.test)
importFrom(dplyr,"%>%")
importFrom(foreach,"%dopar%")
32 changes: 22 additions & 10 deletions R/Log_Likelihood.R
@@ -12,15 +12,18 @@
#'
#' @examples
#' @noRd
negloglik_vecchia_ST <- function(logparms, res, vecchia.approx, param.seq, scaling, nscale) {
negloglik_vecchia_ST <- function(logparms, res, vecchia.approx, param.seq,
scaling, nscale) {
parms <- unlog.params(logparms, param.seq, 1)
locs.scaled <- vecchia.approx$locsord
for (j in 1:nscale) {
locs.scaled[, scaling == j] <- locs.scaled[, scaling == j] /
parms[param.seq[2, 1] + j - 1]
}
vecchia.approx$locsord <- locs.scaled
-vecchia_likelihood(res, vecchia.approx, c(parms[1], 1, parms[param.seq[3, 1]]), parms[param.seq[4, 1]])
-vecchia_likelihood(res, vecchia.approx, c(parms[1], 1,
parms[param.seq[3, 1]]),
parms[param.seq[4, 1]])
}

#' negloglik_vecchia
@@ -39,7 +42,8 @@ negloglik_vecchia_ST <- function(logparms, res, vecchia.approx, param.seq, scali
#' @noRd
negloglik_vecchia <- function(logparms, res, vecchia.approx, param.seq) {
parms <- unlog.params(logparms, param.seq, 1)
-vecchia_likelihood(res, vecchia.approx, c(parms[1], parms[2], parms[3]), parms[4])
-vecchia_likelihood(res, vecchia.approx, c(parms[1], parms[2], parms[3]),
parms[4])
}
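
For context (not part of the diff), a hedged sketch of how a wrapped objective such as negloglik_vecchia is typically minimized over log-scale parameters with stats::optim(), mirroring the optim.method and optim.control defaults visible in prestogp_fit() below; logparams0, res, vecchia.approx, and param.seq are assumed to already exist:

# Sketch only: minimize the Vecchia negative log-likelihood over log-parameters.
opt <- stats::optim(
  par = logparams0, fn = negloglik_vecchia,
  res = res, vecchia.approx = vecchia.approx, param.seq = param.seq,
  method = "Nelder-Mead",
  control = list(trace = 0, reltol = 1e-3, maxit = 5000)
)
covparams.hat <- unlog.params(opt$par, param.seq, 1)  # back-transform to the natural scale
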

#' negloglik_full_ST
@@ -91,7 +95,8 @@ negloglik.full <- function(logparams, d, y, param.seq) {
params <- unlog.params(logparams, param.seq, 1)
# d <- fields::rdist(locs)
N <- nrow(d)
cov.mat <- params[1] * fields::Matern(d, range = params[2], smoothness = params[3]) +
cov.mat <- params[1] * fields::Matern(d, range = params[2],
smoothness = params[3]) +
params[4] * diag(N)
return(-1 * mvtnorm::dmvnorm(y, rep(0, N), cov.mat, log = TRUE))
}
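
In equation form (a restatement of the code above, not part of the diff): with \theta = (\sigma^2, r, \nu, \tau^2) recovered by unlog.params, negloglik.full returns the negative log-likelihood

\mathcal{L}(\theta) = -\log \mathcal{N}\!\left(y \mid 0,\ \Sigma(\theta)\right), \qquad \Sigma(\theta) = \sigma^2\, M_{\nu}(d / r) + \tau^2 I_N,

where M_{\nu} is the Matern correlation function (fields::Matern with unit marginal variance) evaluated on the distance matrix d.
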
@@ -128,7 +133,8 @@ mvnegloglik <- function(logparams, vecchia.approx, y, param.seq, P) {
##############################################################################
### Flexible Spatiotemporal Multivariate Matern Negative Loglikelihood Function ###########

mvnegloglik_ST <- function(logparams, vecchia.approx, y, param.seq, P, scaling, nscale) {
mvnegloglik_ST <- function(logparams, vecchia.approx, y, param.seq, P, scaling,
nscale) {
# Input-
# logparams: A numeric vector of length (4*P)+(4*choose(P,2)).
# To construct these parameters we unlist a list of the 7 covariance
@@ -236,11 +242,15 @@ create.cov.upper.flex <- function(P, marg.var, marg.range, marg.smooth,
j <- combs[iter, 2]

smoothness.mat[i, j] <- (marg.smooth[i] + marg.smooth[j]) / 2
range.mat[i, j] <- 1 / sqrt(((1 / marg.range[i])^2 + (1 / marg.range[j])^2) / 2)
range.mat[i, j] <- 1 / sqrt(((1 / marg.range[i])^2 +
(1 / marg.range[j])^2) / 2)

s1 <- sqrt(marg.var[i] * marg.var[j])
s2 <- ((1 / marg.range[i])^marg.smooth[i] * (1 / marg.range[j])^marg.smooth[j]) / ((1 / range.mat[i, j])^(2 * smoothness.mat[i, j]))
s3 <- gamma(smoothness.mat[i, j]) / (sqrt(gamma(marg.smooth[i])) * sqrt(gamma(marg.smooth[j])))
s2 <- ((1 / marg.range[i])^marg.smooth[i] *
(1 / marg.range[j])^marg.smooth[j]) /
((1 / range.mat[i, j])^(2 * smoothness.mat[i, j]))
s3 <- gamma(smoothness.mat[i, j]) / (sqrt(gamma(marg.smooth[i])) *
sqrt(gamma(marg.smooth[j])))
s4 <- R.corr[iter]
sig2.mat[i, j] <- s1 * s2 * s3 * s4
}
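
For reference (not part of the diff), the reformatted lines above implement the flexible multivariate Matern cross-covariance parameters. Writing a_k = 1 / marg.range[k] for the inverse ranges, \sigma_k^2 = marg.var[k], \nu_k = marg.smooth[k], and \rho_{ij} = R.corr[iter], the loop computes, for i \neq j,

\nu_{ij} = \frac{\nu_i + \nu_j}{2}, \qquad a_{ij}^2 = \frac{a_i^2 + a_j^2}{2}, \qquad \sigma_{ij}^2 = \rho_{ij}\, \sqrt{\sigma_i^2 \sigma_j^2}\; \frac{a_i^{\nu_i}\, a_j^{\nu_j}}{a_{ij}^{2\nu_{ij}}}\; \frac{\Gamma(\nu_{ij})}{\sqrt{\Gamma(\nu_i)\,\Gamma(\nu_j)}},

with range.mat[i, j] = 1 / a_{ij} and smoothness.mat[i, j] = \nu_{ij}.
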
Expand Down Expand Up @@ -282,10 +292,12 @@ cat.covariances <- function(locs.list, sig2, range, smoothness, nugget) {
# Calculate the covariance matrix - if/then based on its location in the super-matrix
N <- nrow(d)
if (i == j) { # To accommodate varying size outcomes - the nugget is not included on cross-covariances
cov.mat.ij <- sig2[i, j] * geoR::matern(d, phi = range[i, j], kappa = smoothness[i, j]) +
cov.mat.ij <- sig2[i, j] * geoR::matern(d, phi = range[i, j], kappa =
smoothness[i, j]) +
nugget[i, j] * diag(N)
} else {
cov.mat.ij <- sig2[i, j] * geoR::matern(d, phi = range[i, j], kappa = smoothness[i, j])
cov.mat.ij <- sig2[i, j] * geoR::matern(d, phi = range[i, j], kappa =
smoothness[i, j])
}


22 changes: 16 additions & 6 deletions R/PrestoGP_CreateU_Multivariate.R
@@ -66,7 +66,8 @@ max_min_ordering <- function(locs, dist_func) {
#' @param dist_func Any distance function with a signature of dist(query_location, locations_matrix)
#'
#' @return A vector containing the indices of the neighbors
knn_indices <- function(ordered_locs, query, n_neighbors, dist_func, dist_func_code) {
knn_indices <- function(ordered_locs, query, n_neighbors,
dist_func, dist_func_code) {
if (dist_func_code == "custom") {
dists <- dist_func(query, ordered_locs)
dists_order <- order(dists)
@@ -90,25 +91,34 @@ knn_indices <- function(ordered_locs, query, n_neighbors, dist_func, dist_func_c
#' @param dist_func Any distance function with a signature of dist(query_location, locations_matrix)
#'
#' @return A list containing two matrices, each with one row per location: an indices matrix with the indices of nearest neighbors for each location, and a distance matrix with the associated distances
sparseNN <- function(ordered_locs, n_neighbors, dist_func, dist_func_code, ordered_locs_pred = NULL) {
sparseNN <- function(ordered_locs, n_neighbors,
dist_func, dist_func_code, ordered_locs_pred = NULL) {
ee <- min(apply(ordered_locs, 2, stats::sd))
n <- nrow(ordered_locs)
ordered_locs <- ordered_locs + matrix(
ee * 1e-04 *
stats::rnorm(n * ncol(ordered_locs)),
n, ncol(ordered_locs)
)
indices_matrix <- matrix(data = NA, nrow = nrow(ordered_locs), ncol = n_neighbors)
distances_matrix <- matrix(data = NA, nrow = nrow(ordered_locs), ncol = n_neighbors)
indices_matrix <- matrix(data = NA, nrow = nrow(ordered_locs),
ncol = n_neighbors)
distances_matrix <- matrix(data = NA, nrow = nrow(ordered_locs),
ncol = n_neighbors)
for (row in 1:n_neighbors) {
# for the locations from 1 to n_neighbors, use the entire locs list to find the neighbors
nn <- knn_indices(ordered_locs[1:(n_neighbors + 1), , drop = FALSE][-row, , drop = FALSE], ordered_locs[row, , drop = FALSE], n_neighbors, dist_func, dist_func_code)
nn <- knn_indices(ordered_locs[1:
(n_neighbors + 1), , drop = FALSE][-row, ,
drop = FALSE],
ordered_locs[row, , drop = FALSE], n_neighbors,
dist_func, dist_func_code)
indices_matrix[row, 1:n_neighbors] <- nn$indices[1:n_neighbors]
distances_matrix[row, 1:n_neighbors] <- nn$distances[1:n_neighbors]
}
for (row in (n_neighbors + 1):nrow(ordered_locs)) {
# get the m nearest neighbors from the locs before this one in the max-min order
nn <- knn_indices(ordered_locs[1:(row - 1), , drop = FALSE], ordered_locs[row, , drop = FALSE], n_neighbors, dist_func, dist_func_code)
nn <- knn_indices(ordered_locs[1:(row - 1), , drop = FALSE],
ordered_locs[row, , drop = FALSE], n_neighbors,
dist_func, dist_func_code)
indices_matrix[row, 1:n_neighbors] <- nn$indices[1:n_neighbors]
distances_matrix[row, 1:n_neighbors] <- nn$distances[1:n_neighbors]
}
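
Not part of the diff: RANN (listed in Imports) provides the standard fast k-nearest-neighbor search; whether knn_indices() uses it on the non-custom distance path is not visible in the hunks shown above. A toy illustration of the same query-vs-locations lookup, with hypothetical data:

# Toy data only; locs and query are hypothetical.
library(RANN)
set.seed(42)
locs  <- matrix(runif(200), ncol = 2)            # 100 random 2-D locations
query <- locs[1, , drop = FALSE]                 # find neighbors of the first point
nn <- RANN::nn2(data = locs[-1, , drop = FALSE], query = query, k = 10)
nn$nn.idx    # indices (into locs[-1, ]) of the 10 nearest neighbors
nn$nn.dists  # the corresponding Euclidean distances
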
143 changes: 101 additions & 42 deletions R/PrestoGP_Model.R
@@ -70,7 +70,7 @@ setMethod("initialize", "PrestoGPModel", function(.Object, ...) {
setGeneric("show_theta", function(object, Y_names) standardGeneric("show_theta"))
setGeneric("prestogp_fit", function(model, Y, X, locs, scaling = NULL, apanasovich = FALSE, covparams = NULL, beta.hat = NULL, tol = 0.999999, max_iters = 100, verbose = FALSE, optim.method = "Nelder-Mead", optim.control = list(trace = 0, reltol = 1e-3, maxit = 5000), parallel = FALSE, foldid = NULL) standardGeneric("prestogp_fit"))
setGeneric("prestogp_predict", function(model, X = "matrix", locs = "matrix", m = "numeric", ordering.pred = c("obspred", "general"), pred.cond = c("independent", "general"), return.values = c("mean", "meanvar")) standardGeneric("prestogp_predict"))
setGeneric("calc_covparams", function(model, locs, Y) standardGeneric("calc_covparams"))
setGeneric("calc_covparams", function(model, locs, Y, covparams) standardGeneric("calc_covparams"))
setGeneric("specify", function(model, ...) standardGeneric("specify"))
setGeneric("compute_residuals", function(model, Y, Y.hat) standardGeneric("compute_residuals"))
setGeneric("transform_data", function(model, Y, X) standardGeneric("transform_data"))
@@ -81,6 +81,7 @@ setGeneric("scale_locs", function(model, locs) standardGeneric("scale_locs"))
setGeneric("theta_names", function(model) standardGeneric("theta_names"))
setGeneric("transform_covariance_parameters", function(model) standardGeneric("transform_covariance_parameters"))
setGeneric("check_input", function(model, Y, X, locs) standardGeneric("check_input"))
setGeneric("check_input_pred", function(model, X, locs) standardGeneric("check_input_pred"))

#' show
#'
@@ -205,22 +206,35 @@ setMethod(
optim.control = list(trace = 0, reltol = 1e-3, maxit = 5000),
parallel = FALSE, foldid = NULL) {
model <- check_input(model, Y, X, locs)
if (!is.double(beta.hat) && !is.null(beta.hat)) {
stop("The beta.hat parameter must be floating point number.")
if (!is.null(beta.hat)) {
if (!is.vector(beta.hat) | !is.numeric(beta.hat)) {
stop("beta.hat parameter must be a numeric vector")
}
if (length(beta.hat) != (ncol(model@X_train) + 1)) {
stop("Length of beta.hat must match the number of predictors")
}
beta.hat <- as.matrix(beta.hat)
}
if (!is.double(tol)) {
stop("The tol parameter must be floating point number.")
if (!is.numeric(tol)) {
stop("tol must be numeric")
}
if (is.null(scaling)) {
if (is.matrix(locs)) {
scaling <- rep(1, ncol(locs))
} else {
scaling <- rep(1, ncol(locs[[1]]))
}
if (length(tol) != 1) {
stop("tol must be a scalar")
}
nscale <- length(unique(scaling))
if (sum(sort(unique(scaling)) == 1:nscale) < nscale) {
stop("scaling must consist of sequential integers between 1 and ncol(locs)")
if (tol<=0 | tol>1) {
stop("tol must satisfy 0<tol<=1")
}
if (is.null(scaling)) {
scaling <- rep(1, ncol(model@locs_train[[1]]))
nscale <- 1
} else {
if (length(scaling) != ncol(model@locs_train[[1]])) {
stop("Length of scaling must equal ncol of locs")
}
nscale <- length(unique(scaling))
if (sum(sort(unique(scaling)) == 1:nscale) < nscale) {
stop("scaling must consist of sequential integers starting at 1")
}
}
if (is.null(apanasovich)) {
if (nscale == 1) {
@@ -235,14 +249,18 @@
model@scaling <- scaling
model@nscale <- nscale
model@apanasovich <- apanasovich
if (is.null(covparams)) {
model <- calc_covparams(model, locs, Y)
}
if (!is.vector(model@covparams)) {
stop("The covparams paramter must be a numeric vector.")
if (!is.null(covparams)) {
if (!is.vector(covparams) | !is.numeric(covparams)) {
stop("covparams must be a numeric vector")
}
}
model <- calc_covparams(model, locs, Y, covparams)
if (model@n_neighbors < model@min_m) {
stop(paste("M must be at least ", model@min_m, ".", sep = ""))
stop(paste("m must be at least ", model@min_m, sep = ""))
}
if (model@n_neighbors >= nrow(model@Y_train)) {
warning("Conditioning set size m chosen to be >=n. Changing to m=n-1")
model@n_neighbors <- nrow(model@Y_train) - 1
}

model <- specify(model)
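
Not part of the diff: concrete values of the scaling argument that the new checks above accept or reject, assuming a hypothetical three-column locs matrix (two spatial coordinates plus time):

scaling <- c(1, 1, 2)  # accepted: length equals ncol(locs); unique values are 1:nscale
scaling <- c(1, 1, 1)  # accepted: a single scale group (equivalent to the NULL default)
scaling <- c(1, 3, 3)  # rejected: unique values are not sequential integers starting at 1
scaling <- c(1, 2)     # rejected: length differs from ncol(locs)
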
@@ -278,7 +296,7 @@
model <- specify(model)
}
model <- transform_data(model, model@Y_train, model@X_train)
model <- estimate_betas(model, parallel, foldid)
model <- estimate_betas(model, parallel)
min.error <- compute_error(model)
### Check min-error against the previous error and tolerance
if (min.error < prev.error * tol) {
@@ -390,35 +408,76 @@ setMethod("compute_error", "PrestoGPModel", function(model) {
#' @param Y the dependent variable matrix
#'
#' @return a model with initial covariance parameters
setMethod("calc_covparams", "PrestoGPModel", function(model, locs, Y) {
setMethod("calc_covparams", "PrestoGPModel", function(model, locs, Y, covparams) {
if (!is.list(locs)) {
P <- 1
locs <- list(locs)
Y <- list(Y)
} else {
P <- length(locs)
}
col.vars <- rep(NA, P)
D.sample.bar <- rep(NA, model@nscale * P)
for (i in 1:P) {
col.vars[i] <- var(Y[[i]])
N <- length(Y[[i]])
# TODO find a better way to compute initial spatial range
for (j in 1:model@nscale) {
d.sample <- sample(1:N, max(2, ceiling(N / 50)), replace = FALSE)
D.sample <- rdist(locs[[i]][d.sample, model@scaling == j])
D.sample.bar[(i - 1) * model@nscale + j] <- mean(D.sample) / 4
}
pseq <- create.param.sequence(P, model@nscale)
if (is.null(covparams)) {
col.vars <- rep(NA, P)
D.sample.bar <- rep(NA, model@nscale * P)
for (i in 1:P) {
col.vars[i] <- var(Y[[i]])
N <- length(Y[[i]])
# TODO find a better way to compute initial spatial range
for (j in 1:model@nscale) {
d.sample <- sample(1:N, max(2, ceiling(N / 50)), replace = FALSE)
D.sample <- rdist(locs[[i]][d.sample, model@scaling == j])
D.sample.bar[(i - 1) * model@nscale + j] <- mean(D.sample) / 4
}
}
model@logparams <- create.initial.values.flex(
c(0.9 * col.vars), # marginal variance
D.sample.bar, # range
rep(0.5, P), # smoothness
c(.1 * col.vars), # nuggets
rep(0, choose(P, 2)),
P
)
} else {
if (P==1) {
if (length(covparams) != pseq[4,2]) {
stop("Incorrect number of parameters in covparams")
}
} else {
if (length(covparams) != pseq[5,2]) {
stop("Incorrect number of parameters in covparams")
}
}
init.var <- covparams[pseq[1,1]:pseq[1,2]]
init.range <- covparams[pseq[2,1]:pseq[2,2]]
init.smooth <- covparams[pseq[3,1]:pseq[3,2]]
init.nugget <- covparams[pseq[4,1]:pseq[4,2]]
if (P>1) {
init.corr <- covparams[pseq[5,1]:pseq[5,2]]
}
else {
init.corr <- 0
}
if (sum(init.var<=0)>0) {
stop("Initial variance estimates must be positive")
}
if (sum(init.range<=0)>0) {
stop("Initial range estimates must be positive")
}
if (sum(init.nugget<=0)>0) {
stop("Initial nugget estimates must be positive")
}
if (sum(init.smooth<=0)>0 | sum(init.smooth>=2.5)>0) {
stop("Initial smoothness estimates must be between 0 and 2.5")
}
if (sum(init.corr < -1)>0 | sum(init.corr > 1)>0) {
stop("Initial correlation estimates must be between -1 and 1")
}
model@logparams <- create.initial.values.flex(init.var, init.range,
init.smooth, init.nugget,
init.corr, P)
}
model@logparams <- create.initial.values.flex(
c(0.9 * col.vars), # marginal variance
D.sample.bar, # range
rep(0.5, P), # smoothness
c(.1 * col.vars), # nuggets
rep(0, choose(P, 2)),
P
)
model@param_sequence <- create.param.sequence(P, model@nscale)
model@param_sequence <- pseq
model <- transform_covariance_parameters(model)
invisible(model)
})
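
Not part of the commit: an illustration of the covparams layout implied by the checks in calc_covparams above, for P = 2 outcomes and a single scale group (nscale = 1). The block ordering follows the pseq indexing in the code; create.param.sequence() itself is not shown in this diff, so treat the example as a sketch:

# Hypothetical initial values for P = 2, nscale = 1 (9 values in total):
covparams <- c(1.0, 1.5,  # marginal variances, one per outcome (must be positive)
               0.3, 0.4,  # ranges, nscale * P of them (must be positive)
               0.5, 0.5,  # smoothnesses (must lie in (0, 2.5))
               0.1, 0.2,  # nuggets (must be positive)
               0.25)      # cross-correlations, choose(P, 2) of them (must lie in [-1, 1])
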
[The remaining 19 changed files are not shown.]
