remove weights, rename to inv_var_weights -- and add a second type of… #27

Merged (2 commits) on Dec 7, 2023
47 changes: 34 additions & 13 deletions R/fit_dfa.R
@@ -79,9 +79,16 @@
#' of diagnostic functions -- but making the models a lot larger for storage. Finally, this argument may be a custom string
#' of parameters to monitor, e.g. c("x","sigma")
#' @param verbose Whether to print iterations and information from Stan, defaults to FALSE.
#' @param weights Optional name of "weights" argument in data frame. This is only implemented when data
#' are in long format. If not entered, defaults to weights = 1 for all observations. The implementation of weights
#' varies slightly by family: Gaussian family models use -log(w_i) in the dispersion formula.
#' @param inv_var_weights Optional name of the inverse variance weights column in the data frame. This is only implemented when data
#' are in long format. If not entered, defaults to inv_var_weights = 1 for all observations. The implementation of inv_var_weights
#' relies on inverse variance weighting, so that if you have standard errors associated with each observation,
#' the inverse variance weights are calculated as inv_var_weights <- 1 / (standard_errors^2). The observation error sigma
#' in the likelihood then becomes sigma / sqrt(inv_var_weights).
#' @param likelihood_weights Optional name of the likelihood weights column in the data frame. These
#' are used in the same way weights are implemented in the packages `glmmTMB`, `brms`, `sdmTMB`, etc.
#' Weights act as multipliers on the log-likelihood, with higher weights allowing observations
#' to contribute more. Currently only implemented with univariate distributions, when data are in long
#' format.
#' @param ... Any other arguments to pass to [rstan::sampling()].
#' @details Note that there is nothing restricting the loadings and trends from
#' being inverted (i.e. multiplied by `-1`) for a given chain. Therefore, if
@@ -187,7 +194,8 @@ fit_dfa <- function(y = y,
par_list = NULL,
family = "gaussian",
verbose = FALSE,
weights = NULL,
inv_var_weights = NULL,
likelihood_weights = NULL,
gp_theta_prior = c(3, 1),
expansion_prior = FALSE,
...) {
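
Both weighting arguments expect a column name in the long-format data frame and can be used independently or together. A minimal sketch of a call (the data frame `dat` and its columns `se` and `w` are hypothetical, not part of the package):

```r
library(bayesdfa)

# long-format data frame with columns obs, time, ts, plus a
# hypothetical per-observation standard error column `se`
dat$inv_var_w <- 1 / (dat$se^2) # inverse variance weights from SEs

fit <- fit_dfa(
  y = dat,
  data_shape = "long",
  num_trends = 2,
  inv_var_weights = "inv_var_w" # sigma becomes sigma / sqrt(inv_var_w)
)

# or down-/up-weight the log-likelihood, as in glmmTMB / brms / sdmTMB
fit2 <- fit_dfa(
  y = dat,
  data_shape = "long",
  num_trends = 2,
  likelihood_weights = "w" # target += w[i] * lpdf(...)
)
```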
@@ -234,9 +242,14 @@ fit_dfa <- function(y = y,
y$ts <- as.numeric(as.factor(y[["ts"]]))
N <- max(y[["time"]])
P <- max(y[["ts"]])
if (!is.null(weights)) {
if(weights %in% names(y) == FALSE) {
stop("Error: weight name is not found in long data frame")
if (!is.null(inv_var_weights)) {
if(inv_var_weights %in% names(y) == FALSE) {
stop("Error: inverse variance weight name is not found in long data frame")
}
}
if (!is.null(likelihood_weights)) {
if(likelihood_weights %in% names(y) == FALSE) {
stop("Error: likelihood weight name is not found in long data frame")
}
}
}
@@ -333,7 +346,8 @@ fit_dfa <- function(y = y,
}
nVariances <- length(unique(varIndx))

weights_vec <- NULL
inv_var_weights_vec <- NULL
likelihood_weights_vec <- NULL
# indices of positive values - Stan can't handle NAs
if (data_shape[1] == "wide") {
row_indx_pos <- matrix(rep(seq_len(P), N), P, N)[!is.na(y)]
@@ -343,7 +357,8 @@
col_indx_na <- matrix(sort(rep(seq_len(N), P)), P, N)[is.na(y)]
n_na <- length(row_indx_na)
y <- y[!is.na(y)]
weights_vec <- rep(1, length(y))
inv_var_weights_vec <- rep(1, length(y))
likelihood_weights_vec <- rep(1, length(y))
if(!is.null(offset)) {
stop("Error: if offset is included, data shape must be long")
}
@@ -358,10 +373,15 @@
col_indx_na <- matrix(1, 1, 1)[is.na(runif(1))]
n_na <- length(row_indx_na)

if(!is.null(weights)) {
weights_vec <- y[[weights]]
if(!is.null(inv_var_weights)) {
inv_var_weights_vec <- y[[inv_var_weights]]
} else {
inv_var_weights_vec <- rep(1, nrow(y))
}
if(!is.null(likelihood_weights)) {
likelihood_weights_vec <- y[[likelihood_weights]]
} else {
weights_vec <- rep(1, nrow(y))
likelihood_weights_vec <- rep(1, nrow(y))
}

offset_vec <- rep(0, nrow(y))
@@ -543,7 +563,8 @@ fit_dfa <- function(y = y,
gp_theta_prior = gp_theta_prior,
use_expansion_prior = as.integer(expansion_prior),
input_offset = offset_vec,
weights_vec = weights_vec
inv_var_weights_vec = sqrt(1.0/inv_var_weights_vec),
weights_vec = likelihood_weights_vec
)
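
Note the transform on the R side: rather than passing the raw weights, sqrt(1.0 / inv_var_weights_vec) is sent to Stan so the model can scale sigma with a single multiplication per observation. A quick base-R sketch of the equivalence:

```r
sigma <- 0.5
w <- c(1, 4, 0.25)   # inverse variance weights
sigma / sqrt(w)      # documented form: sigma / sqrt(inv_var_weights)
sigma * sqrt(1 / w)  # form computed in Stan after the transform; identical
```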

if (is.null(par_list)) {
23 changes: 10 additions & 13 deletions inst/stan/dfa.stan
@@ -121,22 +121,19 @@ data {
int<lower=0, upper=1> est_nb2_params;
int<lower=0, upper=1> use_expansion_prior;
array[2] real gp_theta_prior;
array[n_pos] real inv_var_weights_vec;
array[n_pos] real weights_vec;
}
transformed data {
int n_pcor; // dimension for cov matrix
int n_loglik; // dimension for loglik calculation
vector[K] zeros;
array[N] real data_locs; // for gp model
array[n_pos] real log_weights_vec; // weights
vector[K*proportional_model] alpha_vec;
vector[n_knots] muZeros;
real gp_delta = 1e-9; // stabilizing value for GP model
real lower_bound_z;

for(i in 1:n_pos) {
log_weights_vec[i] = log(weights_vec[i]);
}
for(i in 1:N) {
data_locs[i] = i;
}
@@ -579,19 +576,19 @@ model {
if(long_format==0) {
if(obs_model == 1) {for(i in 1:P) target += normal_lpdf(yall[i] | pred[i], sigma_vec[i]);} // gaussian
} else {
if(obs_model == 1) {for(i in 1:n_pos) target += normal_lpdf(y[i] | input_offset[i] + pred[row_indx_pos[i],col_indx_pos[i]] + obs_cov_offset[i], exp(log(sigma_vec[row_indx_pos[i]]) - log_weights_vec[i]));}
if(obs_model == 2) {for(i in 1:n_pos) target += gamma_lpdf(y[i] | gamma_a_vec[row_indx_pos[i]], gamma_a_vec[row_indx_pos[i]] / exp(input_offset[i] + pred[row_indx_pos[i],col_indx_pos[i]] + obs_cov_offset[i]));} // gamma
if(obs_model == 3) {for(i in 1:n_pos) target += poisson_log_lpmf(y_int[i] | input_offset[i] + pred[row_indx_pos[i],col_indx_pos[i]] + obs_cov_offset[i]);} // poisson
if(obs_model == 4) {for(i in 1:n_pos) target += neg_binomial_2_log_lpmf(y_int[i] | input_offset[i] + pred[row_indx_pos[i],col_indx_pos[i]] + obs_cov_offset[i], nb_phi_vec[row_indx_pos[i]]);} // negbin
if(obs_model == 5) {for(i in 1:n_pos) target += bernoulli_logit_lpmf(y_int[i] | input_offset[i] + pred[row_indx_pos[i],col_indx_pos[i]] + obs_cov_offset[i]);} // binomial
if(obs_model == 6) {for(i in 1:n_pos) target += lognormal_lpdf(y[i] | input_offset[i] + pred[row_indx_pos[i],col_indx_pos[i]] + obs_cov_offset[i], sigma_vec[row_indx_pos[i]]);} // lognormal
if(obs_model == 1) {for(i in 1:n_pos) target += weights_vec[i] * normal_lpdf(y[i] | input_offset[i] + pred[row_indx_pos[i],col_indx_pos[i]] + obs_cov_offset[i], sigma_vec[row_indx_pos[i]] * inv_var_weights_vec[i]);}
if(obs_model == 2) {for(i in 1:n_pos) target += weights_vec[i] * gamma_lpdf(y[i] | gamma_a_vec[row_indx_pos[i]], gamma_a_vec[row_indx_pos[i]] / exp(input_offset[i] + pred[row_indx_pos[i],col_indx_pos[i]] + obs_cov_offset[i]));} // gamma
if(obs_model == 3) {for(i in 1:n_pos) target += weights_vec[i] * poisson_log_lpmf(y_int[i] | input_offset[i] + pred[row_indx_pos[i],col_indx_pos[i]] + obs_cov_offset[i]);} // poisson
if(obs_model == 4) {for(i in 1:n_pos) target += weights_vec[i] * neg_binomial_2_log_lpmf(y_int[i] | input_offset[i] + pred[row_indx_pos[i],col_indx_pos[i]] + obs_cov_offset[i], nb_phi_vec[row_indx_pos[i]]);} // negbin
if(obs_model == 5) {for(i in 1:n_pos) target += weights_vec[i] * bernoulli_logit_lpmf(y_int[i] | input_offset[i] + pred[row_indx_pos[i],col_indx_pos[i]] + obs_cov_offset[i]);} // binomial
if(obs_model == 6) {for(i in 1:n_pos) target += weights_vec[i] * lognormal_lpdf(y[i] | input_offset[i] + pred[row_indx_pos[i],col_indx_pos[i]] + obs_cov_offset[i], sigma_vec[row_indx_pos[i]]);} // lognormal
}
} else {
// need to loop over time slices / columns - each ~ MVN
if(long_format==0) {
if(obs_model == 1) {for(i in 1:N) target += multi_normal_cholesky_lpdf(col(yall,i) | col(pred,i), diag_pre_multiply(sigma_vec, Lcorr));}
} else {
if(obs_model == 1) {for(i in 1:n_pos) target += normal_lpdf(y[i] | input_offset[i] + pred[row_indx_pos[i],col_indx_pos[i]] + obs_cov_offset[i] + cond_mean_vec[row_indx_pos[i]], exp(log(cond_sigma_vec[row_indx_pos[i]]) - log_weights_vec[i]));}
if(obs_model == 1) {for(i in 1:n_pos) target += weights_vec[i] * normal_lpdf(y[i] | input_offset[i] + pred[row_indx_pos[i],col_indx_pos[i]] + obs_cov_offset[i] + cond_mean_vec[row_indx_pos[i]], cond_sigma_vec[row_indx_pos[i]] * inv_var_weights_vec[i]);}
}
}
}
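
The Gaussian case above shows both schemes at once: inv_var_weights_vec (already sqrt(1/w) at this point) rescales the observation-error SD, while weights_vec multiplies each pointwise log density. A small R sketch mirroring the weighted Gaussian statement, with made-up values:

```r
# mirrors: target += w[i] * normal_lpdf(y[i] | mu[i], sigma * inv_var_weights_vec[i])
y         <- c(1.2, 0.8, 1.5)
mu        <- rep(1, 3)
sigma     <- 0.3
inv_var_w <- c(1, 4, 2)   # user-supplied inverse variance weights
lik_w     <- c(1, 1, 0.5) # likelihood weights

ll <- sum(lik_w * dnorm(y, mean = mu, sd = sigma / sqrt(inv_var_w), log = TRUE))
ll
```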
@@ -622,7 +619,7 @@ generated quantities {
}
}
} else {
if(obs_model == 1) {for(i in 1:n_pos) log_lik[i] = normal_lpdf(y[i] | input_offset[i] + pred[row_indx_pos[i],col_indx_pos[i]] + obs_cov_offset[i], exp(log(sigma_vec[row_indx_pos[i]]) - log_weights_vec[i]));}
if(obs_model == 1) {for(i in 1:n_pos) log_lik[i] = normal_lpdf(y[i] | input_offset[i] + pred[row_indx_pos[i],col_indx_pos[i]] + obs_cov_offset[i], sigma_vec[row_indx_pos[i]] * inv_var_weights_vec[i]);}
if(obs_model == 2) {for(i in 1:n_pos) log_lik[i] = gamma_lpdf(y[i] | gamma_a_vec[row_indx_pos[i]], gamma_a_vec[row_indx_pos[i]] / exp(input_offset[i] + pred[row_indx_pos[i],col_indx_pos[i]] + obs_cov_offset[i]));} // gamma
if(obs_model == 3) {for(i in 1:n_pos) log_lik[i] = poisson_log_lpmf(y_int[i] | input_offset[i] + pred[row_indx_pos[i],col_indx_pos[i]] + obs_cov_offset[i]);} // poisson
if(obs_model == 4) {for(i in 1:n_pos) log_lik[i] = neg_binomial_2_log_lpmf(y_int[i] | input_offset[i] + pred[row_indx_pos[i],col_indx_pos[i]] + obs_cov_offset[i], nb_phi_vec[row_indx_pos[i]]);} // negbin
@@ -639,7 +636,7 @@
} else {
for(i in 1:n_pos) {
// //row_indx_pos[i] is the time series, col_index_pos is the time
log_lik[i] = normal_lpdf(y[i] | input_offset[i] + pred[row_indx_pos[i],col_indx_pos[i]] + obs_cov_offset[i] + cond_mean_vec[row_indx_pos[i]], exp(log(cond_sigma_vec[row_indx_pos[i]]) - log_weights_vec[i]));
log_lik[i] = normal_lpdf(y[i] | input_offset[i] + pred[row_indx_pos[i],col_indx_pos[i]] + obs_cov_offset[i] + cond_mean_vec[row_indx_pos[i]], cond_sigma_vec[row_indx_pos[i]] * inv_var_weights_vec[i]);
}
}
}
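
Note that the pointwise log_lik values here keep the inverse-variance scaling of sigma but, unlike the model block, are not multiplied by the likelihood weights. They can be passed to loo in the usual way; a sketch, assuming the fitted object stores its stanfit in `$model`:

```r
library(loo)
ll <- extract_log_lik(fit$model, parameter_name = "log_lik")
loo(ll)
```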
17 changes: 13 additions & 4 deletions man/fit_dfa.Rd

(Generated documentation file; diff not rendered.)