diff --git a/CRAN-RELEASE b/CRAN-RELEASE new file mode 100644 index 0000000..04019aa --- /dev/null +++ b/CRAN-RELEASE @@ -0,0 +1,2 @@ +This package was submitted to CRAN on 2021-05-18. +Once it is accepted, delete this file and tag the release (commit 037b6a9). diff --git a/DESCRIPTION b/DESCRIPTION index 9cd7e68..8524fd4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: bayesdfa Type: Package Title: Bayesian Dynamic Factor Analysis (DFA) with 'Stan' -Version: 1.0.0 +Version: 1.1.0 Authors@R: c( person(c("Eric", "J."), "Ward", role = c("aut", "cre"), email = "eric.ward@noaa.gov"), diff --git a/NEWS.md b/NEWS.md index 2201cf4..169365b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -29,3 +29,7 @@ # bayesdfa 1.0.0 * Added constraint on diagonal of Z matrix to keep parameter estimates from 'flipping' within MCMC chains. Ensures convergence for problematic cases. This was present in 0.1.1, but later removed. + +# bayesdfa 1.1.0 + +* Following 1.0.0, added a new argument 'expansion_prior' to the fit_dfa() function that allows the user to toggle the constraint on or off. If not included (default=FALSE), there is no constraint on the Z diagonal, and identifiability is resolved by post-hoc inversion of the MCMC chains. If 'expansion_prior' = TRUE, then the positive constraint is applied, in combination with the expansion prior for trends and loadings. diff --git a/R/converge_rhat.R b/R/converge_rhat.R index 4b5f7d5..eb5c389 100644 --- a/R/converge_rhat.R +++ b/R/converge_rhat.R @@ -10,9 +10,8 @@ #' @export #' is_converged <- function(fitted_model, - threshold = 1.05, - parameters = c("sigma", "x", "Z")) { - + threshold = 1.05, + parameters = c("sigma", "x", "Z")) { Rhats <- fitted_model$monitor[which(grepl( paste(parameters, collapse = "|"), diff --git a/R/dfa_cv.R b/R/dfa_cv.R index 30e6aae..fcd3168 100644 --- a/R/dfa_cv.R +++ b/R/dfa_cv.R @@ -20,79 +20,79 @@ #' \dontrun{ #' set.seed(42) #' s <- sim_dfa(num_trends = 1, num_years = 20, num_ts = 3) -#' obs <- c(s$y_sim[1,], s$y_sim[2,], s$y_sim[3,]) -#' long = data.frame("obs" = obs, "ts" = sort(rep(1:3,20)), "time" = rep(1:20,3)) -#' m <- fit_dfa(y = long, iter = 50, chains = 1, data_shape="long", sample=FALSE) +#' obs <- c(s$y_sim[1, ], s$y_sim[2, ], s$y_sim[3, ]) +#' long <- data.frame("obs" = obs, "ts" = sort(rep(1:3, 20)), "time" = rep(1:20, 3)) +#' m <- fit_dfa(y = long, iter = 50, chains = 1, data_shape = "long", sample = FALSE) #' # random folds -#' fit_cv = dfa_cv(m, cv_method="loocv", n_folds = 5, iter=50, chains=1) +#' fit_cv <- dfa_cv(m, cv_method = "loocv", n_folds = 5, iter = 50, chains = 1) #' #' # folds can also be passed in -#' fold_ids = sample(1:5, size=nrow(long), replace=TRUE) -#' m <- fit_dfa(y = long, iter = 50, chains = 1, data_shape="long", sample=FALSE) -#' fit_cv = dfa_cv(m, cv_method="loocv", n_folds = 5, iter=50, chains=1, fold_ids=fold_ids) +#' fold_ids <- sample(1:5, size = nrow(long), replace = TRUE) +#' m <- fit_dfa(y = long, iter = 50, chains = 1, data_shape = "long", sample = FALSE) +#' fit_cv <- dfa_cv(m, cv_method = "loocv", n_folds = 5, iter = 50, chains = 1, fold_ids = fold_ids) #' #' # do an example of leave-time-out cross validation where years are dropped -#' fold_ids = long$time -#' m <- fit_dfa(y = long, iter = 50, chains = 1, data_shape="long", sample=FALSE) -#' fit_cv = dfa_cv(m, cv_method="loocv", iter=100, chains=1, fold_ids = fold_ids) +#' fold_ids <- long$time -#' m <- fit_dfa(y = long, iter = 50, chains = 1, data_shape = "long", sample = FALSE) +#' m <- fit_dfa(y = long, iter = 50, chains = 1, data_shape = "long", sample = FALSE) +#' fit_cv <- dfa_cv(m, cv_method = "loocv", iter = 100, chains = 
1, fold_ids = fold_ids) #' #' # example with covariates and long format data -#' obs_covar = expand.grid("time"=1:20,"timeseries"=1:3,"covariate"=1:2) -#' obs_covar$value=rnorm(nrow(obs_covar),0,0.1) -#' obs <- c(s$y_sim[1,], s$y_sim[2,], s$y_sim[3,]) -#' m <- fit_dfa(y = long, iter = 50, chains = 1, obs_covar=obs_covar,data_shape="long", sample=FALSE) -#' fit_cv = dfa_cv(m, cv_method="loocv", n_folds = 5, iter=50, chains=1) +#' obs_covar <- expand.grid("time" = 1:20, "timeseries" = 1:3, "covariate" = 1:2) +#' obs_covar$value <- rnorm(nrow(obs_covar), 0, 0.1) +#' obs <- c(s$y_sim[1, ], s$y_sim[2, ], s$y_sim[3, ]) +#' m <- fit_dfa(y = long, iter = 50, chains = 1, obs_covar = obs_covar, data_shape = "long", sample = FALSE) +#' fit_cv <- dfa_cv(m, cv_method = "loocv", n_folds = 5, iter = 50, chains = 1) #' } #' dfa_cv <- function(stanfit, - cv_method = c("loocv","lfocv"), - fold_ids = NULL, - n_folds = 10, - iter = 2000, - chains = 4, - thin = 1, - ...) { - - cv_method <- match.arg(cv_method, c("loocv","lfocv")) - if(is.null(fold_ids)) { + cv_method = c("loocv", "lfocv"), + fold_ids = NULL, + n_folds = 10, + iter = 2000, + chains = 4, + thin = 1, + ...) { + cv_method <- match.arg(cv_method, c("loocv", "lfocv")) + if (is.null(fold_ids)) { warning("the vector fold_ids containing fold ids is null, so random folds are being used") - fold_ids <- sample(1:n_folds, nrow(stanfit$orig_data), replace=TRUE) + fold_ids <- sample(1:n_folds, nrow(stanfit$orig_data), replace = TRUE) } - if(length(fold_ids) != nrow(stanfit$orig_data)) { + if (length(fold_ids) != nrow(stanfit$orig_data)) { stop("The length of the vector fold_ids needs to be the same as the number of rows in the long format dataframe") } - if(stanfit$shape!="long") { + if (stanfit$shape != "long") { stop("Error, please reshape the data into long format") } - if(!is.null(fold_ids)) n_folds = max(fold_ids) + if (!is.null(fold_ids)) n_folds <- max(fold_ids) y <- stanfit$orig_data - y$time = y$time - min(y$time) + 1 + y$time <- y$time - min(y$time) + 1 # loop over the folds, re-fitting the dfa model each time with the folds held out - log_lik <- matrix(0, nrow=ceiling(iter/(2*thin))*chains, ncol=n_folds) - for(f in 1:n_folds) { + log_lik <- matrix(0, nrow = ceiling(iter / (2 * thin)) * chains, ncol = n_folds) + for (f in 1:n_folds) { # fit model holding out each time slice. 
subset observed data and covar y_train <- y - y_train[which(fold_ids == f),"obs"] <- NA - y_test <- y[which(fold_ids == f),] - obs_covar_train=NULL - if(length(stanfit$sampling_args$data$obs_covar_value) > 0) { - stanfit$obs_covar$time_timeseries <- paste(stanfit$obs_covar$time,stanfit$obs_covar$timeseries) - y_train$time_timeseries <- paste(y_train$time,y_train$ts) - y_test$time_timeseries <- paste(y_test$time,y_test$ts) - obs_covar_train <- stanfit$obs_covar[which(stanfit$obs_covar$time_timeseries %in% y_train$time_timeseries[which(fold_ids!=f)]),1:4] - obs_covar_test <- stanfit$obs_covar[which(stanfit$obs_covar$time_timeseries %in% y_test$time_timeseries),1:4] + y_train[which(fold_ids == f), "obs"] <- NA + y_test <- y[which(fold_ids == f), ] + obs_covar_train <- NULL + if (length(stanfit$sampling_args$data$obs_covar_value) > 0) { + stanfit$obs_covar$time_timeseries <- paste(stanfit$obs_covar$time, stanfit$obs_covar$timeseries) + y_train$time_timeseries <- paste(y_train$time, y_train$ts) + y_test$time_timeseries <- paste(y_test$time, y_test$ts) + obs_covar_train <- stanfit$obs_covar[which(stanfit$obs_covar$time_timeseries %in% y_train$time_timeseries[which(fold_ids != f)]), 1:4] + obs_covar_test <- stanfit$obs_covar[which(stanfit$obs_covar$time_timeseries %in% y_test$time_timeseries), 1:4] } - pro_covar_train=NULL - if(length(stanfit$sampling_args$data$pro_covar_value) > 0) { - pro_covar_train <- stanfit$pro_covar[which(fold_ids != f),] - pro_covar_test <- stanfit$pro_covar[which(fold_ids == f),] + pro_covar_train <- NULL + if (length(stanfit$sampling_args$data$pro_covar_value) > 0) { + pro_covar_train <- stanfit$pro_covar[which(fold_ids != f), ] + pro_covar_test <- stanfit$pro_covar[which(fold_ids == f), ] } # fit the new model - fit.mod <- fit_dfa(y = y_train, + fit.mod <- fit_dfa( + y = y_train, num_trends = stanfit$sampling_args$data$K, varIndx = stanfit$sampling_args$data$varIndx, zscore = stanfit$zscore, @@ -103,7 +103,7 @@ dfa_cv <- function(stanfit, nu_fixed = stanfit$sampling_args$data$nu_fixed, est_correlation = stanfit$sampling_args$data$est_cor, estimate_nu = stanfit$sampling_args$data$estimate_nu, - estimate_trend_ar = ifelse(stanfit$sampling_args$data$est_phi==1, TRUE, FALSE), + estimate_trend_ar = ifelse(stanfit$sampling_args$data$est_phi == 1, TRUE, FALSE), estimate_trend_ma = ifelse(stanfit$sampling_args$data$est_theta == 1, TRUE, FALSE), estimate_process_sigma = ifelse(stanfit$sampling_args$data$est_sigma_process == 1, TRUE, FALSE), equal_process_sigma = ifelse(stanfit$sampling_args$data$n_sigma_process == 1, TRUE, FALSE), @@ -114,63 +114,67 @@ dfa_cv <- function(stanfit, z_bound = stanfit$z_bound, z_model = stanfit$z_model, trend_model = stanfit$trend_model, - verbose = FALSE) + verbose = FALSE + ) # extract posterior parameters for the training set pars <- rstan::extract(fit.mod$model) r <- rotate_trends(fit.mod) # loop over each iterations (mcmc sample) - for(j in 1:nrow(log_lik)) { + for (j in 1:nrow(log_lik)) { # determine if covariates are included - obs_covar_offset = rep(0, nrow(y_test)) - if(is.null(obs_covar_train) & is.null(pro_covar_train)) { - #pred <- pars$Z[j,,] %*% matrix(pars$x[j,,],nrow=stanfit$sampling_args$data$K) - pred <- r$Z_rot[j,,] %*% matrix(r$trends[j,,],nrow=stanfit$sampling_args$data$K) + obs_covar_offset <- rep(0, nrow(y_test)) + if (is.null(obs_covar_train) & is.null(pro_covar_train)) { + # pred <- pars$Z[j,,] %*% matrix(pars$x[j,,],nrow=stanfit$sampling_args$data$K) + pred <- r$Z_rot[j, , ] %*% matrix(r$trends[j, , ], nrow = 
stanfit$sampling_args$data$K) # subset predictions corresponding to observations - pred <- pred[cbind(y_test$ts,y_test$time)] - #pred = pars$Z[j,,] %*% matrix(pars$xstar[j,,],ncol=1) + pred <- pred[cbind(y_test$ts, y_test$time)] + # pred = pars$Z[j,,] %*% matrix(pars$xstar[j,,],ncol=1) } - if(!is.null(obs_covar_train) & is.null(pro_covar_train)) { - #pred = pars$Z[j,,] %*% matrix(pars$xstar[j,,],ncol=1) + pars$b_obs[j,,] * obs_covar_test$value - #pred <- pars$Z[j,,] %*% matrix(pars$x[j,,],nrow=stanfit$sampling_args$data$K) - pred <- r$Z_rot[j,,] %*% matrix(r$trends[j,,],nrow=stanfit$sampling_args$data$K) - pred <- pred[cbind(y_test$ts,y_test$time)] - for(i in 1:max(obs_covar_test$covariate)) { + if (!is.null(obs_covar_train) & is.null(pro_covar_train)) { + # pred = pars$Z[j,,] %*% matrix(pars$xstar[j,,],ncol=1) + pars$b_obs[j,,] * obs_covar_test$value + # pred <- pars$Z[j,,] %*% matrix(pars$x[j,,],nrow=stanfit$sampling_args$data$K) + pred <- r$Z_rot[j, , ] %*% matrix(r$trends[j, , ], nrow = stanfit$sampling_args$data$K) + pred <- pred[cbind(y_test$ts, y_test$time)] + for (i in 1:max(obs_covar_test$covariate)) { indx <- which(obs_covar_test$covariate == i) - pred <- pred + pars$b_obs[j,i,obs_covar_test$timeseries[indx]] * obs_covar_test$value[indx] + pred <- pred + pars$b_obs[j, i, obs_covar_test$timeseries[indx]] * obs_covar_test$value[indx] } } - log_lik[j,f] <- sum(dnorm(x = y_test$obs, + log_lik[j, f] <- sum(dnorm( + x = y_test$obs, mean = pred, - sd = pars$sigma[j,stanfit$sampling_args$data$varIndx], log=TRUE), na.rm=T) - #log_lik[j,k] = sum(dnorm(x = ytest, mean = pred, sd = pars$sigma[j,varIndx], log=TRUE), na.rm=T) + sd = pars$sigma[j, stanfit$sampling_args$data$varIndx], log = TRUE + ), na.rm = T) + # log_lik[j,k] = sum(dnorm(x = ytest, mean = pred, sd = pars$sigma[j,varIndx], log=TRUE), na.rm=T) } # Predictions now vary based on how the cross validation is done, and whether covariates used - #if(cv_method == "loocv") { - #} - #if(cv_method == "lfocv") { - # for(j in 1:nrow(log_lik)) { - # # loop over iterations - # if(is.null(obs_covar) & is.null(pro_covar)) { - # pred = pars$Z[j,,] %*% matrix(pars$xstar[j,,],ncol=1) - # } - # if(!is.null(obs_covar) & is.null(pro_covar)) { - # pred = pars$Z[j,,] %*% matrix(pars$xstar[j,,],ncol=1) + pars$b_obs[j,,] * covar_test$value - # } - # log_lik[j,k] = sum(dnorm(x = ytest, mean = pred, sd = pars$sigma[j,varIndx], log=TRUE), na.rm=T) - # } - #} - + # if(cv_method == "loocv") { + # } + # if(cv_method == "lfocv") { + # for(j in 1:nrow(log_lik)) { + # # loop over iterations + # if(is.null(obs_covar) & is.null(pro_covar)) { + # pred = pars$Z[j,,] %*% matrix(pars$xstar[j,,],ncol=1) + # } + # if(!is.null(obs_covar) & is.null(pro_covar)) { + # pred = pars$Z[j,,] %*% matrix(pars$xstar[j,,],ncol=1) + pars$b_obs[j,,] * covar_test$value + # } + # log_lik[j,k] = sum(dnorm(x = ytest, mean = pred, sd = pars$sigma[j,varIndx], log=TRUE), na.rm=T) + # } + # } } - elpds <- apply(log_lik,2,log_sum_exp) - elpd <- list("log_lik"=log_lik, + elpds <- apply(log_lik, 2, log_sum_exp) + elpd <- list( + "log_lik" = log_lik, "elpds" = elpds, - "elpd_kfold"=sum(elpds), - "se_elpd_kfold" = sqrt(length(elpds) * var(elpds))) + "elpd_kfold" = sum(elpds), + "se_elpd_kfold" = sqrt(length(elpds) * var(elpds)) + ) return(elpd) } diff --git a/R/dfa_fitted.R b/R/dfa_fitted.R index c3450d0..20a1ab9 100644 --- a/R/dfa_fitted.R +++ b/R/dfa_fitted.R @@ -18,11 +18,11 @@ dfa_fitted <- function(modelfit, conf_level = 0.95, names = NULL) { # pred and Y have same dimensions if data is wide - 
pred = predicted(modelfit) - n_mcmc = dim(pred)[1] - n_chains = dim(pred)[2] - n_years = dim(pred)[3] - n_ts = dim(pred)[4] + pred <- predicted(modelfit) + n_mcmc <- dim(pred)[1] + n_chains <- dim(pred)[2] + n_years <- dim(pred)[3] + n_ts <- dim(pred)[4] # this is the same for both data types df_pred <- data.frame( @@ -33,31 +33,33 @@ dfa_fitted <- function(modelfit, conf_level = 0.95, names = NULL) { "upper" = c(t(apply(pred, c(3, 4), quantile, (1 - conf_level) / 2))) ) - if(modelfit$shape == "wide") { + if (modelfit$shape == "wide") { df_obs <- data.frame( - "ID" = rep(seq_len(n_ts), n_years), - "time" = sort(rep(seq_len(n_years), n_ts)), - "y" = c(modelfit$orig_data)) + "ID" = rep(seq_len(n_ts), n_years), + "time" = sort(rep(seq_len(n_years), n_ts)), + "y" = c(modelfit$orig_data) + ) } else { df_obs <- data.frame( "ID" = modelfit$orig_data[["ts"]], "time" = modelfit$orig_data[["time"]], - "y" = modelfit$orig_data[["obs"]]) + "y" = modelfit$orig_data[["obs"]] + ) } df_obs$time <- df_obs$time - min(df_obs$time) + 1 # standardize - for(i in seq_len(n_ts)) { - indx = which(df_obs[["ID"]] == i) - df_obs[indx,"y"] = scale(df_obs[indx,"y" ], center = TRUE, scale = TRUE) + for (i in seq_len(n_ts)) { + indx <- which(df_obs[["ID"]] == i) + df_obs[indx, "y"] <- scale(df_obs[indx, "y"], center = TRUE, scale = TRUE) } df_obs <- df_obs[order(df_obs$ID, df_obs$time), ] df_pred <- df_pred[order(df_pred$ID, df_pred$time), ] if (!is.null(names)) { - if(length(names) != n_ts) { - warning("bayesdfa: Length of 'names' should match number of time series. Ignoring 'names'.") + if (length(names) != n_ts) { + warning("bayesdfa: Length of 'names' should match number of time series. Ignoring 'names'.") } else { df_pred$ID <- names[df_pred$ID] df_obs$ID <- names[df_obs$ID] @@ -66,5 +68,4 @@ dfa_fitted <- function(modelfit, conf_level = 0.95, names = NULL) { df <- merge(df_obs, df_pred, by = c("ID", "time"), sort = FALSE) return(df) - } diff --git a/R/dfa_loadings.R b/R/dfa_loadings.R index c1ec906..fc0c28b 100644 --- a/R/dfa_loadings.R +++ b/R/dfa_loadings.R @@ -23,18 +23,17 @@ #' r <- rotate_trends(m) #' loadings <- dfa_loadings(r, summary = TRUE) #' loadings <- dfa_loadings(r, summary = FALSE) - dfa_loadings <- function(rotated_modelfit, names = NULL, summary = TRUE, conf_level = 0.95) { - v <- reshape2::melt(rotated_modelfit$Z_rot, - varnames = c("iter", "name", "trend"), value.name = "loading") + varnames = c("iter", "name", "trend"), value.name = "loading" + ) v$draw <- as.numeric(gsub("_chain.*$", "", v$iter)) v$chain <- as.numeric(gsub("^[0-9]+_chain:", "", v$iter)) v$iter <- NULL - v <- v[ , c("chain", "draw", "name", "trend", "loading")] + v <- v[, c("chain", "draw", "name", "trend", "loading")] v$trend <- paste0("Trend ", v$trend) v$trend <- as.factor(v$trend) @@ -52,7 +51,7 @@ dfa_loadings <- function(rotated_modelfit, v <- as.data.frame(dplyr::ungroup(v)) out <- v - if(summary) { + if (summary) { vsum <- dplyr::group_by(v, .data$name, .data$trend) vsum <- dplyr::summarize(vsum, median = median(.data$loading), diff --git a/R/dfa_trends.R b/R/dfa_trends.R index 197e22f..4f37eff 100644 --- a/R/dfa_trends.R +++ b/R/dfa_trends.R @@ -15,7 +15,6 @@ #' r <- rotate_trends(m) #' trends <- dfa_trends(r) dfa_trends <- function(rotated_modelfit, years = NULL) { - rotated <- rotated_modelfit n_ts <- dim(rotated$Z_rot)[2] n_trends <- dim(rotated$Z_rot)[3] @@ -28,7 +27,8 @@ dfa_trends <- function(rotated_modelfit, years = NULL) { trend_number = paste0("Trend ", sort(rep(seq_len(n_trends), n_years))), estimate = 
c(t(rotated$trends_mean)), lower = c(t(rotated$trends_lower)), - upper = c(t(rotated$trends_upper))) + upper = c(t(rotated$trends_upper)) + ) return(df) } diff --git a/R/find_dfa_trends.R b/R/find_dfa_trends.R index ce0f92a..25f26f5 100644 --- a/R/find_dfa_trends.R +++ b/R/find_dfa_trends.R @@ -25,7 +25,8 @@ #' y = s$y_sim, iter = 50, #' kmin = 1, kmax = 2, chains = 1, compare_normal = FALSE, #' variance = "equal", convergence_threshold = 1.1, -#' control = list(adapt_delta = 0.95, max_treedepth = 20)) +#' control = list(adapt_delta = 0.95, max_treedepth = 20) +#' ) #' m$summary #' m$best_model #' } @@ -34,15 +35,14 @@ #' @importFrom rlang .data find_dfa_trends <- function(y = y, - kmin = 1, - kmax = 5, - iter = 2000, - thin = 1, - compare_normal = FALSE, - convergence_threshold = 1.05, - variance = c("equal", "unequal"), - ...) { - + kmin = 1, + kmax = 5, + iter = 2000, + thin = 1, + compare_normal = FALSE, + convergence_threshold = 1.05, + variance = c("equal", "unequal"), + ...) { df <- data.frame( model = seq(1, ifelse(compare_normal == FALSE, length(variance) * length(seq(kmin, kmax)), @@ -72,10 +72,10 @@ find_dfa_trends <- function(y = y, # relative effective sample size log_lik <- loo::extract_log_lik(model$model, merge_chains = FALSE) - n_chains <- dim(rstan::extract(model$model, "log_lik", permuted=FALSE))[2] + n_chains <- dim(rstan::extract(model$model, "log_lik", permuted = FALSE))[2] rel_eff <- loo::relative_eff(exp(log_lik)) # calculate looic - df$looic[indx] <- loo::loo(log_lik, r_eff = rel_eff)$estimates["looic",1] + df$looic[indx] <- loo::loo(log_lik, r_eff = rel_eff)$estimates["looic", 1] # if model is best, keep it if (df$looic[indx] < best_loo & df$converge[indx] == TRUE) { @@ -97,10 +97,10 @@ find_dfa_trends <- function(y = y, df$num_trends[indx] <- i log_lik <- loo::extract_log_lik(model$model, merge_chains = FALSE) - n_chains <- dim(rstan::extract(model$model, "log_lik", permuted=FALSE))[2] + n_chains <- dim(rstan::extract(model$model, "log_lik", permuted = FALSE))[2] rel_eff <- loo::relative_eff(exp(log_lik)) # calculate looic - df$looic[indx] <- loo::loo(log_lik, r_eff = rel_eff)$estimates["looic",1] + df$looic[indx] <- loo::loo(log_lik, r_eff = rel_eff)$estimates["looic", 1] df$converge[indx] <- is_converged(model, convergence_threshold) # if model is best, keep it @@ -125,10 +125,10 @@ find_dfa_trends <- function(y = y, df$num_trends[indx] <- i log_lik <- loo::extract_log_lik(model$model, merge_chains = FALSE) - n_chains <- dim(rstan::extract(model$model, "log_lik", permuted=FALSE))[2] + n_chains <- dim(rstan::extract(model$model, "log_lik", permuted = FALSE))[2] rel_eff <- loo::relative_eff(exp(log_lik)) # calculate looic - df$looic[indx] <- loo::loo(log_lik, r_eff = rel_eff)$estimates["looic",1] + df$looic[indx] <- loo::loo(log_lik, r_eff = rel_eff)$estimates["looic", 1] df$converge[indx] <- is_converged(model, convergence_threshold) # if model is best, keep it @@ -138,8 +138,8 @@ find_dfa_trends <- function(y = y, } df$error[indx] <- "normal" df$cor[indx] <- "equal" - #df$max_rhat[indx] <- max(as.data.frame(summary(model$model)$summary)[,"Rhat"]) - #df$min_neff[indx] <- min(as.data.frame(summary(model$model)$summary)[,"n_eff"]) + # df$max_rhat[indx] <- max(as.data.frame(summary(model$model)$summary)[,"Rhat"]) + # df$min_neff[indx] <- min(as.data.frame(summary(model$model)$summary)[,"n_eff"]) indx <- indx + 1 } } @@ -153,10 +153,10 @@ find_dfa_trends <- function(y = y, df$num_trends[indx] <- i log_lik <- loo::extract_log_lik(model$model, merge_chains = FALSE) - 
n_chains <- dim(rstan::extract(model$model, "log_lik", permuted=FALSE))[2] + n_chains <- dim(rstan::extract(model$model, "log_lik", permuted = FALSE))[2] rel_eff <- loo::relative_eff(exp(log_lik)) # calculate looic - df$looic[indx] <- loo::loo(log_lik, r_eff = rel_eff)$estimates["looic",1] + df$looic[indx] <- loo::loo(log_lik, r_eff = rel_eff)$estimates["looic", 1] df$converge[indx] <- is_converged(model, convergence_threshold) # if model is best, keep it @@ -166,8 +166,8 @@ find_dfa_trends <- function(y = y, } df$error[indx] <- "normal" df$cor[indx] <- "independent" - #df$max_rhat[indx] <- max(as.data.frame(summary(model$model)$summary)[,"Rhat"]) - #df$min_neff[indx] <- min(as.data.frame(summary(model$model)$summary)[,"n_eff"]) + # df$max_rhat[indx] <- max(as.data.frame(summary(model$model)$summary)[,"Rhat"]) + # df$min_neff[indx] <- min(as.data.frame(summary(model$model)$summary)[,"n_eff"]) indx <- indx + 1 } } diff --git a/R/find_regimes.R b/R/find_regimes.R index 56bf7c3..204acf2 100644 --- a/R/find_regimes.R +++ b/R/find_regimes.R @@ -16,16 +16,14 @@ #' @examples #' data(Nile) #' find_regimes(log(Nile), iter = 50, chains = 1, max_regimes = 2) - find_regimes <- function(y, - sds = NULL, - min_regimes = 1, - max_regimes = 3, - iter = 2000, - thin = 1, - chains = 1, - ...) { - + sds = NULL, + min_regimes = 1, + max_regimes = 3, + iter = 2000, + thin = 1, + chains = 1, + ...) { df <- data.frame(regimes = seq(min_regimes, max_regimes), looic = NA) best_loo <- 1.0e10 best_model <- NA @@ -35,9 +33,9 @@ find_regimes <- function(y, chains = chains, ... ) looic <- loo.bayesdfa(fit) - loo_bad <- loo::pareto_k_table(looic)["(0.7, 1]","Count"] - loo_very_bad <- loo::pareto_k_table(looic)["(1, Inf)","Count"] - df$looic[which(df$regimes == regime)] = looic$estimates["looic", "Estimate"] + loo_bad <- loo::pareto_k_table(looic)["(0.7, 1]", "Count"] + loo_very_bad <- loo::pareto_k_table(looic)["(1, Inf)", "Count"] + df$looic[which(df$regimes == regime)] <- looic$estimates["looic", "Estimate"] if (fit$looic < best_loo) { best_loo <- fit$looic @@ -47,6 +45,8 @@ find_regimes <- function(y, } } - list(table = df, best_model = best_model, n_loo_bad = n_loo_bad, - n_loo_very_bad = n_loo_very_bad) + list( + table = df, best_model = best_model, n_loo_bad = n_loo_bad, + n_loo_very_bad = n_loo_very_bad + ) } diff --git a/R/find_swans.R b/R/find_swans.R index 6e56427..f0fe753 100644 --- a/R/find_swans.R +++ b/R/find_swans.R @@ -14,7 +14,7 @@ #' set.seed(1) #' s <- sim_dfa(num_trends = 1, num_ts = 3, num_years = 30) #' s$y_sim[1, 15] <- s$y_sim[1, 15] - 6 -#' plot(s$y_sim[1,], type = "o") +#' plot(s$y_sim[1, ], type = "o") #' abline(v = 15, col = "red") #' # only 1 chain and 250 iterations used so example runs quickly: #' m <- fit_dfa(y = s$y_sim, num_trends = 1, iter = 50, chains = 1, nu_fixed = 2) @@ -23,7 +23,6 @@ #' print(p) #' # a 1 in 1000 probability if was from a normal distribution: #' find_swans(r, plot = TRUE, threshold = 0.001) -#' #' @references #' Anderson, S.C., Branch, T.A., Cooper, A.B., and Dulvy, N.K. 2017. #' Black-swan events in animal populations. 
Proceedings of the National Academy @@ -33,9 +32,8 @@ #' @importFrom stats pnorm find_swans <- function(rotated_modelfit, - threshold = 0.01, - plot = FALSE) { - + threshold = 0.01, + plot = FALSE) { x <- rotated_modelfit$trends_mean d <- apply(x, 1, function(xx) c(NA, diff(xx))) sds <- apply(d, 2, sd, na.rm = TRUE) # sds != 1 @@ -63,7 +61,8 @@ find_swans <- function(rotated_modelfit, x = "time", y = "trend_value", color = "below_threshold" )) + - geom_point() + facet_wrap(~ trend_number) + geom_point() + + facet_wrap(~trend_number) print(g) } invisible(trends) diff --git a/R/fit_dfa.R b/R/fit_dfa.R index b08f8e2..cb6912a 100644 --- a/R/fit_dfa.R +++ b/R/fit_dfa.R @@ -65,6 +65,7 @@ #' @param gp_theta_prior A 2-element vector controlling the prior on the Gaussian process parameter in cov_exp_quad. #' This prior is a half-Student t prior, with the first argument of gp_theta_prior being the degrees of freedom (nu), #' and the second element being the standard deviation +#' @param expansion_prior Defaults to FALSE, if TRUE uses the parameter expansion prior of Ghosh & Dunson 2009 #' @param ... Any other arguments to pass to [rstan::sampling()]. #' @param par_list A vector of parameter names of variables to be estimated by Stan. If NULL, this will default to #' c("x", "Z", "sigma", "log_lik", "psi","xstar") for most models -- though if AR / MA, or Student-t models are used @@ -93,54 +94,54 @@ #' s <- sim_dfa(num_trends = 1, num_years = 20, num_ts = 3) #' # only 1 chain and 250 iterations used so example runs quickly: #' m <- fit_dfa(y = s$y_sim, iter = 50, chains = 1) -#'\dontrun{ +#' \dontrun{ #' # example of observation error covariates #' set.seed(42) -#' obs_covar = expand.grid("time"=1:20,"timeseries"=1:3,"covariate"=1) -#' obs_covar$value=rnorm(nrow(obs_covar),0,0.1) -#' m <- fit_dfa(y = s$y_sim, iter = 50, chains = 1, obs_covar=obs_covar) +#' obs_covar <- expand.grid("time" = 1:20, "timeseries" = 1:3, "covariate" = 1) +#' obs_covar$value <- rnorm(nrow(obs_covar), 0, 0.1) +#' m <- fit_dfa(y = s$y_sim, iter = 50, chains = 1, obs_covar = obs_covar) #' #' # example of process error covariates -#' pro_covar = expand.grid("time"=1:20,"trend"=1:2,"covariate"=1) -#' pro_covar$value=rnorm(nrow(pro_covar),0,0.1) -#' m <- fit_dfa(y = s$y_sim, iter = 50, chains = 1, num_trends = 2, pro_covar=pro_covar) +#' pro_covar <- expand.grid("time" = 1:20, "trend" = 1:2, "covariate" = 1) +#' pro_covar$value <- rnorm(nrow(pro_covar), 0, 0.1) +#' m <- fit_dfa(y = s$y_sim, iter = 50, chains = 1, num_trends = 2, pro_covar = pro_covar) #' #' # example of long format data #' s <- sim_dfa(num_trends = 1, num_years = 20, num_ts = 3) -#' obs <- c(s$y_sim[1,], s$y_sim[2,], s$y_sim[3,]) -#' long = data.frame("obs" = obs, "ts" = sort(rep(1:3,20)), "time" = rep(1:20,3)) -#' m = fit_dfa(y = long, data_shape = "long", iter = 50, chains = 1) +#' obs <- c(s$y_sim[1, ], s$y_sim[2, ], s$y_sim[3, ]) +#' long <- data.frame("obs" = obs, "ts" = sort(rep(1:3, 20)), "time" = rep(1:20, 3)) +#' m <- fit_dfa(y = long, data_shape = "long", iter = 50, chains = 1) #' #' # example of long format data with obs covariates #' s <- sim_dfa(num_trends = 1, num_years = 20, num_ts = 3) -#' obs <- c(s$y_sim[1,], s$y_sim[2,], s$y_sim[3,]) -#' long = data.frame("obs" = obs, "ts" = sort(rep(1:3,20)), "time" = rep(1:20,3)) -#' obs_covar = expand.grid("time"=1:20,"timeseries"=1:3,"covariate"=1:2) -#' obs_covar$value=rnorm(nrow(obs_covar),0,0.1) -#' m = fit_dfa(y = long, data_shape = "long", iter = 50, chains = 1,obs_covar=obs_covar) +#' obs <- 
c(s$y_sim[1, ], s$y_sim[2, ], s$y_sim[3, ]) +#' long <- data.frame("obs" = obs, "ts" = sort(rep(1:3, 20)), "time" = rep(1:20, 3)) +#' obs_covar <- expand.grid("time" = 1:20, "timeseries" = 1:3, "covariate" = 1:2) +#' obs_covar$value <- rnorm(nrow(obs_covar), 0, 0.1) +#' m <- fit_dfa(y = long, data_shape = "long", iter = 50, chains = 1, obs_covar = obs_covar) #' #' # example of model with Z constrained to be proportions and wide format data #' s <- sim_dfa(num_trends = 1, num_years = 20, num_ts = 3) -#' m = fit_dfa(y = s$y_sim, z_model = "proportion", iter = 50, chains = 1) +#' m <- fit_dfa(y = s$y_sim, z_model = "proportion", iter = 50, chains = 1) #' #' # example of model with Z constrained to be proportions and long format data #' s <- sim_dfa(num_trends = 1, num_years = 20, num_ts = 3) -#' obs <- c(s$y_sim[1,], s$y_sim[2,], s$y_sim[3,]) -#' long = data.frame("obs" = obs, "ts" = sort(rep(1:3,20)), "time" = rep(1:20,3)) -#' m = fit_dfa(y = long, data_shape = "long", z_model = "proportion", iter = 50, chains = 1) +#' obs <- c(s$y_sim[1, ], s$y_sim[2, ], s$y_sim[3, ]) +#' long <- data.frame("obs" = obs, "ts" = sort(rep(1:3, 20)), "time" = rep(1:20, 3)) +#' m <- fit_dfa(y = long, data_shape = "long", z_model = "proportion", iter = 50, chains = 1) #' #' #' # example of B-spline model with wide format data #' s <- sim_dfa(num_trends = 1, num_years = 20, num_ts = 3) -#' m = fit_dfa(y = s$y_sim, iter = 50, chains = 1, trend_model = "spline", n_knots = 10) +#' m <- fit_dfa(y = s$y_sim, iter = 50, chains = 1, trend_model = "spline", n_knots = 10) #' #' # example of B-spline model with wide format data #' s <- sim_dfa(num_trends = 1, num_years = 20, num_ts = 3) -#' m = fit_dfa(y = s$y_sim, iter = 50, chains = 1, trend_model = "gp", n_knots = 5) -#'} +#' m <- fit_dfa(y = s$y_sim, iter = 50, chains = 1, trend_model = "gp", n_knots = 5) +#' } fit_dfa <- function(y = y, num_trends = 1, varIndx = NULL, - scale = c("zscore","center","none"), + scale = c("zscore", "center", "none"), iter = 2000, chains = 4, thin = 1, @@ -157,31 +158,34 @@ fit_dfa <- function(y = y, obs_covar = NULL, pro_covar = NULL, z_bound = NULL, - z_model = c("dfa","proportion"), - trend_model = c("rw","spline","gp"), + z_model = c("dfa", "proportion"), + trend_model = c("rw", "spline", "gp"), n_knots = NULL, knot_locs = NULL, par_list = NULL, family = "gaussian", verbose = FALSE, - gp_theta_prior = c(3,1), + gp_theta_prior = c(3, 1), + expansion_prior = FALSE, ...) { # check arguments - data_shape <- match.arg(data_shape, c("wide","long")) - z_model <- match.arg(z_model, c("dfa","proportion")) - trend_model <- match.arg(trend_model, c("rw","spline","gp")) + data_shape <- match.arg(data_shape, c("wide", "long")) + z_model <- match.arg(z_model, c("dfa", "proportion")) + trend_model <- match.arg(trend_model, c("rw", "spline", "gp")) - obs_model <- match(family, c("gaussian", "gamma", "poisson", "nbinom2", - "binomial", "lognormal")) - if(is.na(obs_model)) { + obs_model <- match(family, c( + "gaussian", "gamma", "poisson", "nbinom2", + "binomial", "lognormal" + )) + if (is.na(obs_model)) { stop("Error: family not found. Please enter family as gaussian(), gamma(), etc.") } - if(family != "gaussian") { - if(data_shape=="wide") stop("Error: if family is non-gaussian, data must be in long format") - if(est_correlation == TRUE) stop("Error: correlation can't be estimated with non-gaussian data. 
Please set est_correlation=FALSE") + if (family != "gaussian") { + if (data_shape == "wide") stop("Error: if family is non-gaussian, data must be in long format") + if (est_correlation == TRUE) stop("Error: correlation can't be estimated with non-gaussian data. Please set est_correlation=FALSE") } - orig_data = y # save original data + orig_data <- y # save original data if (ncol(y) < nrow(y) && data_shape[1] == "wide") { warning( @@ -190,45 +194,47 @@ fit_dfa <- function(y = y, ) } if (data_shape[1] == "long") { - if(est_correlation==TRUE) { + if (est_correlation == TRUE) { stop("Estimation of the observation error correlation matrix not currently estimated when data are in long format") } - if(length(which(names(y)=="ts"))==0) { + if (length(which(names(y) == "ts")) == 0) { stop("Error: data shape is long, and must contain a field 'ts' representing time series dimension") } - if(length(which(names(y)=="time"))==0) { + if (length(which(names(y) == "time")) == 0) { stop("Error: data shape is long, and must contain a field 'time' representing time dimension") } - if(length(which(names(y)=="obs"))==0) { + if (length(which(names(y) == "obs")) == 0) { stop("Error: data shape is long, and must contain a field 'obs' representing observations") } # rescale if needed - #y$time <- y$time - min(y[["time"]]) + 1 # min time now = 1 + # y$time <- y$time - min(y[["time"]]) + 1 # min time now = 1 y$ts <- as.numeric(as.factor(y[["ts"]])) N <- max(y[["time"]]) P <- max(y[["ts"]]) } - if(data_shape[1]=="wide") { + if (data_shape[1] == "wide") { N <- ncol(y) # number of time steps P <- nrow(y) # number of time series if (nrow(y) < 3) { - stop("fit_dfa() only works with 3 or more time series. We detected ", - nrow(y), " time series.") + stop( + "fit_dfa() only works with 3 or more time series. We detected ", + nrow(y), " time series." + ) } } - if(!is.null(obs_covar)) { - if(ncol(obs_covar) != 4) { + if (!is.null(obs_covar)) { + if (ncol(obs_covar) != 4) { stop("observation covariates must be in a data frame with 4 columns") } } - if(!is.null(pro_covar)) { - if(ncol(pro_covar) != 4) { + if (!is.null(pro_covar)) { + if (ncol(pro_covar) != 4) { stop("process covariates must be in a data frame with 4 columns") } } - if(!is.null(z_bound) && length(z_bound)!=2) { + if (!is.null(z_bound) && length(z_bound) != 2) { stop("if you're using z bounds, this needs to be a 2-element vector") } @@ -237,10 +243,10 @@ fit_dfa <- function(y = y, nZ <- P * K - sum(seq_len(K)) # number of non-zero parameters that are unconstrained # standardizing data by rows only works if data provided in "wide" format - if(family == "gaussian") { - if(data_shape[1] == "wide") { + if (family == "gaussian") { + if (data_shape[1] == "wide") { for (i in seq_len(P)) { - if (scale[1]=="zscore") { + if (scale[1] == "zscore") { if (length(unique(na.omit(c(y[i, ])))) == 1L) { stop("Can't scale one or more of the time series because all values ", "are the same. 
Remove this/these time series or set `scale` = `center`.", @@ -249,23 +255,23 @@ fit_dfa <- function(y = y, } y[i, ] <- scale(y[i, ], center = TRUE, scale = TRUE) } - if(scale[1]=="center") { + if (scale[1] == "center") { y[i, ] <- scale(y[i, ], center = TRUE, scale = FALSE) } } } else { - if(scale[1]=="zscore") { + if (scale[1] == "zscore") { # standardize - for(i in seq_len(P)) { - indx = which(y[["ts"]] == i) - y[indx,"obs"] = scale(y[indx,"obs" ], center = TRUE, scale = TRUE) + for (i in seq_len(P)) { + indx <- which(y[["ts"]] == i) + y[indx, "obs"] <- scale(y[indx, "obs"], center = TRUE, scale = TRUE) } } - if(scale[1]=="center") { + if (scale[1] == "center") { # just center - for(i in seq_len(P)) { - indx = which(y[["ts"]] == i) - y[indx,"obs"] = scale(y[indx, "obs"], center = TRUE, scale = FALSE) + for (i in seq_len(P)) { + indx <- which(y[["ts"]] == i) + y[indx, "obs"] <- scale(y[indx, "obs"], center = TRUE, scale = FALSE) } } } @@ -300,7 +306,7 @@ fit_dfa <- function(y = y, nVariances <- length(unique(varIndx)) # indices of positive values - Stan can't handle NAs - if(data_shape[1] == "wide") { + if (data_shape[1] == "wide") { row_indx_pos <- matrix(rep(seq_len(P), N), P, N)[!is.na(y)] col_indx_pos <- matrix(sort(rep(seq_len(N), P)), P, N)[!is.na(y)] n_pos <- length(row_indx_pos) @@ -309,15 +315,15 @@ fit_dfa <- function(y = y, n_na <- length(row_indx_na) y <- y[!is.na(y)] } else { - y = y[which(!is.na(y[["obs"]])),] - row_indx_pos = y[["ts"]] - col_indx_pos = y[["time"]] - n_pos = length(row_indx_pos) + y <- y[which(!is.na(y[["obs"]])), ] + row_indx_pos <- y[["ts"]] + col_indx_pos <- y[["time"]] + n_pos <- length(row_indx_pos) # these are just dummy placeholders - row_indx_na = matrix(1,1,1)[is.na(runif(1))] - col_indx_na = matrix(1,1,1)[is.na(runif(1))] + row_indx_na <- matrix(1, 1, 1)[is.na(runif(1))] + col_indx_na <- matrix(1, 1, 1)[is.na(runif(1))] n_na <- length(row_indx_na) - y = y[["obs"]] + y <- y[["obs"]] } # flag for whether to use a normal dist @@ -325,80 +331,86 @@ fit_dfa <- function(y = y, if (estimate_nu) use_normal <- 0 # competing flags # covariates - if(!is.null(obs_covar)) { - obs_covar_index <- as.matrix(obs_covar[,c("time","timeseries","covariate")]) + if (!is.null(obs_covar)) { + obs_covar_index <- as.matrix(obs_covar[, c("time", "timeseries", "covariate")]) num_obs_covar <- nrow(obs_covar_index) - n_obs_covar <- length(unique(obs_covar_index[,"covariate"])) - obs_covar_value <- obs_covar[,"value"] + n_obs_covar <- length(unique(obs_covar_index[, "covariate"])) + obs_covar_value <- obs_covar[, "value"] - if(data_shape[1] == "wide") { + if (data_shape[1] == "wide") { match_obs_covar <- rep(0, num_obs_covar) } else { - match_obs_covar <- match(paste(obs_covar$time,obs_covar$timeseries), paste(Y$time[which(!is.na(Y$obs))],Y$ts[which(!is.na(Y$obs))])) + match_obs_covar <- match(paste(obs_covar$time, obs_covar$timeseries), paste(Y$time[which(!is.na(Y$obs))], Y$ts[which(!is.na(Y$obs))])) + keep <- which(!is.na(match_obs_covar)) + # keep covariates not associated with missing values + obs_covar_index <- obs_covar_index[keep, ] + num_obs_covar <- nrow(obs_covar_index) + n_obs_covar <- length(unique(obs_covar_index[, "covariate"])) + obs_covar_value <- obs_covar[keep, "value"] + # keep matches + match_obs_covar <- match_obs_covar[keep] } - } else { num_obs_covar <- 0 n_obs_covar <- 0 obs_covar_value <- c(0)[0] match_obs_covar <- c(0)[0] - obs_covar_index <- matrix(0,1,3)[c(0)[0],] + obs_covar_index <- matrix(0, 1, 3)[c(0)[0], ] } - if(!is.null(pro_covar)) { - 
pro_covar_index <- as.matrix(pro_covar[,c("time","trend","covariate")]) + if (!is.null(pro_covar)) { + pro_covar_index <- as.matrix(pro_covar[, c("time", "trend", "covariate")]) num_pro_covar <- nrow(pro_covar_index) - n_pro_covar <- length(unique(pro_covar_index[,"covariate"])) - pro_covar_value <- pro_covar[,"value"] - + n_pro_covar <- length(unique(pro_covar_index[, "covariate"])) + pro_covar_value <- pro_covar[, "value"] } else { num_pro_covar <- 0 n_pro_covar <- 0 pro_covar_value <- c(0)[0] - pro_covar_index <- matrix(0,1,3)[c(0)[0],] + pro_covar_index <- matrix(0, 1, 3)[c(0)[0], ] } - if(is.null(z_bound)) { - z_bound <- c(-100,100) + if (is.null(z_bound)) { + z_bound <- c(-100, 100) } n_sigma_process <- 1 - if(equal_process_sigma == FALSE) n_sigma_process <- K + if (equal_process_sigma == FALSE) n_sigma_process <- K est_sigma_process <- 0 - if(estimate_process_sigma == TRUE) est_sigma_process <- 1 + if (estimate_process_sigma == TRUE) est_sigma_process <- 1 # default args that need to be passed in est_spline <- 0 est_gp <- 0 est_rw <- 1 # these are flags specifying model structure. default is rw - if(is.null(n_knots)) n_knots <- round(N/3) - if(is.null(knot_locs)) knot_locs = seq(1,N,length.out=n_knots) + if (is.null(n_knots)) n_knots <- round(N / 3) + if (is.null(knot_locs)) knot_locs <- seq(1, N, length.out = n_knots) distKnots <- matrix(0, n_knots, n_knots) - #distKnots21 <- matrix(0, N, n_knots) + # distKnots21 <- matrix(0, N, n_knots) distKnots21_pred <- rep(0, n_knots) # set up cubic b-splines design matrix B_spline <- matrix(0, n_knots, N) - if(trend_model == "spline") { + if (trend_model == "spline") { est_spline <- 1 est_rw <- 0 # turn off things conventionally estimated when trend is a random walk estimate_trend_ar <- FALSE estimate_trend_ma <- FALSE estimate_nu <- FALSE - B_spline <- t(splines::bs(1:N, df=n_knots, degree = 3, intercept = TRUE)) + B_spline <- t(splines::bs(1:N, df = n_knots, degree = 3, intercept = TRUE)) } - if(trend_model == "gp") { + if (trend_model == "gp") { # Gaussian kernel - est_gp = 1 + est_gp <- 1 est_rw <- 0 - if(is.null(knot_locs)) knot_locs = seq(1,N,length.out=n_knots) - distKnots = as.matrix(stats::dist(knot_locs)) # distances between time stamps - distAll = as.matrix(stats::dist(c(1:N,knot_locs))) # distances between data and knot locs - #distKnots21 <- t(distAll[-seq_len(N), 1:N]) - distKnots21_pred <- as.matrix(stats::dist(c(N+1,knot_locs)))[1,-1] - #distKnots <- distKnots ^ 2 - #distKnots21 <- distKnots21 ^ 2 - distKnots21_pred <- distKnots21_pred ^ 2 + if (is.null(knot_locs)) knot_locs <- seq(1, N, length.out = n_knots) + distKnots <- as.matrix(stats::dist(knot_locs)) # distances between time stamps + distAll <- as.matrix(stats::dist(c(1:N, knot_locs))) # distances between data and knot locs + # distKnots21 <- t(distAll[-seq_len(N), 1:N]) + distKnots21_pred <- as.matrix(stats::dist(c(N + 1, knot_locs)))[1, -1] + # distKnots <- distKnots ^ 2 + # distKnots21 <- distKnots21 ^ 2 + distKnots21_pred <- distKnots21_pred^2 est_sigma_process <- 1 # turn this on as a scale for variance estimate_trend_ar <- FALSE estimate_trend_ma <- FALSE @@ -406,12 +418,12 @@ fit_dfa <- function(y = y, y_int <- rep(0, length(y)) - if(family %in% c("binomial", "nbinom2", "poisson")) { - y_int = as.integer(y) + if (family %in% c("binomial", "nbinom2", "poisson")) { + y_int <- as.integer(y) } - est_sigma_params <- ifelse(family %in% c("gaussian","lognormal"), 1, 0); - est_gamma_params <- ifelse(family=="gamma",1,0); - est_nb2_params <- 
ifelse(family=="nbinom2",1,0); + est_sigma_params <- ifelse(family %in% c("gaussian", "lognormal"), 1, 0) + est_gamma_params <- ifelse(family == "gamma", 1, 0) + est_nb2_params <- ifelse(family == "nbinom2", 1, 0) data_list <- list( N = N, @@ -449,8 +461,8 @@ fit_dfa <- function(y = y, pro_covar_value = pro_covar_value, pro_covar_index = pro_covar_index, z_bound = z_bound, - long_format = ifelse(data_shape[1]=="wide",0,1), - proportional_model = ifelse(z_model[1]=="dfa",0,1), + long_format = ifelse(data_shape[1] == "wide", 0, 1), + proportional_model = ifelse(z_model[1] == "dfa", 0, 1), est_sigma_process = est_sigma_process, n_sigma_process = n_sigma_process, est_rw = est_rw, @@ -459,39 +471,41 @@ fit_dfa <- function(y = y, n_knots = n_knots, knot_locs = knot_locs, est_gp = est_gp, - #distKnots = distKnots, - #distKnots21 = distKnots21, + # distKnots = distKnots, + # distKnots21 = distKnots21, obs_model = obs_model, - distKnots21_pred = matrix(distKnots21_pred,nrow=1), + distKnots21_pred = matrix(distKnots21_pred, nrow = 1), est_sigma_params = est_sigma_params, est_gamma_params = est_gamma_params, est_nb2_params = est_nb2_params, - gp_theta_prior = gp_theta_prior + gp_theta_prior = gp_theta_prior, + use_expansion_prior = as.integer(expansion_prior) ) - if(is.null(par_list)) { - pars <- c("x", "Z", "log_lik", "psi","xstar") + if (is.null(par_list)) { + pars <- c("x", "Z", "log_lik", "xstar") + if (expansion_prior) pars <- c(pars, "psi") } else { - pars = par_list + pars <- par_list } # family - if(family %in% c("gaussian","lognormal")) pars <- c(pars, "sigma") - if(family %in% c("gamma")) pars <- c(pars, "gamma_a") - if(family %in% c("nbinom2")) pars <- c(pars, "nb2_phi") + if (family %in% c("gaussian", "lognormal")) pars <- c(pars, "sigma") + if (family %in% c("gamma")) pars <- c(pars, "gamma_a") + if (family %in% c("nbinom2")) pars <- c(pars, "nb2_phi") if (est_correlation) pars <- c(pars, "Omega", "Sigma") # add correlation matrix if (estimate_nu) pars <- c(pars, "nu") if (estimate_trend_ar) pars <- c(pars, "phi") if (estimate_trend_ma) pars <- c(pars, "theta") - if(!is.null(obs_covar)) pars <- c(pars, "b_obs") - if(!is.null(pro_covar)) pars <- c(pars, "b_pro") - if(est_sigma_process) pars <- c(pars, "sigma_process") - if(trend_model=="gp") pars <- c(pars, "gp_theta") + if (!is.null(obs_covar)) pars <- c(pars, "b_obs") + if (!is.null(pro_covar)) pars <- c(pars, "b_pro") + if (est_sigma_process) pars <- c(pars, "sigma_process") + if (trend_model == "gp") pars <- c(pars, "gp_theta") - if(!is.null(par_list)) { - if(par_list[1]=="all") { - pars <- NA # removed pred + if (!is.null(par_list)) { + if (par_list[1] == "all") { + pars <- NA # removed pred } } @@ -521,6 +535,7 @@ fit_dfa <- function(y = y, ) } } + out[["sampling_args"]] <- sampling_args out[["orig_data"]] <- orig_data out[["shape"]] <- data_shape @@ -536,4 +551,3 @@ fit_dfa <- function(y = y, out <- structure(out, class = "bayesdfa") out } - diff --git a/R/fit_regimes.R b/R/fit_regimes.R index 86bf5f6..636d094 100644 --- a/R/fit_regimes.R +++ b/R/fit_regimes.R @@ -23,15 +23,13 @@ #' @examples #' data(Nile) #' fit_regimes(log(Nile), iter = 50, n_regimes = 1) - fit_regimes <- function(y, - sds = NULL, - n_regimes = 2, - iter = 2000, - thin = 1, - chains = 1, - ...) { - + sds = NULL, + n_regimes = 2, + iter = 2000, + thin = 1, + chains = 1, + ...) 
{ est_sigma <- 0 if (is.null(sds)) { # estimate sigma, instead of using fixed values @@ -51,7 +49,8 @@ fit_regimes <- function(y, pars = c("mu_k", "sigma_k", "log_lik") ) - m <- rstan::sampling(object=stanmodels$regime_1, + m <- rstan::sampling( + object = stanmodels$regime_1, data = stan_data, iter = iter, chains = chains, @@ -76,7 +75,8 @@ fit_regimes <- function(y, ) ) - m <- rstan::sampling(object=stanmodels$hmm_gaussian, + m <- rstan::sampling( + object = stanmodels$hmm_gaussian, data = stan_data, iter = iter, thin = thin, @@ -88,11 +88,11 @@ fit_regimes <- function(y, ) } - log_lik <- loo::extract_log_lik(m, merge_chains=FALSE) - #n_chains = dim(rstan::extract(m, "log_lik", permuted=FALSE))[2] + log_lik <- loo::extract_log_lik(m, merge_chains = FALSE) + # n_chains = dim(rstan::extract(m, "log_lik", permuted=FALSE))[2] rel_eff <- loo::relative_eff(exp(log_lik)) # calculate looic - looic <- loo::loo(log_lik, r_eff = rel_eff)$estimates["looic",1] + looic <- loo::loo(log_lik, r_eff = rel_eff)$estimates["looic", 1] list(model = m, y = y, looic = looic) } diff --git a/R/invert_chains.R b/R/invert_chains.R index 5f3c76c..8bea5e6 100644 --- a/R/invert_chains.R +++ b/R/invert_chains.R @@ -20,21 +20,19 @@ #' m <- fit_dfa(y = s$y_sim, num_trends = 1, iter = 30, chains = 2) #' # chains were already inverted, but we can redo that, as an example, with: #' find_inverted_chains(m$model, plot = TRUE) - find_inverted_chains <- function(model, trend = 1, plot = FALSE) { - - chains = NULL # required for dplyr 0.8 update - parameters = NULL - value = NULL + chains <- NULL # required for dplyr 0.8 update + parameters <- NULL + value <- NULL e <- rstan::extract(model, permuted = FALSE) v <- reshape2::melt(e) vv <- v[grepl(paste0("x\\[", trend), v$parameters), ] - vv$parameters = as.factor(as.character(vv$parameters)) # needed with dplyr 0.8, all levels returned otherwise + vv$parameters <- as.factor(as.character(vv$parameters)) # needed with dplyr 0.8, all levels returned otherwise vv <- dplyr::group_by(vv, chains, parameters) vv <- dplyr::summarise(vv, estimate = stats::median(value)) zz <- v[grepl(paste0("Z\\["), v$parameters), ] - zz$parameters = as.factor(as.character(zz$parameters)) # needed with dplyr 0.8, all levels returned otherwise + zz$parameters <- as.factor(as.character(zz$parameters)) # needed with dplyr 0.8, all levels returned otherwise zz <- zz[grepl(paste0(trend, "]"), zz$parameters), ] zz <- dplyr::group_by(zz, chains, parameters) zz <- dplyr::summarise(zz, estimate = stats::median(value)) @@ -71,11 +69,11 @@ find_inverted_chains <- function(model, trend = 1, plot = FALSE) { (sum((pred1_loadings - pred0_loadings)^2) + sum((pred1_trend - pred0_trend)^2))) { # flip this chain -- seems to be something not right with commented out line - #flipped_chains <- ifelse(flipped_chains == 0, i, c(flipped_chains, i)) - if(flipped_chains==0) { - flipped_chains = i + # flipped_chains <- ifelse(flipped_chains == 0, i, c(flipped_chains, i)) + if (flipped_chains == 0) { + flipped_chains <- i } else { - flipped_chains = c(flipped_chains, i) + flipped_chains <- c(flipped_chains, i) } } } @@ -106,10 +104,10 @@ invert_chains <- function(model, trends = 1, print = FALSE, ...) 
{ for (f_ in f) { for (i in grep(paste0("x\\[", k), pars)) { - e[, f_, i] <- -1*e[, f_, i] + e[, f_, i] <- -1 * e[, f_, i] } for (i in grep(paste0("Z\\[[0-9]+,", k, "\\]"), pars)) { - e[, f_, i] <- -1*e[, f_, i] + e[, f_, i] <- -1 * e[, f_, i] } } } diff --git a/R/loo.R b/R/loo.R index 8331382..f7639bc 100644 --- a/R/loo.R +++ b/R/loo.R @@ -30,7 +30,8 @@ loo.bayesdfa <- function(x, ...) { loo::loo.array(log_lik, r_eff = rel_eff, save_psis = FALSE, - ...) + ... + ) } #' @name loo diff --git a/R/plot_fitted.R b/R/plot_fitted.R index 2308d30..01212f0 100644 --- a/R/plot_fitted.R +++ b/R/plot_fitted.R @@ -22,33 +22,33 @@ #' print(p) #' } plot_fitted <- function(modelfit, conf_level = 0.95, names = NULL, spaghetti = FALSE) { - df <- dfa_fitted(modelfit, conf_level = conf_level, names = names) df$ID <- as.factor(df$ID) - if(spaghetti == TRUE) { - + if (spaghetti == TRUE) { cols <- viridis(length(unique((df$ID))), end = 0.8) p1 <- ggplot(df) + - geom_line(aes_string(x = "time", y = "y", group = "ID"), - color = "grey50", size = 0.5) + - geom_line(aes_string(x = "time", y = "estimate", group = "ID", color = "ID"), - size = 1.2) + - scale_color_manual(values = cols) + - xlab("Time") + - theme(legend.position = "none") - + geom_line(aes_string(x = "time", y = "y", group = "ID"), + color = "grey50", size = 0.5 + ) + + geom_line(aes_string(x = "time", y = "estimate", group = "ID", color = "ID"), + size = 1.2 + ) + + scale_color_manual(values = cols) + + xlab("Time") + + theme(legend.position = "none") } else { - p1 <- ggplot(df) + geom_ribbon(aes_string(x = "time", ymin = "lower", ymax = "upper"), alpha = 0.4) + geom_line(aes_string(x = "time", y = "estimate")) + geom_point(aes_string(x = "time", y = "y"), col = "red", size = 0.5, - alpha = 0.4) + + alpha = 0.4 + ) + facet_wrap("ID", scales = "free_y") + - xlab("Time") + ylab("") + xlab("Time") + + ylab("") } p1 } diff --git a/R/plot_loadings.R b/R/plot_loadings.R index e2a4d9e..3cfb1b4 100644 --- a/R/plot_loadings.R +++ b/R/plot_loadings.R @@ -30,22 +30,22 @@ #' plot_loadings(r, violin = FALSE, facet = FALSE) #' plot_loadings(r, violin = TRUE, facet = FALSE) #' plot_loadings(r, violin = TRUE, facet = TRUE) - plot_loadings <- function(rotated_modelfit, names = NULL, facet = TRUE, violin = TRUE, conf_level = 0.95, - threshold=NULL) { - + threshold = NULL) { v <- dfa_loadings(rotated_modelfit, - summary = FALSE, - names = names, - conf_level = conf_level) + summary = FALSE, + names = names, + conf_level = conf_level + ) df <- dfa_loadings(rotated_modelfit, - summary = TRUE, - names = names, - conf_level = conf_level) + summary = TRUE, + names = names, + conf_level = conf_level + ) # filter values below threshold if (!is.null(threshold)) { @@ -63,7 +63,9 @@ plot_loadings <- function(rotated_modelfit, position = position_dodge(0.3), width = 0 ) + geom_hline(yintercept = 0, lty = 2) + - coord_flip() + xlab("Time Series") + ylab("Loading") + coord_flip() + + xlab("Time Series") + + ylab("Loading") } if (violin) { @@ -73,11 +75,13 @@ plot_loadings <- function(rotated_modelfit, )) + geom_violin(color = NA) + geom_hline(yintercept = 0, lty = 2) + - coord_flip() + xlab("Time Series") + ylab("Loading") + coord_flip() + + xlab("Time Series") + + ylab("Loading") } if (facet) { - p1 <- p1 + facet_wrap(~ trend, scales = "free_x") + p1 <- p1 + facet_wrap(~trend, scales = "free_x") } p1 diff --git a/R/plot_regime_model.R b/R/plot_regime_model.R index e4b60ca..c966cb9 100644 --- a/R/plot_regime_model.R +++ b/R/plot_regime_model.R @@ -19,10 +19,10 @@ #' data(Nile) #' m 
<- fit_regimes(log(Nile), n_regimes = 2, chains = 1, iter = 50) #' plot_regime_model(m) -#' plot_regime_model(m, plot_prob_indices=c(2)) +#' plot_regime_model(m, plot_prob_indices = c(2)) #' plot_regime_model(m, type = "means") #' } - +#' plot_regime_model <- function(model, probs = c(0.05, 0.95), type = c("probability", "means"), regime_prob_threshold = 0.9, @@ -39,8 +39,9 @@ plot_regime_model <- function(model, probs = c(0.05, 0.95), mu_k_low <- apply(mu_k, 2, quantile, probs = probs[[1]]) mu_k_high <- apply(mu_k, 2, quantile, probs = probs[[2]]) mu_k <- apply(mu_k, 2, median) - confident_regimes <- apply(gamma_tk, 2:3, function(x) - mean(x > 0.5) > regime_prob_threshold) + confident_regimes <- apply(gamma_tk, 2:3, function(x) { + mean(x > 0.5) > regime_prob_threshold + }) regime_indexes <- apply(confident_regimes, 1, function(x) { w <- which(x) if (length(w) == 0) NA else w diff --git a/R/plot_trends.R b/R/plot_trends.R index e93f653..6610987 100644 --- a/R/plot_trends.R +++ b/R/plot_trends.R @@ -20,20 +20,20 @@ #' r <- rotate_trends(m) #' p <- plot_trends(r) #' print(p) - plot_trends <- function(rotated_modelfit, - years = NULL, - highlight_outliers = FALSE, - threshold = 0.01) { - + years = NULL, + highlight_outliers = FALSE, + threshold = 0.01) { rotated <- rotated_modelfit df <- dfa_trends(rotated, years = years) # make faceted ribbon plot of trends p1 <- ggplot(df, aes_string(x = "time", y = "estimate")) + geom_ribbon(aes_string(ymin = "lower", ymax = "upper"), alpha = 0.4) + - geom_line() + facet_wrap("trend_number") + - xlab("Time") + ylab("") + geom_line() + + facet_wrap("trend_number") + + xlab("Time") + + ylab("") if (highlight_outliers) { swans <- find_swans(rotated, threshold = threshold) diff --git a/R/predicted.R b/R/predicted.R index c8a4878..90f50ad 100644 --- a/R/predicted.R +++ b/R/predicted.R @@ -11,14 +11,14 @@ #' set.seed(42) #' s <- sim_dfa(num_trends = 1, num_years = 20, num_ts = 3) #' # only 1 chain and 1000 iterations used so example runs quickly: -#' m <- fit_dfa(y = s$y_sim, iter = 2000, chains = 3, num_trends=1) +#' m <- fit_dfa(y = s$y_sim, iter = 2000, chains = 3, num_trends = 1) #' pred <- predicted(m) #' } predicted <- function(fitted_model) { - Z <- rstan::extract(fitted_model$model, "Z", permuted=FALSE) - x <- rstan::extract(fitted_model$model, "x", permuted=FALSE) - Zperm <- rstan::extract(fitted_model$model, "Z", permuted=TRUE) - xperm <- rstan::extract(fitted_model$model, "x", permuted=TRUE) + Z <- rstan::extract(fitted_model$model, "Z", permuted = FALSE) + x <- rstan::extract(fitted_model$model, "x", permuted = FALSE) + Zperm <- rstan::extract(fitted_model$model, "Z", permuted = TRUE) + xperm <- rstan::extract(fitted_model$model, "x", permuted = TRUE) n_ts <- dim(Zperm$Z)[2] n_y <- dim(xperm$x)[3] @@ -27,12 +27,12 @@ predicted <- function(fitted_model) { n_mcmc <- dim(x)[1] pred <- array(0, c(n_mcmc, n_chains, n_y, n_ts)) - for(i in 1:n_mcmc) { - for(chain in 1:n_chains) { + for (i in 1:n_mcmc) { + for (chain in 1:n_chains) { # for each MCMC draw / chain - x_i <- t(matrix(x[i,chain,], nrow=n_trends, ncol=n_y)) - Z_i <- t(matrix(Z[i,chain,], nrow=n_ts, ncol=n_trends)) - pred[i,chain,,] <- x_i %*% Z_i + x_i <- t(matrix(x[i, chain, ], nrow = n_trends, ncol = n_y)) + Z_i <- t(matrix(Z[i, chain, ], nrow = n_ts, ncol = n_trends)) + pred[i, chain, , ] <- x_i %*% Z_i } } diff --git a/R/rotate_trends.R b/R/rotate_trends.R index 024aaa4..6a0d554 100644 --- a/R/rotate_trends.R +++ b/R/rotate_trends.R @@ -15,13 +15,12 @@ #' m <- fit_dfa(y = s$y_sim, iter = 50, 
chains = 1) #' r <- rotate_trends(m) #' plot_trends(r) - rotate_trends <- function(fitted_model, conf_level = 0.95, invert = FALSE) { # get the inverse of the rotation matrix n_mcmc <- dim(fitted_model$samples)[2] * dim(fitted_model$samples)[1] - flip <- ifelse(invert==FALSE, 1, -1) + flip <- ifelse(invert == FALSE, 1, -1) temp <- reshape_samples(fitted_model$samples) Z <- temp$Z diff --git a/R/sim.R b/R/sim.R index 9581924..b4da6cb 100644 --- a/R/sim.R +++ b/R/sim.R @@ -39,14 +39,17 @@ #' #' set.seed(42) #' x <- sim_dfa(extreme_value = -4, extreme_loc = 10) -#' matplot(t(x$x), type = "l");abline(v = 10) -#' matplot(t(x$pred), type = "l");abline(v = 10) +#' matplot(t(x$x), type = "l") +#' abline(v = 10) +#' matplot(t(x$pred), type = "l") +#' abline(v = 10) #' #' set.seed(42) #' x <- sim_dfa() -#' matplot(t(x$x), type = "l");abline(v = 10) -#' matplot(t(x$pred), type = "l");abline(v = 10) -#' +#' matplot(t(x$x), type = "l") +#' abline(v = 10) +#' matplot(t(x$pred), type = "l") +#' abline(v = 10) #' @export sim_dfa <- function(num_trends = 1, @@ -65,7 +68,7 @@ sim_dfa <- function(num_trends = 1, y_ignore <- matrix(rnorm(num_ts * num_years), nrow = num_ts, ncol = num_years) d <- fit_dfa(y_ignore, - num_trends = num_trends, sample = FALSE, scale="center", + num_trends = num_trends, sample = FALSE, scale = "center", varIndx = varIndx, nu_fixed = nu_fixed, trend_model = "rw" ) @@ -86,9 +89,8 @@ sim_dfa <- function(num_trends = 1, # initial state for each trend for (k in seq_len(d$sampling_args$data$K)) { - if (!is.null(user_supplied_deviations)) { - devs <- user_supplied_deviations[,k] + devs <- user_supplied_deviations[, k] } else { devs <- rt(d$sampling_args$data$N, df = d$sampling_args$data$nu_fixed) } diff --git a/R/trend_cor.R b/R/trend_cor.R index 788cd0b..0267001 100644 --- a/R/trend_cor.R +++ b/R/trend_cor.R @@ -35,23 +35,25 @@ #' s <- sim_dfa(num_trends = 1, num_years = 15) #' m <- fit_dfa(y = s$y_sim, num_trends = 1, iter = 50, chains = 1) #' r <- rotate_trends(m) -#' n_years <- ncol(r$trends[,1,]) +#' n_years <- ncol(r$trends[, 1, ]) #' fake_dat <- rnorm(n_years, 0, 1) #' correlation <- trend_cor(r, fake_dat, trend_samples = 25) #' hist(correlation) -#' correlation <- trend_cor(r, y = fake_dat, time_window = 5:15, -#' trend_samples = 25) +#' correlation <- trend_cor(r, +#' y = fake_dat, time_window = 5:15, +#' trend_samples = 25 +#' ) #' hist(correlation) #' @export trend_cor <- function(rotated_modelfit, - y, - trend = 1, - time_window = seq_len(length(y)), - trend_samples = 100, - stan_iter = 300, - stan_chains = 1, - ...) { + y, + trend = 1, + time_window = seq_len(length(y)), + trend_samples = 100, + stan_iter = 300, + stan_chains = 1, + ...) { # must be even to cleanly divide by 2 later: @@ -67,7 +69,8 @@ trend_cor <- function(rotated_modelfit, out <- vapply(seq_len(length(samples)), FUN = function(i) { xi <- as.numeric(scale(as.numeric(x[samples[i], ]))) - m <- rstan::sampling(object=stanmodels$corr, + m <- rstan::sampling( + object = stanmodels$corr, data = list(x = xi, y = y, N = length(y)), iter = stan_iter, chains = stan_chains, warmup = stan_iter / 2, ... ) diff --git a/docs/404.html b/docs/404.html index 24c26d7..660d11a 100644 --- a/docs/404.html +++ b/docs/404.html @@ -71,7 +71,7 @@
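The substantive change in the R/ diffs above (beyond the styler reformatting) is the new expansion_prior toggle in fit_dfa(). A minimal sketch of the two settings, assuming the simulated-data setup from the roxygen examples above (the object names m_default and m_expansion are illustrative only):

library(bayesdfa)
set.seed(42)
s <- sim_dfa(num_trends = 1, num_years = 20, num_ts = 3)
# default (expansion_prior = FALSE): no positive constraint on the Z diagonal;
# identifiability is instead resolved post-hoc by inverting MCMC chains
m_default <- fit_dfa(y = s$y_sim, num_trends = 1, iter = 50, chains = 1)
# expansion_prior = TRUE: positive constraint on the Z diagonal, combined with
# the Ghosh & Dunson (2009) parameter expansion prior on trends and loadings;
# note "psi" is only added to the monitored parameters in this case
m_expansion <- fit_dfa(
  y = s$y_sim, num_trends = 1, iter = 50, chains = 1,
  expansion_prior = TRUE
)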
diff --git a/docs/articles/bayesdfa.html b/docs/articles/bayesdfa.html index 09d5b04..86c3e1b 100644 --- a/docs/articles/bayesdfa.html +++ b/docs/articles/bayesdfa.html @@ -31,7 +31,7 @@ @@ -97,7 +97,7 @@vignettes/bayesdfa.Rmd
round(r2$Z_rot_mean, 2)
## [,1] [,2]
-## [1,] 1.25 -0.84
-## [2,] -60.06 40.73
-## [3,] 77.03 -112.99
-## [4,] 51.76 -30.41
+## [1,] -90.54 -41.89
+## [2,] -19.10 -117.64
+## [3,] -7.55 58.95
+## [4,] -49.06 15.14
These loadings can also be plotted with the plot_loadings()
function. This shows the posterior densities of the loadings as violin plots, with color proportional to the probability of each loading being different from 0.
plot_loadings(r2) + theme_bw()
loo1 <- loo(f1)
loo1$estimates
## Estimate SE
-## elpd_loo -2.719423e+04 5.625610e+03
-## p_loo -1.998401e-15 1.094370e-15
-## looic 5.438846e+04 1.125122e+04
-where 5.4388462 × 10^4 is the estimate and 1.125122 × 10^4 is the standard error.
+## elpd_loo -2.973137e+04 6.859515e+03 +## p_loo -2.220446e-16 2.107833e-15 +## looic 5.946275e+04 1.371903e+04 +where 5.9462745 × 10^4 is the estimate and 1.371903 × 10^4 is the standard error.
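Since looic is defined as -2 × elpd_loo, the estimate and looic rows are consistent with each other; a quick sanity check in R, using the loo1 object created above:
# looic equals -2 * elpd_loo by definition of the LOO information criterion
-2 * loo1$estimates["elpd_loo", "Estimate"] # matches loo1$estimates["looic", "Estimate"]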
As an alternative to fitting each model individually as we did above, we also developed the find_dfa_trends() function
to automate fitting a larger number of models. In addition to evaluating different numbers of trends, this function allows the user to optionally evaluate models with normal and Student-t process errors, and alternative variance structures (the observation variances of the time series being equal, or not). For example, to fit models with 1:5 trends, both Student-t and normal errors, and equal and unequal variances, the call would be as follows (a fuller sketch appears after the truncated snippet below)
m <- find_dfa_trends(
@@ -290,13 +290,13 @@
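A hedged sketch of the full call; argument names are taken from the find_dfa_trends() documentation, and the iteration and chain settings are placeholders chosen only for speed:
m <- find_dfa_trends(
  y = s$y_sim, iter = 50, chains = 1,
  kmin = 1, kmax = 5, # evaluate models with 1:5 trends
  compare_normal = TRUE, # fit both Student-t and normal process errors
  variance = c("equal", "unequal") # equal and unequal observation variances
)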
We can also look at the estimated nu
parameter, which shows some support for using the Student-t distribution (values greater than ~30 produce behavior similar to a normal distribution). One way to generate this summary is sketched after the output below:
-## V1
-## Min. :2.364
-## 1st Qu.:2.364
-## Median :2.364
-## Mean :2.364
-## 3rd Qu.:2.364
-## Max. :2.364
+## V1
+## Min. :2.82
+## 1st Qu.:2.82
+## Median :2.82
+## Mean :2.82
+## 3rd Qu.:2.82
+## Max. :2.82
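A sketch of one way to produce a summary like the one above, assuming m is a fit_dfa() model fitted with estimate_nu = TRUE and that its $model element is the underlying stanfit object:
nu_draws <- rstan::extract(m$model, pars = "nu")$nu # posterior draws of nu
summary(nu_draws)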
vignettes/combining_data.Rmd
r <- rotate_trends(fit_1)
round(r$Z_rot_mean, 3)
## 1
-## 1 2.535
-## 2 4.551
-## 3 1.412
+## 1
+## 1 -98.567
+## 2 -15.488
+## 3 19.395
Now, we’ll pretend that in time steps 1:50 we have observations from time series 1 (but not the others). We’ll fit several additional models, adding back data points in steps of 10 and going backwards in time. All of these runs use time points 51:100 for time series 2 and 3, but include time steps 51:100, then 41:100, 31:100, etc. for time series 1.
Note that, for comparison purposes, we also standardize all time series once before passing them in as an argument. Time series 1 won’t be re-scaled, but will be re-centered for each iteration; this is important because the time series are non-stationary. A minimal sketch of one such fit follows.
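A minimal sketch of one such fit, assuming s$y_sim is a 3 × 100 simulated matrix and that dropped observations can be coded as NA (object names are illustrative):
y_sub <- s$y_sim
y_sub[2:3, 1:50] <- NA # series 2 and 3 observed only for time steps 51:100
y_sub[1, 1:40] <- NA # series 1 observed for 41:100 in this iteration
y_sub[1, ] <- y_sub[1, ] - mean(y_sub[1, ], na.rm = TRUE) # re-center, don't re-scale
fit <- fit_dfa(y = y_sub, num_trends = 1, iter = 50, chains = 1)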
diff --git a/docs/articles/combining_data_files/figure-html/unnamed-chunk-8-1.png b/docs/articles/combining_data_files/figure-html/unnamed-chunk-8-1.png
index 3b97551..3558bbc 100644
Binary files a/docs/articles/combining_data_files/figure-html/unnamed-chunk-8-1.png and b/docs/articles/combining_data_files/figure-html/unnamed-chunk-8-1.png differ
diff --git a/docs/articles/combining_data_files/figure-html/unnamed-chunk-9-1.png b/docs/articles/combining_data_files/figure-html/unnamed-chunk-9-1.png
index ba62264..b6eabd0 100644
Binary files a/docs/articles/combining_data_files/figure-html/unnamed-chunk-9-1.png and b/docs/articles/combining_data_files/figure-html/unnamed-chunk-9-1.png differ
diff --git a/docs/articles/compositional.html b/docs/articles/compositional.html
index c32e50a..29a04c4 100644
--- a/docs/articles/compositional.html
+++ b/docs/articles/compositional.html
@@ -31,7 +31,7 @@
vignettes/compositional.Rmd
##
## [,1] [,2]
-## [1,] 0.99 0.01
-## [2,] 0.96 0.04
-## [3,] 0.02 0.98
-## [4,] 0.19 0.81
-## [5,] 0.79 0.21
+## [1,] 0.98 0.02
+## [2,] 0.97 0.03
+## [3,] 0.01 0.99
+## [4,] 0.18 0.82
+## [5,] 0.76 0.24
Combining the estimated trends and true trends from the simulation shows that the trends are offset by an intercept but track the overall simulated values very well (estimated trend 1 recovers the true trend plotted as series 3, and estimated trend 2 recovers the true trend plotted as series 4); an overlay is sketched below.
x <- apply(pars$x, c(2, 3), mean)[c(2, 1), ]
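A hypothetical way to overlay the estimated trends on the true simulated trends, assuming sim is the sim_dfa() output so that sim$x holds the true 2 × N trend matrix:
matplot(t(rbind(x, sim$x)), type = "l", ylab = "trend value") # rows 1-2 estimated, 3-4 true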
diff --git a/docs/articles/compositional_files/figure-html/unnamed-chunk-12-1.png b/docs/articles/compositional_files/figure-html/unnamed-chunk-12-1.png
index 8918861..8fb2255 100644
Binary files a/docs/articles/compositional_files/figure-html/unnamed-chunk-12-1.png and b/docs/articles/compositional_files/figure-html/unnamed-chunk-12-1.png differ
diff --git a/docs/articles/compositional_files/figure-html/unnamed-chunk-7-1.png b/docs/articles/compositional_files/figure-html/unnamed-chunk-7-1.png
index f4077cc..d845c83 100644
Binary files a/docs/articles/compositional_files/figure-html/unnamed-chunk-7-1.png and b/docs/articles/compositional_files/figure-html/unnamed-chunk-7-1.png differ
diff --git a/docs/articles/covariates.html b/docs/articles/covariates.html
index 29020a9..19d9c02 100644
--- a/docs/articles/covariates.html
+++ b/docs/articles/covariates.html
@@ -31,7 +31,7 @@
vignettes/covariates.Rmd
vignettes/estimate_process_sigma.Rmd
The estimated loadings from the DFA where the trends are forced to have the same fixed variance are mostly good
-## [,1] [,2]
-## [1,] 5.47 -1.12
-## [2,] -47.55 11.24
-## [3,] -53.02 36.55
-## [4,] 4.51 -19.23
+## [,1] [,2]
+## [1,] -91.70 -32.20
+## [2,] -10.27 -109.07
+## [3,] 42.34 -12.66
+## [4,] -28.00 -7.90
but some of the loadings are far off. These loadings are also not well estimated by either of the models that estimate the process variances,
-## [,1] [,2]
-## [1,] 0.10 0.00
-## [2,] -0.09 8.19
-## [3,] -0.35 1.73
-## [4,] 0.40 11.18
+## [,1] [,2]
+## [1,] -70.20 68.36
+## [2,] -14.61 -118.64
+## [3,] -66.64 9.32
+## [4,] -47.34 -10.62
or
## [,1] [,2]
-## [1,] 1.21 0.87
-## [2,] -124.53 -84.54
-## [3,] -47.60 -98.89
-## [4,] -129.04 -53.41
+## [1,] -92.62 37.23
+## [2,] -100.07 -64.45
+## [3,] -3.13 -44.88
+## [4,] -83.12 -29.19
The loadings for Model 4 are given by
-## [,1] [,2]
-## [1,] 3.46 -0.32
-## [2,] 9.05 -0.73
-## [3,] -3.52 -30.51
-## [4,] 1.81 -3.40
+## [,1] [,2]
+## [1,] -99.51 5.65
+## [2,] -7.46 -98.49
+## [3,] 8.32 2.71
+## [4,] 1.16 0.06
and Model 5 by
-## [,1] [,2]
-## [1,] 6.25 4.94
-## [2,] -103.69 -81.06
-## [3,] 49.81 82.37
-## [4,] -119.48 -63.72
+## [,1] [,2]
+## [1,] -89.54 -26.98
+## [2,] -19.58 -107.16
+## [3,] 44.17 66.35
+## [4,] -47.99 -90.43
If we calculate the RMSE of the different models, model 3 (estimated process trends, raw data not standardized) performs best. A minimal RMSE helper is sketched after the table below.
1 | -6983.1329 | +24263.24
2 | -200.1707 | +30917.61
3 | -54128.9021 | +33873.93
4 | -1133.3543 | +19927.94
5 | -44316.3665 | +37172.58
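A minimal RMSE helper for comparisons like this, assuming pred and obs are equally sized matrices of posterior-mean predictions and true (or observed) values:
rmse <- function(pred, obs) sqrt(mean((pred - obs)^2, na.rm = TRUE))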
diff --git a/docs/reference/dfa_fitted.html b/docs/reference/dfa_fitted.html index 4bf7bf4..41fb158 100644 --- a/docs/reference/dfa_fitted.html +++ b/docs/reference/dfa_fitted.html @@ -72,7 +72,7 @@ @@ -177,8 +177,8 @@if (FALSE) { set.seed(42) s <- sim_dfa(num_trends = 1, num_years = 20, num_ts = 3) -obs <- c(s$y_sim[1,], s$y_sim[2,], s$y_sim[3,]) -long = data.frame("obs" = obs, "ts" = sort(rep(1:3,20)), "time" = rep(1:20,3)) -m <- fit_dfa(y = long, iter = 50, chains = 1, data_shape="long", sample=FALSE) +obs <- c(s$y_sim[1, ], s$y_sim[2, ], s$y_sim[3, ]) +long <- data.frame("obs" = obs, "ts" = sort(rep(1:3, 20)), "time" = rep(1:20, 3)) +m <- fit_dfa(y = long, iter = 50, chains = 1, data_shape = "long", sample = FALSE) # random folds -fit_cv = dfa_cv(m, cv_method="loocv", n_folds = 5, iter=50, chains=1) +fit_cv <- dfa_cv(m, cv_method = "loocv", n_folds = 5, iter = 50, chains = 1) # folds can also be passed in -fold_ids = sample(1:5, size=nrow(long), replace=TRUE) -m <- fit_dfa(y = long, iter = 50, chains = 1, data_shape="long", sample=FALSE) -fit_cv = dfa_cv(m, cv_method="loocv", n_folds = 5, iter=50, chains=1, fold_ids=fold_ids) +fold_ids <- sample(1:5, size = nrow(long), replace = TRUE) +m <- fit_dfa(y = long, iter = 50, chains = 1, data_shape = "long", sample = FALSE) +fit_cv <- dfa_cv(m, cv_method = "loocv", n_folds = 5, iter = 50, chains = 1, fold_ids = fold_ids) # do an example of leave-time-out cross validation where years are dropped -fold_ids = long$time -m <- fit_dfa(y = long, iter = 50, chains = 1, data_shape="long", sample=FALSE) -fit_cv = dfa_cv(m, cv_method="loocv", iter=100, chains=1, fold_ids = fold_ids) +fold_ids <- long$time +m <- fit_dfa(y = long, iter = 50, chains = 1, data_shape = "long", sample = FALSE) +fit_cv <- dfa_cv(m, cv_method = "loocv", iter = 100, chains = 1, fold_ids = fold_ids) # example with covariates and long format data -obs_covar = expand.grid("time"=1:20,"timeseries"=1:3,"covariate"=1:2) -obs_covar$value=rnorm(nrow(obs_covar),0,0.1) -obs <- c(s$y_sim[1,], s$y_sim[2,], s$y_sim[3,]) -m <- fit_dfa(y = long, iter = 50, chains = 1, obs_covar=obs_covar,data_shape="long", sample=FALSE) -fit_cv = dfa_cv(m, cv_method="loocv", n_folds = 5, iter=50, chains=1) +obs_covar <- expand.grid("time" = 1:20, "timeseries" = 1:3, "covariate" = 1:2) +obs_covar$value <- rnorm(nrow(obs_covar), 0, 0.1) +obs <- c(s$y_sim[1, ], s$y_sim[2, ], s$y_sim[3, ]) +m <- fit_dfa(y = long, iter = 50, chains = 1, obs_covar = obs_covar, data_shape = "long", sample = FALSE) +fit_cv <- dfa_cv(m, cv_method = "loocv", n_folds = 5, iter = 50, chains = 1) }
+ @@ -173,6 +173,7 @@set.seed(1) s <- sim_dfa(num_trends = 1, num_ts = 3, num_years = 30) s$y_sim[1, 15] <- s$y_sim[1, 15] - 6 -plot(s$y_sim[1,], type = "o") +plot(s$y_sim[1, ], type = "o")# only 1 chain and 250 iterations used so example runs quickly: m <- fit_dfa(y = s$y_sim, num_trends = 1, iter = 50, chains = 1, nu_fixed = 2)#> #> SAMPLING FOR MODEL 'dfa' NOW (CHAIN 1). #> Chain 1: -#> Chain 1: Gradient evaluation took 5.2e-05 seconds -#> Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.52 seconds. +#> Chain 1: Gradient evaluation took 4.1e-05 seconds +#> Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.41 seconds. #> Chain 1: Adjust your expectations accordingly! #> Chain 1: #> Chain 1: @@ -212,11 +212,12 @@Examp #> Chain 1: Iteration: 45 / 50 [ 90%] (Sampling) #> Chain 1: Iteration: 50 / 50 [100%] (Sampling) #> Chain 1: -#> Chain 1: Elapsed Time: 0.005366 seconds (Warm-up) -#> Chain 1: 0.055166 seconds (Sampling) -#> Chain 1: 0.060532 seconds (Total) -#> Chain 1:
#> Warning: There were 1 chains where the estimated Bayesian Fraction of Missing Information was low. See -#> http://mc-stan.org/misc/warnings.html#bfmi-low#> Warning: Examine the pairs() plot to diagnose sampling problems#> Warning: The largest R-hat is 2.1, indicating chains have not mixed. +#> Chain 1: Elapsed Time: 0.134745 seconds (Warm-up) +#> Chain 1: 0.00172 seconds (Sampling) +#> Chain 1: 0.136465 seconds (Total) +#> Chain 1:#> Warning: There were 25 divergent transitions after warmup. See +#> http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup +#> to find out why this is a problem and how to eliminate them.#> Warning: Examine the pairs() plot to diagnose sampling problems#> Warning: The largest R-hat is NA, indicating chains have not mixed. #> Running the chains for more iterations may help. See #> http://mc-stan.org/misc/warnings.html#r-hat#> Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable. #> Running the chains for more iterations may help. See @@ -224,134 +225,133 @@Examp #> Running the chains for more iterations may help. See #> http://mc-stan.org/misc/warnings.html#tail-ess
#> Inference for the input samples (1 chains: each with iter = 25; warmup = 12): #> -#> Q5 Q50 Q95 Mean SD Rhat Bulk_ESS Tail_ESS -#> x[1,1] 1.6 1.8 1.8 1.7 0.1 2.06 4 13 -#> x[1,2] -0.8 -0.4 -0.4 -0.6 0.2 2.06 4 13 -#> x[1,3] -1.0 -0.4 -0.4 -0.6 0.3 2.06 4 13 -#> x[1,4] 0.8 1.6 1.7 1.3 0.4 2.06 4 13 -#> x[1,5] 0.3 1.2 1.2 0.8 0.4 2.06 4 13 -#> x[1,6] -2.2 -1.2 -1.2 -1.6 0.5 2.06 4 13 -#> x[1,7] 0.0 1.0 1.1 0.6 0.5 2.06 4 13 -#> x[1,8] 1.9 2.9 3.0 2.5 0.5 2.06 4 13 -#> x[1,9] -0.2 0.7 0.7 0.4 0.4 2.06 4 13 -#> x[1,10] -1.8 -1.0 -1.0 -1.3 0.3 2.06 4 13 -#> x[1,11] -0.6 0.0 0.0 -0.2 0.3 2.06 4 13 -#> x[1,12] -0.3 0.2 0.2 0.1 0.2 2.06 3 13 -#> x[1,13] 0.4 0.8 0.8 0.7 0.2 2.06 4 13 -#> x[1,14] 0.3 0.6 0.8 0.6 0.2 2.06 8 13 -#> x[1,15] 3.1 3.2 3.6 3.3 0.2 2.06 8 13 -#> x[1,16] 0.9 0.9 1.6 1.1 0.3 2.06 3 13 -#> x[1,17] -1.8 -1.8 -0.7 -1.4 0.5 2.06 3 13 -#> x[1,18] -2.1 -2.1 -0.6 -1.5 0.7 2.06 4 13 -#> x[1,19] -2.5 -2.5 -0.5 -1.7 0.9 2.06 3 13 -#> x[1,20] -0.4 -0.4 1.6 0.4 0.9 2.06 3 13 -#> x[1,21] -2.7 -2.7 -0.9 -2.0 0.9 2.06 3 13 -#> x[1,22] -3.0 -3.0 -1.3 -2.3 0.8 2.06 3 13 -#> x[1,23] -0.5 -0.5 0.8 0.0 0.6 2.06 3 13 -#> x[1,24] 0.8 0.8 1.5 1.1 0.4 2.06 4 13 -#> x[1,25] 1.0 1.0 1.2 1.1 0.1 2.06 4 13 -#> x[1,26] -1.8 -1.8 -1.7 -1.8 0.0 2.06 4 13 -#> x[1,27] -0.2 0.2 0.2 0.0 0.2 2.06 4 13 -#> x[1,28] -1.5 -0.6 -0.6 -1.0 0.4 2.06 4 13 -#> x[1,29] 0.2 1.6 1.6 1.0 0.7 2.06 4 13 -#> x[1,30] 2.3 4.0 4.1 3.3 0.9 2.06 4 13 -#> Z[1,1] 1.5 1.6 1.6 1.6 0.0 2.06 4 13 -#> Z[2,1] -9.4 33.5 33.9 17.9 18.7 2.06 4 13 -#> Z[3,1] -15.6 38.9 39.2 20.3 23.2 2.06 4 13 -#> log_lik[1] -3.8 -2.2 -2.2 -2.9 0.8 2.06 4 13 -#> log_lik[2] -351.6 -307.6 -3.8 -181.0 171.1 2.06 4 13 -#> log_lik[3] -470.1 -414.5 -4.0 -242.2 229.7 2.06 4 13 -#> log_lik[4] -3.9 -2.0 -2.0 -2.8 0.9 2.06 4 13 -#> log_lik[5] -23.8 -21.9 -3.8 -14.1 10.0 2.06 3 13 -#> log_lik[6] -30.6 -28.2 -3.8 -17.8 13.3 2.06 3 13 -#> log_lik[7] -3.9 -2.0 -1.9 -2.8 1.0 2.06 4 13 -#> log_lik[8] -18.8 -17.8 -3.8 -11.7 7.5 2.06 3 13 -#> log_lik[9] -24.4 -23.2 -3.8 -14.7 10.3 2.06 3 13 -#> log_lik[10] -3.8 -2.0 -2.0 -2.8 0.9 2.06 4 13 -#> log_lik[11] -299.1 -257.1 -3.8 -153.2 144.5 2.06 3 13 -#> log_lik[12] -400.3 -346.7 -3.8 -205.0 194.4 2.06 3 13 -#> log_lik[13] -3.8 -1.9 -1.9 -2.8 1.0 2.06 4 13 -#> log_lik[14] -157.0 -132.8 -3.8 -80.7 74.5 2.06 3 13 -#> log_lik[15] -209.3 -178.4 -3.8 -107.3 100.2 2.06 3 13 -#> log_lik[16] -3.9 -2.5 -2.5 -3.1 0.7 2.06 4 13 -#> log_lik[17] -174.9 -160.0 -3.8 -92.7 85.4 2.06 3 13 -#> log_lik[18] -231.7 -213.6 -4.1 -122.8 113.6 2.06 3 13 -#> log_lik[19] -3.8 -1.9 -1.8 -2.7 1.0 2.06 4 13 -#> log_lik[20] -121.5 -101.7 -3.7 -62.6 57.0 2.06 4 13 -#> log_lik[21] -162.6 -137.1 -3.8 -83.3 77.1 2.06 3 13 -#> log_lik[22] -3.9 -3.3 -3.2 -3.5 0.3 2.06 4 13 -#> log_lik[23] -966.8 -833.8 -3.8 -491.9 471.8 2.06 4 13 -#> log_lik[24] -1291.6 -1122.7 -4.1 -658.5 632.0 2.06 4 13 -#> log_lik[25] -3.8 -1.8 -1.7 -2.7 1.0 2.06 4 13 -#> log_lik[26] -58.6 -47.8 -3.7 -30.8 26.3 2.06 4 13 -#> log_lik[27] -77.9 -63.9 -3.7 -40.5 35.7 2.06 4 13 -#> log_lik[28] -3.9 -2.3 -2.3 -3.0 0.8 2.06 4 13 -#> log_lik[29] -119.2 -110.4 -3.8 -63.9 57.7 2.06 3 13 -#> log_lik[30] -157.6 -147.1 -3.9 -84.3 76.9 2.06 3 13 -#> log_lik[31] -3.8 -1.8 -1.8 -2.7 1.0 2.06 4 13 -#> log_lik[32] -3.9 -2.1 -1.8 -2.8 1.0 2.06 4 13 -#> log_lik[33] -4.0 -2.1 -1.8 -2.8 1.0 2.06 3 13 -#> log_lik[34] -3.8 -1.8 -1.7 -2.7 1.0 2.06 4 13 -#> log_lik[35] -7.1 -5.6 -3.7 -5.2 1.5 2.06 4 13 -#> log_lik[36] -9.3 -7.3 -3.7 -6.3 2.5 2.06 4 13 -#> log_lik[37] -3.8 -1.8 -1.8 -2.7 1.0 2.06 4 13 -#> log_lik[38] 
-75.8 -64.4 -3.8 -40.0 35.0 2.06 3 13 -#> log_lik[39] -100.8 -86.2 -3.8 -52.7 47.3 2.06 4 13 -#> log_lik[40] -3.8 -1.8 -1.7 -2.7 1.0 2.06 4 13 -#> log_lik[41] -37.3 -31.9 -3.8 -20.6 16.3 2.06 3 13 -#> log_lik[42] -49.8 -42.9 -3.8 -27.0 22.4 2.06 4 13 -#> log_lik[43] -7.7 -7.2 -3.9 -5.8 1.9 2.06 4 13 -#> log_lik[44] -1156.3 -1013.2 -4.0 -592.0 567.4 2.06 4 13 -#> log_lik[45] -1544.0 -1363.5 -4.9 -792.7 759.0 2.06 4 13 -#> log_lik[46] -3.8 -1.9 -1.8 -2.7 1.0 2.06 4 13 -#> log_lik[47] -86.6 -76.6 -3.8 -46.3 40.9 2.06 4 13 -#> log_lik[48] -114.9 -102.4 -4.0 -61.1 54.8 2.06 4 13 -#> log_lik[49] -3.9 -2.9 -2.8 -3.3 0.5 2.06 4 13 -#> log_lik[50] -381.6 -330.2 -3.8 -195.0 184.9 2.06 3 13 -#> log_lik[51] -506.7 -441.8 -3.8 -259.1 246.6 2.06 3 13 -#> log_lik[52] -3.8 -3.0 -2.9 -3.3 0.4 2.06 4 13 -#> log_lik[53] -508.8 -437.8 -3.8 -258.9 246.8 2.06 3 13 -#> log_lik[54] -677.5 -587.3 -3.8 -345.0 329.9 2.06 3 13 -#> log_lik[55] -3.8 -3.4 -3.2 -3.5 0.3 2.06 4 13 -#> log_lik[56] -689.4 -592.1 -3.8 -349.9 334.8 2.06 3 13 -#> log_lik[57] -920.5 -796.8 -3.8 -467.8 448.6 2.06 3 13 -#> log_lik[58] -3.8 -1.8 -1.8 -2.7 1.0 2.06 4 13 -#> log_lik[59] -23.0 -18.0 -3.8 -12.9 8.8 2.06 4 13 -#> log_lik[60] -30.7 -24.0 -3.9 -16.8 12.3 2.06 4 13 -#> log_lik[61] -3.8 -3.4 -3.2 -3.5 0.3 2.06 4 13 -#> log_lik[62] -831.8 -713.3 -3.8 -421.5 404.1 2.06 3 13 -#> log_lik[63] -1109.1 -958.5 -3.8 -562.9 540.5 2.06 3 13 -#> log_lik[64] -4.1 -3.9 -3.7 -3.9 0.1 2.06 4 13 -#> log_lik[65] -1032.7 -890.1 -3.8 -523.9 503.0 2.06 3 13 -#> log_lik[66] -1375.7 -1194.9 -3.9 -699.3 671.9 2.06 3 13 -#> log_lik[67] -3.8 -1.8 -1.8 -2.7 1.0 2.06 4 13 -#> log_lik[68] -26.3 -21.4 -3.8 -14.6 10.5 2.06 3 13 -#> log_lik[69] -34.5 -28.2 -3.8 -18.7 14.4 2.06 4 13 -#> log_lik[70] -3.9 -2.2 -2.1 -2.9 0.9 2.06 4 13 -#> log_lik[71] -69.7 -63.4 -3.8 -38.1 32.9 2.06 3 13 -#> log_lik[72] -91.0 -83.4 -3.9 -49.5 43.6 2.06 4 13 -#> log_lik[73] -3.9 -2.3 -2.3 -3.0 0.8 2.06 4 13 -#> log_lik[74] -125.6 -111.5 -3.8 -66.5 60.5 2.06 3 13 -#> log_lik[75] -165.2 -147.8 -3.8 -87.3 80.2 2.06 3 13 -#> log_lik[76] -3.8 -2.1 -2.1 -2.9 0.9 2.06 4 13 -#> log_lik[77] -333.5 -293.1 -3.8 -171.6 161.9 2.06 4 13 -#> log_lik[78] -445.8 -394.8 -4.1 -229.6 217.3 2.06 4 13 -#> log_lik[79] -3.8 -2.0 -2.0 -2.8 0.9 2.06 4 13 -#> log_lik[80] -10.6 -9.3 -3.8 -7.2 3.3 2.06 4 13 -#> log_lik[81] -13.1 -11.4 -3.8 -8.5 4.5 2.06 4 13 -#> log_lik[82] -3.8 -1.8 -1.8 -2.7 1.0 2.06 4 13 -#> log_lik[83] -30.0 -27.2 -3.8 -17.2 12.7 2.06 4 13 -#> log_lik[84] -40.0 -36.5 -4.0 -22.5 17.5 2.06 4 13 -#> log_lik[85] -3.9 -3.7 -3.5 -3.7 0.1 2.06 4 13 -#> log_lik[86] -311.8 -268.8 -3.7 -160.3 151.5 2.06 4 13 -#> log_lik[87] -410.2 -356.1 -3.7 -210.8 200.2 2.06 4 13 -#> log_lik[88] -8.3 -7.7 -3.8 -6.1 2.2 2.06 4 13 -#> log_lik[89] -1895.4 -1647.1 -3.8 -966.9 930.8 2.06 3 13 -#> log_lik[90] -2513.3 -2200.8 -4.1 -1285.1 1237.0 2.06 3 13 -#> psi[1] 2.5 2.5 2.5 2.5 0.0 1.33 13 13 -#> xstar[1,1] 3.5 5.5 7.4 5.3 1.4 1.58 4 13 -#> sigma[1] 2.3 2.4 18.6 9.3 7.8 2.06 4 13 -#> lp__ -26610.9 -23557.0 -449.9 -13949.9 12784.3 2.06 3 13 +#> Q5 Q50 Q95 Mean SD Rhat Bulk_ESS Tail_ESS +#> x[1,1] -1.3 -1.3 -1.3 -1.3 0.0 1.00 13 13 +#> x[1,2] -1.9 -1.9 -1.9 -1.9 0.0 1.00 13 13 +#> x[1,3] -2.2 -2.2 -2.2 -2.2 0.0 1.00 13 13 +#> x[1,4] -2.4 -2.4 -2.4 -2.4 0.0 1.00 13 13 +#> x[1,5] -2.7 -2.7 -2.7 -2.7 0.0 1.00 13 13 +#> x[1,6] -2.1 -2.1 -2.1 -2.1 0.0 1.00 13 13 +#> x[1,7] -2.1 -2.1 -2.1 -2.1 0.0 1.00 13 13 +#> x[1,8] -2.3 -2.3 -2.3 -2.3 0.0 1.00 13 13 +#> x[1,9] -2.1 -2.1 -2.1 -2.1 0.0 1.00 13 13 +#> x[1,10] -1.8 -1.8 -1.8 -1.8 0.0 
1.00 13 13 +#> x[1,11] -1.8 -1.8 -1.8 -1.8 0.0 1.00 13 13 +#> x[1,12] -1.4 -1.4 -1.4 -1.4 0.0 1.00 13 13 +#> x[1,13] -1.1 -1.1 -1.1 -1.1 0.0 1.00 13 13 +#> x[1,14] -0.8 -0.8 -0.8 -0.8 0.0 1.00 13 13 +#> x[1,15] -0.3 -0.3 -0.3 -0.3 0.0 1.00 13 13 +#> x[1,16] -0.7 -0.7 -0.7 -0.7 0.0 1.00 13 13 +#> x[1,17] -1.0 -1.0 -1.0 -1.0 0.0 1.00 13 13 +#> x[1,18] -0.6 -0.6 -0.6 -0.6 0.0 1.00 13 13 +#> x[1,19] -0.3 -0.3 -0.3 -0.3 0.0 1.00 13 13 +#> x[1,20] 0.2 0.2 0.2 0.2 0.0 1.00 13 13 +#> x[1,21] 1.3 1.3 1.3 1.3 0.0 1.00 13 13 +#> x[1,22] 1.1 1.1 1.1 1.1 0.0 1.00 13 13 +#> x[1,23] 0.8 0.8 0.8 0.8 0.0 1.00 13 13 +#> x[1,24] 1.8 1.8 1.8 1.8 0.0 1.00 13 13 +#> x[1,25] 1.9 1.9 1.9 1.9 0.0 1.00 13 13 +#> x[1,26] 3.1 3.1 3.1 3.1 0.0 1.00 13 13 +#> x[1,27] 4.1 4.1 4.1 4.1 0.0 1.00 13 13 +#> x[1,28] 4.9 4.9 4.9 4.9 0.0 1.00 13 13 +#> x[1,29] 5.5 5.5 5.5 5.5 0.0 1.00 13 13 +#> x[1,30] 5.5 5.5 5.5 5.5 0.0 1.00 13 13 +#> Z[1,1] -0.3 -0.3 -0.3 -0.3 0.0 1.00 13 13 +#> Z[2,1] -0.4 -0.4 -0.4 -0.4 0.0 1.00 13 13 +#> Z[3,1] -0.3 -0.3 -0.3 -0.3 0.0 1.00 13 13 +#> log_lik[1] -0.4 -0.4 -0.4 -0.4 0.0 1.00 13 13 +#> log_lik[2] -0.1 -0.1 -0.1 -0.1 0.0 1.00 13 13 +#> log_lik[3] -0.1 -0.1 -0.1 -0.1 0.0 1.00 13 13 +#> log_lik[4] -0.6 -0.6 -0.6 -0.6 0.0 1.00 13 13 +#> log_lik[5] 0.0 0.0 0.0 0.0 0.0 1.00 13 13 +#> log_lik[6] 0.0 0.0 0.0 0.0 0.0 1.00 13 13 +#> log_lik[7] -0.1 -0.1 -0.1 -0.1 0.0 1.00 13 13 +#> log_lik[8] 0.0 0.0 0.0 0.0 0.0 1.00 13 13 +#> log_lik[9] -0.2 -0.2 -0.2 -0.2 0.0 1.00 13 13 +#> log_lik[10] -0.4 -0.4 -0.4 -0.4 0.0 1.00 13 13 +#> log_lik[11] -0.1 -0.1 -0.1 -0.1 0.0 1.00 13 13 +#> log_lik[12] -0.3 -0.3 -0.3 -0.3 0.0 1.00 13 13 +#> log_lik[13] 0.0 0.0 0.0 0.0 0.0 1.00 13 13 +#> log_lik[14] -0.2 -0.2 -0.2 -0.2 0.0 1.00 13 13 +#> log_lik[15] 0.0 0.0 0.0 0.0 0.0 1.00 13 13 +#> log_lik[16] -0.2 -0.2 -0.2 -0.2 0.0 1.00 13 13 +#> log_lik[17] -0.1 -0.1 -0.1 -0.1 0.0 1.00 13 13 +#> log_lik[18] 0.0 0.0 0.0 0.0 0.0 1.00 13 13 +#> log_lik[19] 0.0 0.0 0.0 0.0 0.0 1.00 13 13 +#> log_lik[20] 0.0 0.0 0.0 0.0 0.0 1.00 13 13 +#> log_lik[21] -0.1 -0.1 -0.1 -0.1 0.0 1.00 13 13 +#> log_lik[22] 0.0 0.0 0.0 0.0 0.0 1.00 13 13 +#> log_lik[23] 0.0 0.0 0.0 0.0 0.0 1.00 13 13 +#> log_lik[24] -0.1 -0.1 -0.1 -0.1 0.0 1.00 13 13 +#> log_lik[25] -0.3 -0.3 -0.3 -0.3 0.0 1.00 13 13 +#> log_lik[26] -0.1 -0.1 -0.1 -0.1 0.0 1.00 13 13 +#> log_lik[27] 0.0 0.0 0.0 0.0 0.0 1.00 13 13 +#> log_lik[28] -0.1 -0.1 -0.1 -0.1 0.0 1.00 13 13 +#> log_lik[29] 0.0 0.0 0.0 0.0 0.0 1.00 13 13 +#> log_lik[30] -0.1 -0.1 -0.1 -0.1 0.0 1.00 13 13 +#> log_lik[31] 0.0 0.0 0.0 0.0 0.0 1.00 13 13 +#> log_lik[32] 0.0 0.0 0.0 0.0 0.0 1.00 13 13 +#> log_lik[33] 0.0 0.0 0.0 0.0 0.0 1.00 13 13 +#> log_lik[34] -0.3 -0.3 -0.3 -0.3 0.0 1.00 13 13 +#> log_lik[35] -0.3 -0.3 -0.3 -0.3 0.0 1.00 13 13 +#> log_lik[36] -0.2 -0.2 -0.2 -0.2 0.0 1.00 13 13 +#> log_lik[37] -0.7 -0.7 -0.7 -0.7 0.0 1.00 13 13 +#> log_lik[38] -0.3 -0.3 -0.3 -0.3 0.0 1.00 13 13 +#> log_lik[39] -0.5 -0.5 -0.5 -0.5 0.0 1.00 13 13 +#> log_lik[40] -0.5 -0.5 -0.5 -0.5 0.0 1.00 13 13 +#> log_lik[41] -0.1 -0.1 -0.1 -0.1 0.0 1.00 13 13 +#> log_lik[42] -0.1 -0.1 -0.1 -0.1 0.0 1.00 13 13 +#> log_lik[43] -22.6 -22.6 -22.6 -22.6 0.0 1.00 13 13 +#> log_lik[44] -0.5 -0.5 -0.5 -0.5 0.0 1.00 13 13 +#> log_lik[45] -0.4 -0.4 -0.4 -0.4 0.0 1.00 13 13 +#> log_lik[46] -0.2 -0.2 -0.2 -0.2 0.0 1.00 13 13 +#> log_lik[47] 0.0 0.0 0.0 0.0 0.0 1.00 13 13 +#> log_lik[48] -0.1 -0.1 -0.1 -0.1 0.0 1.00 13 13 +#> log_lik[49] -0.1 -0.1 -0.1 -0.1 0.0 1.00 13 13 +#> log_lik[50] 0.0 0.0 0.0 0.0 0.0 1.00 13 13 +#> log_lik[51] 0.0 0.0 0.0 0.0 
0.0 1.00 13 13 +#> log_lik[52] 0.0 0.0 0.0 0.0 0.0 1.00 13 13 +#> log_lik[53] 0.0 0.0 0.0 0.0 0.0 1.00 13 13 +#> log_lik[54] 0.0 0.0 0.0 0.0 0.0 1.00 13 13 +#> log_lik[55] 0.0 0.0 0.0 0.0 0.0 1.00 13 13 +#> log_lik[56] -0.2 -0.2 -0.2 -0.2 0.0 1.00 13 13 +#> log_lik[57] 0.0 0.0 0.0 0.0 0.0 1.00 13 13 +#> log_lik[58] -0.1 -0.1 -0.1 -0.1 0.0 1.00 13 13 +#> log_lik[59] -0.4 -0.4 -0.4 -0.4 0.0 1.00 13 13 +#> log_lik[60] -0.3 -0.3 -0.3 -0.3 0.0 1.00 13 13 +#> log_lik[61] 0.0 0.0 0.0 0.0 0.0 1.00 13 13 +#> log_lik[62] 0.0 0.0 0.0 0.0 0.0 1.00 13 13 +#> log_lik[63] -0.1 -0.1 -0.1 -0.1 0.0 1.00 13 13 +#> log_lik[64] -0.4 -0.4 -0.4 -0.4 0.0 1.00 13 13 +#> log_lik[65] 0.0 0.0 0.0 0.0 0.0 1.00 13 13 +#> log_lik[66] 0.0 0.0 0.0 0.0 0.0 1.00 13 13 +#> log_lik[67] 0.0 0.0 0.0 0.0 0.0 1.00 13 13 +#> log_lik[68] 0.0 0.0 0.0 0.0 0.0 1.00 13 13 +#> log_lik[69] -0.1 -0.1 -0.1 -0.1 0.0 1.00 13 13 +#> log_lik[70] -0.3 -0.3 -0.3 -0.3 0.0 1.00 13 13 +#> log_lik[71] 0.0 0.0 0.0 0.0 0.0 1.00 13 13 +#> log_lik[72] -0.1 -0.1 -0.1 -0.1 0.0 1.00 13 13 +#> log_lik[73] 0.0 0.0 0.0 0.0 0.0 1.00 13 13 +#> log_lik[74] -0.1 -0.1 -0.1 -0.1 0.0 1.00 13 13 +#> log_lik[75] -0.1 -0.1 -0.1 -0.1 0.0 1.00 13 13 +#> log_lik[76] -0.1 -0.1 -0.1 -0.1 0.0 1.00 13 13 +#> log_lik[77] 0.0 0.0 0.0 0.0 0.0 1.00 13 13 +#> log_lik[78] -0.1 -0.1 -0.1 -0.1 0.0 1.00 13 13 +#> log_lik[79] 0.0 0.0 0.0 0.0 0.0 1.00 13 13 +#> log_lik[80] 0.0 0.0 0.0 0.0 0.0 1.00 13 13 +#> log_lik[81] -0.1 -0.1 -0.1 -0.1 0.0 1.00 13 13 +#> log_lik[82] -0.1 -0.1 -0.1 -0.1 0.0 1.00 13 13 +#> log_lik[83] 0.0 0.0 0.0 0.0 0.0 1.00 13 13 +#> log_lik[84] -0.4 -0.4 -0.4 -0.4 0.0 1.00 13 13 +#> log_lik[85] -0.2 -0.2 -0.2 -0.2 0.0 1.00 13 13 +#> log_lik[86] 0.0 0.0 0.0 0.0 0.0 1.00 13 13 +#> log_lik[87] -0.3 -0.3 -0.3 -0.3 0.0 1.00 13 13 +#> log_lik[88] 0.0 0.0 0.0 0.0 0.0 1.00 13 13 +#> log_lik[89] 0.0 0.0 0.0 0.0 0.0 1.00 13 13 +#> log_lik[90] -0.2 -0.2 -0.2 -0.2 0.0 1.00 13 13 +#> xstar[1,1] 4.1 5.8 7.8 5.9 1.3 1.07 13 13 +#> sigma[1] 0.4 0.4 0.4 0.4 0.0 1.00 13 13 +#> lp__ -29.7 -29.7 -29.7 -29.7 0.0 1.00 13 13 #> #> For each parameter, Bulk_ESS and Tail_ESS are crude measures of #> effective sample size for bulk and tail quantities respectively (an ESS > 100 @@ -361,8 +361,7 @@Examp print(p)
# a 1 in 1000 probability if it was from a normal distribution: find_swans(r, plot = TRUE, threshold = 0.001)
Defaults to FALSE; if TRUE, uses the parameter expansion prior of Ghosh & Dunson (2009).
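A hedged usage sketch: the positive-diagonal constraint and expansion prior are toggled through the expansion_prior argument to fit_dfa() (the other settings here are placeholders for a quick run):
m_expanded <- fit_dfa(y = s$y_sim, num_trends = 1, expansion_prior = TRUE, iter = 50, chains = 1)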
Any other arguments to pass to rstan::sampling().