diff --git a/NAMESPACE b/NAMESPACE
index 13df97a..9d3cd13 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -2,6 +2,7 @@
 
 S3method("[",EstimatorScoreResultList)
 S3method(format,EstimatorScoreResultList)
+export(AdaptivelyWeightedSampleMean)
 export(Bias)
 export(BiasReduced)
 export(Centrality)
@@ -76,8 +77,11 @@ importFrom(ggplot2,ggplot)
 importFrom(ggplot2,scale_color_manual)
 importFrom(ggplot2,scale_fill_gradient)
 importFrom(ggplot2,scale_x_continuous)
+importFrom(ggpubr,ggarrange)
 importFrom(ggpubr,theme_pubclean)
+importFrom(ggpubr,theme_pubr)
 importFrom(grDevices,xy.coords)
+importFrom(graphics,plot.default)
 importFrom(latex2exp,TeX)
 importFrom(progressr,progressor)
 importFrom(scales,percent)
diff --git a/R/adestr_package.R b/R/adestr_package.R
index e4370d4..c41cf1c 100644
--- a/R/adestr_package.R
+++ b/R/adestr_package.R
@@ -1,13 +1,13 @@
 #' adestr
-#' @description Point estimates, confidence intervals, and p-values for optimal adaptive two-stage designs
+#' @description Point estimates, confidence intervals, and p-values for optimal adaptive two-stage designs.
 #'
-#' @details This package implements methods to evaluate the performance characteristics of
-#' various point and interval estimators for optimal adaptive two-stage designs.
+#' @details This package implements methods to \link[adestr:evaluate_estimator]{evaluate the performance characteristics} of
+#' various \link[adestr:PointEstimator]{point} and \link[adestr:IntervalEstimator]{interval} estimators for optimal adaptive two-stage designs.
 #' Specifically, this package is written to interface with trial designs created by the \code{\link{adoptr}} package
 #' \insertCite{kunzmann2021adoptr,pilz2021optimal}{adestr}.
 #' Apart from the a priori evaluation of performance characteristics, this package also allows for the
-#' evaluation of the implemented estimators on real datasets, and it implements methods
-#' to calculate p-values.
+#' \link[adestr:analyze]{calculation of the values of the estimators} given real datasets, and it implements methods
+#' to calculate \link[adestr:PValue]{p-values}.
 #'
 #' @docType package
 #' @name adestr
@@ -15,6 +15,11 @@
 #' @importFrom stats dnorm pnorm qnorm dt pt qt dchisq pchisq qchisq integrate uniroot var
 #' @importFrom cubature hcubature
 #' @importFrom Rdpack reprompt
+#' @seealso \code{\link[adestr:evaluate_estimator]{evaluate_estimator}}
+#' @seealso \code{\link[adestr:analyze]{analyze}}
+#' @seealso \code{\link[adestr:Statistic]{Statistic}} \code{\link[adestr:PointEstimator]{PointEstimator}} \code{\link[adestr:IntervalEstimator]{IntervalEstimator}} \code{\link[adestr:PValue]{PValue}}
+#' @seealso \code{\link[adestr:plot,EstimatorScoreResultList-method]{plot}} \code{\link[adestr:plot_p]{plot_p}}
+#' @seealso \url{https://jan-imbi.github.io/adestr/}
 #'
 #' @references
 #' \insertAllCited{}
@@ -25,16 +30,16 @@
 
 .adestr_options <- list(
   # Root finding inside estimators
-  adestr_tol_roots = 1e-3,
-  adestr_maxiter_roots = 1e3,
+  adestr_tol_roots = 2e-3,
+  adestr_maxiter_roots = 5e2,
   # Integrals used inside estimators
-  adestr_tol_inner = 5e-3,
-  adestr_maxEval_inner = 1e4,
-  adestr_absError_inner = 1e-5,
+  adestr_tol_inner = 2e-3,
+  adestr_maxEval_inner = 5e2,
+  adestr_absError_inner = 5e-5,
   # Integrals to evaluate estimators
-  adestr_tol_outer = 5e-6,
-  adestr_maxEval_outer = 3e5,
-  adestr_absError_outer = 1e-8
+  adestr_tol_outer = 1e-3,
+  adestr_maxEval_outer = 1e3,
+  adestr_absError_outer = 1e-5
 )
 
 #### Universal argument order for all functions: ####
diff --git a/R/analyze.R b/R/analyze.R
index 922dad0..6b78ddd 100644
--- a/R/analyze.R
+++ b/R/analyze.R
@@ -11,8 +11,18 @@ Results <- setClass("Results", slots = c(data ="data.frame",
 #' \link[adestr:ConfidenceInterval]{confidence intervals},
 #' and \link[adestr:PValue]{p-values} for a given dataset.
 #'
+#' Note that in \code{\link{adestr}}, statistics are codes as functions of the
+#' stage-wise sample means (and stage-wise sample variances if data_distribution is
+#' \code{\link{Student}}). In a first-step, the data is summarized to produce these
+#' parameters. Then, the list of statistics are evaluated at the values of these parameters.
+#'
+#' The output of the \code{analyze} function also displays information on the hypothesis
+#' test and the interim decision. If the \code{\link[adestr:Statistic-class]{statistics}} list is empty, this will
+#' be the only information displayed.
+#'
 #' @param data a data.frame containing the data to be analyzed.
-#' @param statistics a list of objects of class \code{\link{PointEstimator}}, \code{\link{ConfidenceInterval}} or
+#' @param statistics a list of objects of class \code{\link{PointEstimator}},
+#' \code{\link{ConfidenceInterval}} or
 #' \code{\link{PValue}}.
 #' @inheritParams evaluate_estimator
 #'
@@ -21,21 +31,28 @@ Results <- setClass("Results", slots = c(data ="data.frame",
 #' @export
 #'
 #' @examples
-#' set.seed(321)
+#' set.seed(123)
 #' dat <- data.frame(
-#'   endpoint = c(rnorm(28, .2, 1), rnorm(28, 0, 1),
-#'                rnorm(23, .2, 1), rnorm(23, 0, 1)),
-#'   group = factor(rep(c("ctl", "trt", "ctl", "trt"),
-#'                      c(28,28,23,23))),
-#'   stage = rep(c(1L, 2L), c(56, 46))
-#' )
-#' analyze(
-#'   data = dat,
-#'   statistics = c(get_example_statistics()),
-#'   data_distribution = Normal(),
-#'   sigma = 1,
-#'   design = get_example_design()
+#'   endpoint = c(rnorm(28, 0.3)),
+#'   stage = rep(1, 28)
 #' )
+#' analyze(data = dat,
+#'         statistics = list(),
+#'         data_distribution = Normal(FALSE),
+#'         design = get_example_design(),
+#'         sigma = 1)
+#'
+#' # The results suggest recruiting 32 patients for the second stage
+#' dat <- rbind(
+#'   dat,
+#'   data.frame(
+#'     endpoint = rnorm(32, mean = 0.3),
+#'     stage = rep(2, 32)))
+#' analyze(data = dat,
+#'         statistics = get_example_statistics(),
+#'         data_distribution = Normal(FALSE),
+#'         design = get_example_design(),
+#'         sigma = 1)
 setGeneric("analyze", function(data,
                                statistics = list(),
                                data_distribution,
@@ -78,18 +95,18 @@ setMethod("analyze", signature("data.frame"),
                      if(data_distribution@two_armed) c(sdata$n_s1_g1, sdata$n_s1_g2) else sdata$n1,
                      data_distribution@two_armed)
             if (length(statistics)>1) {
-              if (abs(sdata@n_s1_g1 - design@n1)/ (design@n1) > 0.1)
+              if (abs(sdata$n_s1_g1 - design@n1)/ (design@n1) > 0.1)
                 warning("Planned first-stage sample size in group 1 differs from actually observed sample size by more than 10%. Results may be unreliable.")
               if (data_distribution@two_armed){
-                if (abs(sdata@n_s1_g2 - design@n1)/ (design@n1) > 0.1)
+                if (abs(sdata$n_s1_g2 - design@n1)/ (design@n1) > 0.1)
                   warning("Planned first-stage sample size in group 2 differs from actually observed sample size by more than 10%. Results may be unreliable.")
               }
               calc_n2 <- n2(design, test_val, round=FALSE)
               if (sdata$n_stages==2L){
-                if (abs(sdata@n_s2_g1 - calc_n2 )/ (calc_n2) > 0.1)
+                if (abs(sdata$n_s2_g1 - calc_n2 )/ (calc_n2) > 0.1)
                   warning("Planned second-stage sample size in group 1 differs from actually observed sample size by more than 10%. Results may be unreliable.")
                 if (data_distribution@two_armed) {
-                  if (abs(sdata@n_s2_g2 - calc_n2 )/ (calc_n2) > 0.1)
+                  if (abs(sdata$n_s2_g2 - calc_n2 )/ (calc_n2) > 0.1)
                     warning("Planned second-stage sample size in group 2 differs from actually observed sample size by more than 10%. Results may be unreliable.")                }
                 if (test_val > design@c1e | test_val < design@c1f)
                   warning("Second-stage data was recorded but trial should have been stopped at interim. Results may be unreliable.")
diff --git a/R/estimators.R b/R/estimators.R
index 50d0642..5ff9417 100644
--- a/R/estimators.R
+++ b/R/estimators.R
@@ -26,11 +26,58 @@ setClass("Estimator", contains = "Statistic")
 
 #' Point estimators
 #'
+#' This is the parent class for all point estimators implemented in this package.
+#' Currently, only estimators for the parameter \eqn{\mu} of a normal distribution
+#' are implemented.
+#'
+#' @details
+#' Details about the point estimators can be found in (our upcoming paper).
+#' ## Sample Mean (\code{SampleMean()})
+#' The sample mean is the maximum likelihood estimator for the mean and probably the
+#' 'most straightforward' of the implemented estimators.
+#' ## Fixed weighted sample means (\code{WeightedSampleMean()})
+#' The first- and second-stage (if available) sample means are combined via fixed, predefined
+#' weights. See \insertCite{brannath2006estimation}{adestr} and \insertCite{@Section 8.3.2 in @wassmer2016group}{adestr}.
+#' ## Adaptively weighted sample means (\code{AdaptivelyWeightedSampleMean()})
+#' The first- and second-stage (if available) sample means are combined via a combination of
+#' fixed and adaptively modified
+#' weights that depend on the standard error.
+#' See \insertCite{@Section 8.3.4 in @wassmer2016group}{adestr}.
+#' ## Minimizing peak variance in adaptively weighted sample means (\code{MinimizePeakVariance()})
+#' For this estimator, the weights of the adaptively weighted sample mean are chosen to
+#' minimize the variance of the estimator for the value of \eqn{\mu} which maximizes
+#' the expected sample size.
+#' ## (Pseudo) Rao-Blackwell estimators (\code{RaoBlackwell} and \code{PseudoRaoBlackwell})
+#' The conditional expectation of the first-stage sample mean given the overall sample
+#' mean and the second-stage sample size. See \insertCite{emerson1997computationally}{adestr}.
+#' ## A bias-reduced estimator (\code{BiasReduced()})
+#' This estimator is calculated by subtracting an estimate of the bias from the MLE.
+#' See \insertCite{whitehead1986bias}{adestr}.
+#' ## Median-unbiased estimators
+#' The implemented median-unbiased estimators are:
+#' * \code{MedianUnbiasedMLEOrdering()}
+#' * \code{MedianUnbiasedLikelihoodRatioOrdering()}
+#' * \code{MedianUnbiasedScoreTestOrdering()}
+#' * \code{MedianUnbiasedStagewiseCombinationFunctionOrdering()}
+#'
+#' These estimators are constructed by specifying an ordering of the sample space
+#' and finding the value of \eqn{\mu}, such that the observed sample is the
+#' median of the sample space according to the chosen ordering.
+#' Some of the implemented orderings are based on the work presented in
+#' \insertCite{emerson1990parameter}{adestr},
+#' \insertCite{@Sections 8.4 in @jennison1999group}{adestr},
+#' and \insertCite{@Sections 4.1.1 and 8.2.1 in @wassmer2016group}{adestr}.
+#' @md
+#'
 #' @param g1 functional representation of the estimator in the early futility and efficacy regions.
 #' @param g2 functional representation of the estimator in the continuation region.
 #' @param label name of the estimator. Used in printing methods.
+#' @seealso \code{\link{evaluate_estimator}}
 #'
-#' @return An object of class \code{PointEstimator}.
+#' @return an object of class \code{PointEstimator}.
+#'
+#' @references
+#' \insertAllCited{}
 #'
 #' @export
 #'
@@ -46,18 +93,42 @@ VirtualPointEstimator <- function() stop("Cannot create instance of class Virtua
 
 #' P-values
 #'
+#' This is the parent class for all p-values implemented in this package.
+#' Details about the methods for calculating p-values can be found in
+#' (our upcoming paper).
 #' @param g1 functional representation of the p-value in the early futility and efficacy regions.
 #' @param g2 functional representation of the p-value in the continuation region.
 #' @param label name of the p-value. Used in printing methods.
+#' @seealso [plot_p]
 #'
 #' @return An object of class \code{PValue}.
+#' @details
+#' The implemented p-values are:
+#' * \code{MLEOrderingPValue()}
+#' * \code{LikelihoodRatioOrderingPValue()}
+#' * \code{ScoreTestOrderingPValue()}
+#' * \code{StagewiseCombinationFunctionOrderingPValue()}
+#'
+#' The p-values are calculated by specifying an ordering of the sample space
+#' calculating the probability that a random sample under the null hypothesis is
+#' larger than the observed sample.
+#' Some of the implemented orderings are based on the work presented in
+#' \insertCite{emerson1990parameter}{adestr},
+#' \insertCite{@Sections 8.4 in @jennison1999group}{adestr},
+#' and \insertCite{@Sections 4.1.1 and 8.2.1 in @wassmer2016group}{adestr}.
+#' @md
+#'
+#' @references
+#' \insertAllCited{}
 #'
 #' @export
 #'
 #' @examples
+#' # This is the definition of a 'naive' p-value based on a Z-test for a one-armed trial
 #' PValue(
-#'   g1 = \(smean1, ...) runif(length(smean1)),
-#'   g2 = \(smean2, ...) runif(length(smean2)),
+#'   g1 = \(smean1, n1, sigma, ...) pnorm(smean1*sqrt(n1)/sigma, lower.tail=FALSE),
+#'   g2 = \(smean1, smean2, n1, n2, ...) pnorm((n1 * smean1 + n2 * smean2)/(n1 + n2) *
+#'                                         sqrt(n1+n2)/sigma, lower.tail=FALSE),
 #'   label="My custom p-value")
 setClass("PValue", slots = c(g1 = "function", g2 = "function"),  contains = "Statistic")
 #' @rdname PValue-class
@@ -69,26 +140,52 @@ VirtualPValue <- function() stop("Cannot create instance of class VirtualPValue.
 
 #' Interval estimators
 #'
+#' This is the parent class for all confidence intervals implemented in this package.
+#' Currently, only confidence intervals for the parameter \eqn{\mu} of a normal distribution
+#' are implemented. Details about the methods for calculating confidence intervals can be found in
+#' (our upcoming paper).
 #' @param l1 functional representation of the lower boundary of the interval in the early futility and efficacy regions.
 #' @param u1 functional representation of the upper boundary of the interval in the early futility and efficacy regions.
 #' @param l2 functional representation of the lower boundary of the interval in the continuation region.
 #' @param u2 functional representation of the upper boundary of the interval in the continuation region.
 #' @param two_sided logical indicating whether the confidence interval is two-sided.
 #' @param label name of the estimator. Used in printing methods.
+#' @seealso \code{\link{evaluate_estimator}}
 #'
-#' @return An object of class \code{IntervalEstimator}.
+#' @return an object of class \code{IntervalEstimator}.
 #'
 #' @export
 #' @aliases ConfidenceInterval ConfidenceInterval-class
 #'
+#' @details
+#' The implemented confidence intervals are:
+#' * \code{MLEOrderingCI()}
+#' * \code{LikelihoodRatioOrderingCI()}
+#' * \code{ScoreTestOrderingCI()}
+#' * \code{StagewiseCombinationFunctionOrderingCI()}
+#'
+#' These confidence intervals are constructed by specifying an ordering of the sample space
+#' and finding the value of \eqn{\mu}, such that the observed sample is the
+#' \eqn{\alpha/2} (or (\eqn{1-\alpha/2})) quantile of the sample space according to the
+#' chosen ordering.
+#' Some of the implemented orderings are based on the work presented in
+#' \insertCite{emerson1990parameter}{adestr},
+#' \insertCite{@Sections 8.4 in @jennison1999group}{adestr},
+#' and \insertCite{@Sections 4.1.1 and 8.2.1 in @wassmer2016group}{adestr}.
+#' @md
+#'
+#' @references
+#' \insertAllCited{}
+#'
 #' @examples
+#' # This is the definition of the 'naive' confidence interval for one-armed trials
 #' IntervalEstimator(
-#'   two_sided = FALSE,
-#'   l1 = \(smean1, ...) smean1 - 1,
-#'   u1 = \(smean1, ...) smean1 + 1,
-#'   l2 = \(smean2, ...) smean2 - 1,
-#'   u2 = \(smean2, ...) smean2 + 1,
-#'   label="My custom p-value")
+#'   two_sided = TRUE,
+#'   l1 = \(smean1, n1, sigma, ...) smean1 - qnorm(.95, sd = sigma/sqrt(n1)),
+#'   u1 = \(smean1, n1, sigma, ...) smean1 + qnorm(.95, sd = sigma/sqrt(n1)),
+#'   l2 = \(smean1, smean2, n1, n2, sigma, ...) smean2 - qnorm(.95, sd = sigma/sqrt(n1 + n2)),
+#'   u2 = \(smean1, smean2, n1, n2, sigma, ...) smean2 + qnorm(.95, sd = sigma/sqrt(n1 + n2)),
+#'   label="My custom CI")
 setClass("IntervalEstimator", slots = c(two_sided = "logical", l1 = "function", u1 = "function", l2 = "function", u2 = "function"), contains = "Estimator")
 #' @rdname IntervalEstimator-class
 #' @export
@@ -415,7 +512,7 @@ FirstStageSampleMean <- function() new("PointEstimator", g1 = \(smean1, ...) sme
 setClass("WeightedSampleMean", contains = "PointEstimator")
 
 #' @rdname PointEstimator-class
-#' @param w1 weight of the first-stage sample mean.
+#' @param w1 weight of the first-stage data.
 #' @importFrom scales percent
 #' @export
 WeightedSampleMean <- function(w1=.5) new("PointEstimator", g1 = \(smean1, ...) smean1, g2 = \(smean1, smean2, n1, n2, ...) w1 * smean1 + (1-w1) * smean2,
@@ -424,6 +521,7 @@ WeightedSampleMean <- function(w1=.5) new("PointEstimator", g1 = \(smean1, ...)
 setClass("AdaptivelyWeightedSampleMean", contains = "VirtualPointEstimator", slots = c(w1 = "numeric"))
 #' @rdname PointEstimator-class
 #' @importFrom scales percent
+#' @export
 AdaptivelyWeightedSampleMean <- function(w1 = 1/sqrt(2)) {
   new(
     "AdaptivelyWeightedSampleMean",
diff --git a/R/evaluate_estimator.R b/R/evaluate_estimator.R
index e7527d9..9a08826 100644
--- a/R/evaluate_estimator.R
+++ b/R/evaluate_estimator.R
@@ -3,6 +3,9 @@
 #' These classes encode various metrics which can be used to evaluate
 #' the performance characteristics of point and interval estimators.
 #'
+#' @param interval confidence interval with respect to which centrality of a point
+#' estimator should be evaluated.
+#'
 #' @details
 #' # Details on the implemented estimators
 #' In the following, precise definitions of the performance scores implemented
@@ -10,7 +13,8 @@
 #' are given. To this end,
 #' let \eqn{\hat{\mu}} denote a point estimator, (\eqn{\hat{l}}, \eqn{\hat{u}})
 #' an interval estimator, denote the expected value of a random variable
-#' by \eqn{\mathbb{E}}, and let \eqn{\mu} be the real value of the underlying
+#' by \eqn{\mathbb{E}}, the probability of an event by \eqn{P},
+#' and let \eqn{\mu} be the real value of the underlying
 #' parameter to be estimated.
 #'
 #' ## Scores for point estimators (\code{PointEstimatorScore}):
@@ -18,7 +22,12 @@
 #' * \code{Bias()}: \eqn{\mathbb{E}[\hat{\mu} - \mu]}
 #' * \code{Variance()}: \eqn{\mathbb{E}[(\hat{\mu} - \mathbb{E}[\hat{\mu}])^2]}
 #' * \code{MSE()}: \eqn{\mathbb{E}[(\hat{\mu} - mu)^2]}
-#' * \code{OverestimationProbability()}: \eqn{\mathbb{E}[(\hat{\mu} - mu)^2]}
+#' * \code{OverestimationProbability()}: \eqn{P(\hat{\mu} > \mu)}
+#' * \code{Centrality(interval)}: \eqn{\mathbb{E}[(\hat{\mu} - \hat{l}) + (\hat{\mu} - \hat{u}]}
+#' ## Scores for confidence intervals (\code{IntervalEstimatorScore}):
+#' * \code{Coverage()}: \eqn{P(\hat{l} \leq \mu \leq \hat{u})}
+#' * \code{Width()}: \eqn{\mathbb{E}[\hat{u} - \hat{l}]}
+#' * \code{TestAgreement()}: \eqn{P\left( \left(\{0 < \hat{l} \text{ and } (c_{1, e} < Z_1 \text{ or } c_{2}(Z_1) < Z_2 ) \right) \text{ or } \left(\{\hat{l} \leq 0  \text{ and } ( Z_1 < c_{1, f} \text{ or } Z_2 \leq c_{2}(Z_1))\}\right)\right)}
 #'
 #' @md
 #' @slot label name of the performance score. Used in printing methods.
@@ -41,10 +50,21 @@ EstimatorScoreResultList <- function(...) {
   class(r) <- c("EstimatorScoreResultList", "list")
   r
 }
+
+#' Combine EstimatoreScoreResult objects into a list
+#'
+#' Creates an object of class EstimatoreScoreResultList,
+#' which is a basically list with the respective
+#' EstimatoreScoreResult objects.
+#'
+#' @param x an object of class EstimatorScoreResult.
+#' @param ... additional arguments passed along to the \code{\link{list}} function
+#' @return an object of class EstimatoreScoreResultList.
 setMethod("c", signature("EstimatorScoreResult"), definition =
             function(x, ...) {
               EstimatorScoreResultList(x, ...)
             })
+#' @inherit c,EstimatorScoreResult-method
 setMethod("c", signature("EstimatorScoreResultList"), definition =
             function(x, ...) {
               EstimatorScoreResultList(x, ...)
@@ -52,6 +72,48 @@ setMethod("c", signature("EstimatorScoreResultList"), definition =
 
 #' Evaluate performance characteristics of an estimator
 #'
+#' This function evaluates an \code{\link{EstimatorScore}} for a \code{\link{PointEstimator}}
+#' or and \code{\link{IntervalEstimator}} by integrating over the sampling distribution.
+#'
+#' @details
+#' ## General
+#'
+#' First, a functional representation of the integrand is created by combining information
+#' from the \code{\link{EstimatorScore}} object (\code{score}) and the \code{\link{PointEstimator}} or
+#' \code{\link{IntervalEstimator}} object (\code{estimator}).
+#' The sampling distribution of a design is determined by the \code{\link{TwoStageDesign}} object
+#' (\code{design}) and the \code{\link{DataDistribution}} object (\code{data_distribution}),
+#' as well as the assumed parameters \eqn{\mu} (mu) and \eqn{\sigma} (sigma).
+#' The other parameters control various details of the integration problem.
+#'
+#' ## Other parameters
+#'
+#' For a two-armed \code{data_distribution},
+#' if \code{use_full_twoarm_sampling_distribution} is \code{TRUE}, the sample means
+#' for both groups are integrated independently. If \code{use_full_twoarm_sampling_distribution}
+#' is \code{FALSE}, only the difference in sample means is integrated.
+#'
+#' \code{true_parameter} controls which parameters is supposed to be estimated. This
+#' is usually \code{mu}, but could be set to \code{sigma} if one is interested in
+#' estimating the standard deviation.
+#'
+#' If the parameter \code{exact} is set to \code{FALSE}
+#' (the default), the continuous version of the second-stage sample-size function \code{\link{n2}}
+#' is used. Otherwise, an integer valued version of that function will be used,
+#' though this is considerably slower.
+#'
+#' The parameters \code{early_futility_part},
+#' \code{continuation_part} and \code{early_efficacy_part} control which parts
+#' of the sample-space should be integrated over (all default to \code{TRUE}).
+#' They can be used in conjunction with the parameter \code{conditional_integral},
+#' which enables the calculation of the expected value of performance score conditional
+#' on reaching any of the selected integration regions.
+#'
+#' Lastly, the paramters
+#' \code{tol}, \code{maxEval}, and \code{absError} control the integration accuracy.
+#' They are handed down to the \code{\link{hcubature}} function.
+#' @md
+#'
 #' @param score performance measure to evaluate.
 #' @param estimator object of class \code{PointEstimator}, \code{IntervalEstimator} or \code{PValue}.
 #' @param data_distribution object of class \code{Normal} or \code{Student}.
@@ -71,6 +133,9 @@ setMethod("c", signature("EstimatorScoreResultList"), definition =
 #' @param conditional_integral treat integral as a conditional integral.
 #'
 #' @return \code{EstimatorScoreResult} object containing the performance characteristics of the estimator.
+#' @seealso [EstimatorScore]
+#' @seealso [PointEstimator] [IntervalEstimator]
+#' @seealso \link[adestr:plot,EstimatorScoreResultList-method]{plot}
 #' @export
 #'
 #' @examples
@@ -83,6 +148,17 @@ setMethod("c", signature("EstimatorScoreResultList"), definition =
 #'   sigma = 1,
 #'   exact = FALSE
 #' )
+#'
+#' evaluate_estimator(
+#'   score = Coverage(),
+#'   estimator = StagewiseCombinationFunctionOrderingCI(),
+#'   data_distribution = Normal(FALSE),
+#'   design = get_example_design(),
+#'   mu = c(0, 0.3, 0.6),
+#'   sigma = 1,
+#'   exact = FALSE
+#' )
+#'
 setGeneric("evaluate_estimator", function(score,
                                           estimator,
                                           data_distribution,
@@ -918,7 +994,7 @@ setMethod("evaluate_estimator", signature("Centrality", "PointEstimator"),
 #' of the elements of the respective sublists is then used to create separate
 #' scenarios.
 #' These scenarios are than evaluated independelty of each other,
-#' allowing for parallelization via the \code{\link{future}} framework. For each scenario,
+#' allowing for parallelization via the \code{\link[future:future]{future}} framework. For each scenario,
 #' one call to the \code{\link{evaluate_estimator}} function is made.
 #'
 #' Concretely, the cross product of
diff --git a/R/plot.R b/R/plot.R
index baee414..ede0317 100644
--- a/R/plot.R
+++ b/R/plot.R
@@ -4,8 +4,9 @@
 #' one facet per score. If the input argument is a list, the different estimators
 #' will be displayed in the same facets, differentiated by color.
 #'
-#' @param EstimatorScoreResultList An output object from evaluate_estimator or a list
-#' of such objects
+#' @param x an output object from evaluate_estimator (\code{EstimatorScoreResult}) or a list
+#' of such objects (\code{EstimatorScoreResultList}).
+#' @param y unused.
 #' @param ... additional arguments handed down to ggplot.
 #' @export
 #' @importFrom ggplot2 ggplot scale_x_continuous geom_line facet_wrap
@@ -46,16 +47,19 @@ setMethod("plot", signature = "EstimatorScoreResultList", definition =
                   )
                 }
               }
-              ggplot(data = dat, mapping = aes(x = mu, y = Score, col = Estimator), ...) +
+              ggplot(data = dat, mapping = aes(x = .data$mu, y = .data$Score, col = .data$Estimator), ...) +
                 scale_x_continuous(name = TeX("$\\mu$")) +
                 geom_line() +
-                facet_wrap(vars(score_name), scales = "free_y")
+                facet_wrap(vars(.data$score_name), scales = "free_y")
             })
+#' @inherit plot,EstimatorScoreResultList-method
 setMethod("plot", signature = "EstimatorScoreResult", definition =
             function(x, ...) {
               l <- EstimatorScoreResultList(x)
               plot(l, ...)
             })
+#' @inherit plot,EstimatorScoreResultList-method
+#' @importFrom graphics plot.default
 setMethod("plot", signature = "list", definition =
             function(x, ...) {
               if (is(x[[1]], "EstimatorScoreResult")) {
@@ -95,7 +99,7 @@ setMethod("plot", signature = "list", definition =
 #' @examples
 #' plot_p(estimator = StagewiseCombinationFunctionOrderingPValue(),
 #'   data_distribution = Normal(FALSE),
-#'   design = design,
+#'   design = get_example_design(),
 #'   mu = 0,
 #'   sigma = 1)
 plot_p <- function(estimator, data_distribution, design, mu = 0, sigma, boundary_color="lightgreen", subdivisions = 100, ...){
@@ -382,6 +386,8 @@ setMethod("plot_sample_mean", signature("DataDistribution", "TwoStageDesign"),
 
 # Helper function to plot designs
 #' @importFrom scales percent
+#' @importFrom ggpubr theme_pubr ggarrange
+#' @import ggplot2
 plot_design <- function(design, data_distribution = Normal(two_armed = FALSE)){
   two_armed <- data_distribution@two_armed
   if (is(data_distribution, "Student")) {
diff --git a/README.Rmd b/README.Rmd
index 6e4a532..99b3a91 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -13,7 +13,7 @@ knitr::opts_chunk$set(
 )
 ```
 
-# adestr <a href='https://github.com/jan-imbi/adestr'><img src='man/figures/sticker.png' align="right" height="70" /></a>
+# adestr <a href='https://github.com/jan-imbi/adestr'><img src='man/figures/sticker.png' align="right" height="80" /></a>
 
 <!-- badges: start -->
 [![R-CMD-check](https://github.com/jan-imbi/adestr/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/jan-imbi/adestr/actions/workflows/R-CMD-check.yaml)
@@ -70,6 +70,16 @@ evaluate_estimator(
  mu = c(0, 0.3, 0.6),
  sigma = 1
 )
+
+evaluate_estimator(
+ score = MSE(),
+ estimator = SampleMean(),
+ data_distribution = Normal(two_armed = TRUE),
+ design = get_example_design(),
+ mu = seq(-0.7, 1.5, .05),
+ sigma = 1
+) |> 
+  plot()
 ```
 
 You can analyze a dataset like this:
@@ -85,8 +95,8 @@ dat <- data.frame(
 )
 analyze(
  data = dat,
- estimator = get_example_statistics(),
- data_distribution = Normal(),
+ statistics = get_example_statistics(),
+ data_distribution = Normal(two_armed = TRUE),
  sigma = 1,
  design = get_example_design()
 )
diff --git a/README.md b/README.md
index a5ec737..1c307c4 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 
 <!-- README.md is generated from README.Rmd. Please edit that file -->
 
-# adestr <a href='https://github.com/jan-imbi/adestr'><img src='man/figures/sticker.png' align="right" height="170" /></a>
+# adestr <a href='https://github.com/jan-imbi/adestr'><img src='man/figures/sticker.png' align="right" height="100" /></a>
 
 <!-- badges: start -->
 
@@ -61,18 +61,30 @@ evaluate_estimator(
  mu = c(0, 0.3, 0.6),
  sigma = 1
 )
-#> Design:                              TwoStageDesign<n1=28;0.8<=x1<=2.3;n2=10-40>
+#> Design:                               TwoStageDesign<n1=28;0.8<=x1<=2.3:n2=9-40>
 #> Data Distribution:                                             Normal<two-armed>
 #> Estimator:                                                           Sample mean
 #> Assumed sigma:                                                                 1
 #> Assumed mu:                                                          0.0 0.3 0.6
 #> Results:
-#>  Expectation:                                -0.03523827  0.28169661  0.63556747
-#>  Bias:                                       -0.03523827 -0.01830339  0.03556747
+#>  Expectation:                                -0.03523827  0.28169661  0.63556746
+#>  Bias:                                       -0.03523827 -0.01830339  0.03556746
 #>  Variance:                                      0.05558910 0.07330464 0.06591361
 #>  MSE:                                           0.05683084 0.07363966 0.06717865
+
+evaluate_estimator(
+ score = MSE(),
+ estimator = SampleMean(),
+ data_distribution = Normal(two_armed = TRUE),
+ design = get_example_design(),
+ mu = seq(-0.7, 1.5, .05),
+ sigma = 1
+) |> 
+  plot()
 ```
 
+<img src="man/figures/README-unnamed-chunk-4-1.png" width="100%" />
+
 You can analyze a dataset like this:
 
 ``` r
@@ -86,23 +98,34 @@ dat <- data.frame(
 )
 analyze(
  data = dat,
- estimator = get_example_statistics(),
- data_distribution = Normal(),
+ statistics = get_example_statistics(),
+ data_distribution = Normal(two_armed = TRUE),
  sigma = 1,
  design = get_example_design()
 )
-#> Design:                              TwoStageDesign<n1=28;0.8<=x1<=2.3;n2=10-40>
+#> Design:                               TwoStageDesign<n1=28;0.8<=x1<=2.3:n2=9-40>
 #> Data Distribution:                                             Normal<two-armed>
+#> Observed number of stages:                                                     2
+#> Observed n1 (group 1)                                                         28
+#> Observed n1 (group 2)                                                         28
+#> Observed n1 (total)                                                           56
 #> Z1                                                                          1.75
-#> Actual number of stages:                                                       2
+#> Interim decision:                                       continue to second stage
+#> Calculated n2(Z1) (per group)                                                 23
+#> Calculated c2(Z1)                                                           1.14
+#> Observed n2 (group 1)                                                         23
+#> Observed n2 (group 2)                                                         23
+#> Observed n2 (in total)                                                        46
+#> Z2                                                                          2.12
+#> Final test decision:                                                 reject null
 #> 
 #> Stage 2 results:
-#>  Sample mean:                                                          0.5044685
-#>  Pseudo Rao-Blackwellized:                                             0.3559511
-#>  Median unbiased (LR test ordering):                                   0.4806717
-#>  Bias reduced MLE (iterations=1):                                      0.4994132
-#>  SWCF ordering CI:                                       [0.06262736, 0.7232758]
-#>  LR test ordering CI:                                     [0.2127897, 0.7949281]
-#>  SWCF ordering p-value:                                                 0.010977
-#>  LR test ordering p-value:                                          0.0001877094
+#>  Sample mean:                                                          0.5389012
+#>  Pseudo Rao-Blackwellized:                                             0.3632916
+#>  Median unbiased (LR test ordering):                                   0.5069941
+#>  Bias reduced MLE (iterations=1):                                      0.5253743
+#>  SWCF ordering CI:                                       [0.06264641, 0.7431231]
+#>  LR test ordering CI:                                       [0.2504091, 0.81829]
+#>  SWCF ordering p-value:                                               0.01097483
+#>  LR test ordering p-value:                                          6.653031e-05
 ```
diff --git a/inst/REFERENCES.bib b/inst/REFERENCES.bib
index 9401871..d1f9f23 100644
--- a/inst/REFERENCES.bib
+++ b/inst/REFERENCES.bib
@@ -19,3 +19,78 @@ @article{pilz2021optimal
 	doi = {10.1002/sim.8953},
 	year = {2021}
 }
+@article{brannath2006estimation,
+author = {Brannath, Werner and König, Franz and Bauer, Peter},
+title = {Estimation in flexible two stage designs},
+journal = {Statistics in Medicine},
+volume = {25},
+number = {19},
+pages = {3366-3381},
+keywords = {adaptive design, flexible confidence interval, invariance principle, maximum likelihood estimate, mean unbiased estimate, median unbiased estimate},
+doi = {10.1002/sim.2258},
+url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/sim.2258},
+eprint = {https://onlinelibrary.wiley.com/doi/pdf/10.1002/sim.2258},
+year = {2006}
+}
+@book{wassmer2016group,
+  title={Group Sequential and Confirmatory Adaptive Designs in Clinical Trials},
+  author={Wassmer, Gernot and Brannath, Werner},
+  year={2016},
+  publisher={Springer},
+  address={Cham, Switzerland},
+  edition={1},
+  doi = {10.1007/978-3-319-32562-0}
+}
+@article{emerson1997computationally,
+title={A computationally simpler algorithm for the UMVUE of a normal mean following a group sequential trial},
+author={Emerson, Scott S and Kittelson, John M},
+journal={Biometrics},
+volume={53},
+number={1},
+pages={365--369},
+year={1997},
+publisher={JSTOR},
+doi={10.2307/2533122}
+}
+@article{whitehead1986bias,
+  title={On the bias of maximum likelihood estimation following a sequential test},
+  author={Whitehead, John},
+  journal={Biometrika},
+  volume={73},
+  number={3},
+  pages={573--581},
+  year={1986},
+  publisher={Oxford University Press},
+  doi={10.2307/2336521}
+}
+@article{emerson1990parameter,
+  title={Parameter estimation following group sequential hypothesis testing},
+  author={Emerson, Scott S and Fleming, Thomas R},
+  journal={Biometrika},
+  volume={77},
+  number={4},
+  pages={875--892},
+  year={1990},
+  publisher={Oxford University Press},
+  doi={10.2307/2337110}
+}
+@book{jennison1999group,
+  title={Group Sequential Methods with Applications to Clinical Trials},
+  author={Jennison, Christopher and Turnbull, Bruce W},
+  year={1999},
+  publisher={Chapman and Hall/CRC.},
+  address = {New York},
+  edition={1},
+  doi={10.1201/9780367805326}
+}
+
+
+
+
+
+
+
+
+
+
+
diff --git a/man/EstimatorScore-class.Rd b/man/EstimatorScore-class.Rd
index 90b370a..1b92746 100644
--- a/man/EstimatorScore-class.Rd
+++ b/man/EstimatorScore-class.Rd
@@ -38,6 +38,9 @@ Centrality(interval = NULL)
 }
 \arguments{
 \item{shrinkage}{shrinkage factor for bump function.}
+
+\item{interval}{confidence interval with respect to which centrality of a point
+estimator should be evaluated.}
 }
 \value{
 an \code{EstimatorScore} object.
@@ -58,7 +61,8 @@ in \code{\link{adestr}}
 are given. To this end,
 let \eqn{\hat{\mu}} denote a point estimator, (\eqn{\hat{l}}, \eqn{\hat{u}})
 an interval estimator, denote the expected value of a random variable
-by \eqn{\mathbb{E}}, and let \eqn{\mu} be the real value of the underlying
+by \eqn{\mathbb{E}}, the probability of an event by \eqn{P},
+and let \eqn{\mu} be the real value of the underlying
 parameter to be estimated.
 \subsection{Scores for point estimators (\code{PointEstimatorScore}):}{
 \itemize{
@@ -66,6 +70,16 @@ parameter to be estimated.
 \item \code{Bias()}: \eqn{\mathbb{E}[\hat{\mu} - \mu]}
 \item \code{Variance()}: \eqn{\mathbb{E}[(\hat{\mu} - \mathbb{E}[\hat{\mu}])^2]}
 \item \code{MSE()}: \eqn{\mathbb{E}[(\hat{\mu} - mu)^2]}
+\item \code{OverestimationProbability()}: \eqn{P(\hat{\mu} > \mu)}
+\item \code{Centrality(interval)}: \eqn{\mathbb{E}[(\hat{\mu} - \hat{l}) + (\hat{\mu} - \hat{u}]}
+}
+}
+
+\subsection{Scores for confidence intervals (\code{IntervalEstimatorScore}):}{
+\itemize{
+\item \code{Coverage()}: \eqn{P(\hat{l} \leq \mu \leq \hat{u})}
+\item \code{Width()}: \eqn{\mathbb{E}[\hat{u} - \hat{l}]}
+\item \code{TestAgreement()}: \eqn{P\left( \left(\{0 < \hat{l} \text{ and } (c_{1, e} < Z_1 \text{ or } c_{2}(Z_1) < Z_2 ) \right) \text{ or } \left(\{\hat{l} \leq 0  \text{ and } ( Z_1 < c_{1, f} \text{ or } Z_2 \leq c_{2}(Z_1))\}\right)\right)}
 }
 }
 }
@@ -80,6 +94,17 @@ evaluate_estimator(
   sigma = 1,
   exact = FALSE
 )
+
+evaluate_estimator(
+  score = Coverage(),
+  estimator = StagewiseCombinationFunctionOrderingCI(),
+  data_distribution = Normal(FALSE),
+  design = get_example_design(),
+  mu = c(0, 0.3, 0.6),
+  sigma = 1,
+  exact = FALSE
+)
+
 }
 \seealso{
 \code{\link{evaluate_estimator}}
diff --git a/man/IntervalEstimator-class.Rd b/man/IntervalEstimator-class.Rd
index a066f9c..b8cc217 100644
--- a/man/IntervalEstimator-class.Rd
+++ b/man/IntervalEstimator-class.Rd
@@ -49,17 +49,45 @@ NaiveCI(two_sided = TRUE)
 \item{mu1}{expected value of the normal distribution under the null hypothesis.}
 }
 \value{
-An object of class \code{IntervalEstimator}.
+an object of class \code{IntervalEstimator}.
 }
 \description{
-Interval estimators
+This is the parent class for all confidence intervals implemented in this package.
+Currently, only confidence intervals for the parameter \eqn{\mu} of a normal distribution
+are implemented. Details about the methods for calculating confidence intervals can be found in
+(our upcoming paper).
+}
+\details{
+The implemented confidence intervals are:
+\itemize{
+\item \code{MLEOrderingCI()}
+\item \code{LikelihoodRatioOrderingCI()}
+\item \code{ScoreTestOrderingCI()}
+\item \code{StagewiseCombinationFunctionOrderingCI()}
+}
+
+These confidence intervals are constructed by specifying an ordering of the sample space
+and finding the value of \eqn{\mu}, such that the observed sample is the
+\eqn{\alpha/2} (or (\eqn{1-\alpha/2})) quantile of the sample space according to the
+chosen ordering.
+Some of the implemented orderings are based on the work presented in
+\insertCite{emerson1990parameter}{adestr},
+\insertCite{@Sections 8.4 in @jennison1999group}{adestr},
+and \insertCite{@Sections 4.1.1 and 8.2.1 in @wassmer2016group}{adestr}.
 }
 \examples{
+# This is the definition of the 'naive' confidence interval for one-armed trials
 IntervalEstimator(
-  two_sided = FALSE,
-  l1 = \(smean1, ...) smean1 - 1,
-  u1 = \(smean1, ...) smean1 + 1,
-  l2 = \(smean2, ...) smean2 - 1,
-  u2 = \(smean2, ...) smean2 + 1,
-  label="My custom p-value")
+  two_sided = TRUE,
+  l1 = \(smean1, n1, sigma, ...) smean1 - qnorm(.95, sd = sigma/sqrt(n1)),
+  u1 = \(smean1, n1, sigma, ...) smean1 + qnorm(.95, sd = sigma/sqrt(n1)),
+  l2 = \(smean1, smean2, n1, n2, sigma, ...) smean2 - qnorm(.95, sd = sigma/sqrt(n1 + n2)),
+  u2 = \(smean1, smean2, n1, n2, sigma, ...) smean2 + qnorm(.95, sd = sigma/sqrt(n1 + n2)),
+  label="My custom CI")
+}
+\references{
+\insertAllCited{}
+}
+\seealso{
+\code{\link{evaluate_estimator}}
 }
diff --git a/man/PValue-class.Rd b/man/PValue-class.Rd
index 154f2a2..6b6f739 100644
--- a/man/PValue-class.Rd
+++ b/man/PValue-class.Rd
@@ -47,11 +47,38 @@ NeymanPearsonOrderingPValue(mu0 = 0, mu1 = 0.4)
 An object of class \code{PValue}.
 }
 \description{
-P-values
+This is the parent class for all p-values implemented in this package.
+Details about the methods for calculating p-values can be found in
+(our upcoming paper).
+}
+\details{
+The implemented p-values are:
+\itemize{
+\item \code{MLEOrderingPValue()}
+\item \code{LikelihoodRatioOrderingPValue()}
+\item \code{ScoreTestOrderingPValue()}
+\item \code{StagewiseCombinationFunctionOrderingPValue()}
+}
+
+The p-values are calculated by specifying an ordering of the sample space
+calculating the probability that a random sample under the null hypothesis is
+larger than the observed sample.
+Some of the implemented orderings are based on the work presented in
+\insertCite{emerson1990parameter}{adestr},
+\insertCite{@Sections 8.4 in @jennison1999group}{adestr},
+and \insertCite{@Sections 4.1.1 and 8.2.1 in @wassmer2016group}{adestr}.
 }
 \examples{
+# This is the definition of a 'naive' p-value based on a Z-test for a one-armed trial
 PValue(
-  g1 = \(smean1, ...) runif(length(smean1)),
-  g2 = \(smean2, ...) runif(length(smean2)),
+  g1 = \(smean1, n1, sigma, ...) pnorm(smean1*sqrt(n1)/sigma, lower.tail=FALSE),
+  g2 = \(smean1, smean2, n1, n2, ...) pnorm((n1 * smean1 + n2 * smean2)/(n1 + n2) *
+                                        sqrt(n1+n2)/sigma, lower.tail=FALSE),
   label="My custom p-value")
 }
+\references{
+\insertAllCited{}
+}
+\seealso{
+\link{plot_p}
+}
diff --git a/man/PointEstimator-class.Rd b/man/PointEstimator-class.Rd
index 706ae07..093c5d5 100644
--- a/man/PointEstimator-class.Rd
+++ b/man/PointEstimator-class.Rd
@@ -69,7 +69,7 @@ MedianUnbiasedNeymanPearsonOrdering(mu0 = 0, mu1 = 0.4)
 
 \item{label}{name of the estimator. Used in printing methods.}
 
-\item{w1}{weight of the first-stage sample mean.}
+\item{w1}{weight of the first-stage data.}
 
 \item{iterations}{number of bias reduction iterations. Defaults to 1.}
 
@@ -78,11 +78,79 @@ MedianUnbiasedNeymanPearsonOrdering(mu0 = 0, mu1 = 0.4)
 \item{mu1}{expected value of the normal distribution under the null hypothesis.}
 }
 \value{
-An object of class \code{PointEstimator}.
+an object of class \code{PointEstimator}.
 }
 \description{
-Point estimators
+This is the parent class for all point estimators implemented in this package.
+Currently, only estimators for the parameter \eqn{\mu} of a normal distribution
+are implemented.
+}
+\details{
+Details about the point estimators can be found in (our upcoming paper).
+\subsection{Sample Mean (\code{SampleMean()})}{
+
+The sample mean is the maximum likelihood estimator for the mean and probably the
+'most straightforward' of the implemented estimators.
+}
+
+\subsection{Fixed weighted sample means (\code{WeightedSampleMean()})}{
+
+The first- and second-stage (if available) sample means are combined via fixed, predefined
+weights. See \insertCite{brannath2006estimation}{adestr} and \insertCite{@Section 8.3.2 in @wassmer2016group}{adestr}.
+}
+
+\subsection{Adaptively weighted sample means (\code{AdaptivelyWeightedSampleMean()})}{
+
+The first- and second-stage (if available) sample means are combined via a combination of
+fixed and adaptively modified
+weights that depend on the standard error.
+See \insertCite{@Section 8.3.4 in @wassmer2016group}{adestr}.
+}
+
+\subsection{Minimizing peak variance in adaptively weighted sample means (\code{MinimizePeakVariance()})}{
+
+For this estimator, the weights of the adaptively weighted sample mean are chosen to
+minimize the variance of the estimator for the value of \eqn{\mu} which maximizes
+the expected sample size.
+}
+
+\subsection{(Pseudo) Rao-Blackwell estimators (\code{RaoBlackwell} and \code{PseudoRaoBlackwell})}{
+
+The conditional expectation of the first-stage sample mean given the overall sample
+mean and the second-stage sample size. See \insertCite{emerson1997computationally}{adestr}.
+}
+
+\subsection{A bias-reduced estimator (\code{BiasReduced()})}{
+
+This estimator is calculated by subtracting an estimate of the bias from the MLE.
+See \insertCite{whitehead1986bias}{adestr}.
+}
+
+\subsection{Median-unbiased estimators}{
+
+The implemented median-unbiased estimators are:
+\itemize{
+\item \code{MedianUnbiasedMLEOrdering()}
+\item \code{MedianUnbiasedLikelihoodRatioOrdering()}
+\item \code{MedianUnbiasedScoreTestOrdering()}
+\item \code{MedianUnbiasedStagewiseCombinationFunctionOrdering()}
+}
+
+These estimators are constructed by specifying an ordering of the sample space
+and finding the value of \eqn{\mu}, such that the observed sample is the
+median of the sample space according to the chosen ordering.
+Some of the implemented orderings are based on the work presented in
+\insertCite{emerson1990parameter}{adestr},
+\insertCite{@Sections 8.4 in @jennison1999group}{adestr},
+and \insertCite{@Sections 4.1.1 and 8.2.1 in @wassmer2016group}{adestr}.
+}
 }
 \examples{
 PointEstimator(g1 = \(smean1, ...) smean1,g2 = \(smean2, ...) smean2, label="My custom estimator")
 }
+\references{
+\insertAllCited{}
+}
+\seealso{
+\code{\link{evaluate_estimator}}
+}
diff --git a/man/adestr.Rd b/man/adestr.Rd
index 6cc5a47..4be71e4 100644
--- a/man/adestr.Rd
+++ b/man/adestr.Rd
@@ -6,26 +6,30 @@
 \alias{adestr-package}
 \title{adestr}
 \description{
-Point estimates, confidence intervals, and p-values for optimal adaptive two-stage designs
+Point estimates, confidence intervals, and p-values for optimal adaptive two-stage designs.
 }
 \details{
-This package implements methods to evaluate the performance characteristics of
-various point and interval estimators for optimal adaptive two-stage designs.
+This package implements methods to \link[adestr:evaluate_estimator]{evaluate the performance characteristics} of
+various \link[adestr:PointEstimator]{point} and \link[adestr:IntervalEstimator]{interval} estimators for optimal adaptive two-stage designs.
 Specifically, this package is written to interface with trial designs created by the \code{\link{adoptr}} package
 \insertCite{kunzmann2021adoptr,pilz2021optimal}{adestr}.
 Apart from the a priori evaluation of performance characteristics, this package also allows for the
-evaluation of the implemented estimators on real datasets, and it implements methods
-to calculate p-values.
+\link[adestr:analyze]{calculation of the values of the estimators} given real datasets, and it implements methods
+to calculate \link[adestr:PValue]{p-values}.
 }
 \references{
 \insertAllCited{}
 }
 \seealso{
-Useful links:
-\itemize{
-  \item \url{https://jan-imbi.github.io/adestr/}
-}
+\code{\link[adestr:evaluate_estimator]{evaluate_estimator}}
+
+\code{\link[adestr:analyze]{analyze}}
+
+\code{\link[adestr:Statistic]{Statistic}} \code{\link[adestr:PointEstimator]{PointEstimator}} \code{\link[adestr:IntervalEstimator]{IntervalEstimator}} \code{\link[adestr:PValue]{PValue}}
+
+\code{\link[adestr:plot,EstimatorScoreResultList-method]{plot}} \code{\link[adestr:plot_p]{plot_p}}
 
+\url{https://jan-imbi.github.io/adestr/}
 }
 \author{
 \strong{Maintainer}: Jan Meis \email{meis@imbi.uni-heidelberg.de} (\href{https://orcid.org/0000-0001-5407-7220}{ORCID})
diff --git a/man/analyze.Rd b/man/analyze.Rd
index 7f16471..c7536c0 100644
--- a/man/analyze.Rd
+++ b/man/analyze.Rd
@@ -28,7 +28,8 @@ analyze(
 \arguments{
 \item{data}{a data.frame containing the data to be analyzed.}
 
-\item{statistics}{a list of objects of class \code{\link{PointEstimator}}, \code{\link{ConfidenceInterval}} or
+\item{statistics}{a list of objects of class \code{\link{PointEstimator}},
+\code{\link{ConfidenceInterval}} or
 \code{\link{PValue}}.}
 
 \item{data_distribution}{object of class \code{Normal} or \code{Student}.}
@@ -52,20 +53,37 @@ The \code{analyze} function can be used calculate the values of a list of
 \link[adestr:ConfidenceInterval]{confidence intervals},
 and \link[adestr:PValue]{p-values} for a given dataset.
 }
+\details{
+Note that in \code{\link{adestr}}, statistics are codes as functions of the
+stage-wise sample means (and stage-wise sample variances if data_distribution is
+\code{\link{Student}}). In a first-step, the data is summarized to produce these
+parameters. Then, the list of statistics are evaluated at the values of these parameters.
+
+The output of the \code{analyze} function also displays information on the hypothesis
+test and the interim decision. If the \code{\link[adestr:Statistic-class]{statistics}} list is empty, this will
+be the only information displayed.
+}
 \examples{
-set.seed(321)
+set.seed(123)
 dat <- data.frame(
-  endpoint = c(rnorm(28, .2, 1), rnorm(28, 0, 1),
-               rnorm(23, .2, 1), rnorm(23, 0, 1)),
-  group = factor(rep(c("ctl", "trt", "ctl", "trt"),
-                     c(28,28,23,23))),
-  stage = rep(c(1L, 2L), c(56, 46))
-)
-analyze(
-  data = dat,
-  statistics = c(get_example_statistics()),
-  data_distribution = Normal(),
-  sigma = 1,
-  design = get_example_design()
+  endpoint = c(rnorm(28, 0.3)),
+  stage = rep(1, 28)
 )
+analyze(data = dat,
+        statistics = list(),
+        data_distribution = Normal(FALSE),
+        design = get_example_design(),
+        sigma = 1)
+
+# The results suggest recruiting 32 patients for the second stage
+dat <- rbind(
+  dat,
+  data.frame(
+    endpoint = rnorm(32, mean = 0.3),
+    stage = rep(2, 32)))
+analyze(data = dat,
+        statistics = get_example_statistics(),
+        data_distribution = Normal(FALSE),
+        design = get_example_design(),
+        sigma = 1)
 }
diff --git a/man/c-EstimatorScoreResult-method.Rd b/man/c-EstimatorScoreResult-method.Rd
new file mode 100644
index 0000000..61e254e
--- /dev/null
+++ b/man/c-EstimatorScoreResult-method.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/evaluate_estimator.R
+\name{c,EstimatorScoreResult-method}
+\alias{c,EstimatorScoreResult-method}
+\title{Combine EstimatoreScoreResult objects into a list}
+\usage{
+\S4method{c}{EstimatorScoreResult}(x, ...)
+}
+\arguments{
+\item{x}{an object of class EstimatorScoreResult.}
+
+\item{...}{additional arguments passed along to the \code{\link{list}} function}
+}
+\value{
+an object of class EstimatoreScoreResultList.
+}
+\description{
+Creates an object of class EstimatoreScoreResultList,
+which is a basically list with the respective
+EstimatoreScoreResult objects.
+}
diff --git a/man/c-EstimatorScoreResultList-method.Rd b/man/c-EstimatorScoreResultList-method.Rd
new file mode 100644
index 0000000..eddfe6f
--- /dev/null
+++ b/man/c-EstimatorScoreResultList-method.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/evaluate_estimator.R
+\name{c,EstimatorScoreResultList-method}
+\alias{c,EstimatorScoreResultList-method}
+\title{Combine EstimatoreScoreResult objects into a list}
+\usage{
+\S4method{c}{EstimatorScoreResultList}(x, ...)
+}
+\arguments{
+\item{x}{an object of class EstimatorScoreResult.}
+
+\item{...}{additional arguments passed along to the \code{\link{list}} function}
+}
+\value{
+an object of class EstimatoreScoreResultList.
+}
+\description{
+Creates an object of class EstimatoreScoreResultList,
+which is a basically list with the respective
+EstimatoreScoreResult objects.
+}
diff --git a/man/evaluate_estimator-methods.Rd b/man/evaluate_estimator-methods.Rd
index d13c02a..de74c77 100644
--- a/man/evaluate_estimator-methods.Rd
+++ b/man/evaluate_estimator-methods.Rd
@@ -328,7 +328,48 @@ with the full sampling distribution in a two-armed trial.}
 \code{EstimatorScoreResult} object containing the performance characteristics of the estimator.
 }
 \description{
-Evaluate performance characteristics of an estimator
+This function evaluates an \code{\link{EstimatorScore}} for a \code{\link{PointEstimator}}
+or and \code{\link{IntervalEstimator}} by integrating over the sampling distribution.
+}
+\details{
+\subsection{General}{
+
+First, a functional representation of the integrand is created by combining information
+from the \code{\link{EstimatorScore}} object (\code{score}) and the \code{\link{PointEstimator}} or
+\code{\link{IntervalEstimator}} object (\code{estimator}).
+The sampling distribution of a design is determined by the \code{\link{TwoStageDesign}} object
+(\code{design}) and the \code{\link{DataDistribution}} object (\code{data_distribution}),
+as well as the assumed parameters \eqn{\mu} (mu) and \eqn{\sigma} (sigma).
+The other parameters control various details of the integration problem.
+}
+
+\subsection{Other parameters}{
+
+For a two-armed \code{data_distribution},
+if \code{use_full_twoarm_sampling_distribution} is \code{TRUE}, the sample means
+for both groups are integrated independently. If \code{use_full_twoarm_sampling_distribution}
+is \code{FALSE}, only the difference in sample means is integrated.
+
+\code{true_parameter} controls which parameters is supposed to be estimated. This
+is usually \code{mu}, but could be set to \code{sigma} if one is interested in
+estimating the standard deviation.
+
+If the parameter \code{exact} is set to \code{FALSE}
+(the default), the continuous version of the second-stage sample-size function \code{\link{n2}}
+is used. Otherwise, an integer valued version of that function will be used,
+though this is considerably slower.
+
+The parameters \code{early_futility_part},
+\code{continuation_part} and \code{early_efficacy_part} control which parts
+of the sample-space should be integrated over (all default to \code{TRUE}).
+They can be used in conjunction with the parameter \code{conditional_integral},
+which enables the calculation of the expected value of performance score conditional
+on reaching any of the selected integration regions.
+
+Lastly, the paramters
+\code{tol}, \code{maxEval}, and \code{absError} control the integration accuracy.
+They are handed down to the \code{\link{hcubature}} function.
+}
 }
 \examples{
 evaluate_estimator(
@@ -340,4 +381,22 @@ evaluate_estimator(
   sigma = 1,
   exact = FALSE
 )
+
+evaluate_estimator(
+  score = Coverage(),
+  estimator = StagewiseCombinationFunctionOrderingCI(),
+  data_distribution = Normal(FALSE),
+  design = get_example_design(),
+  mu = c(0, 0.3, 0.6),
+  sigma = 1,
+  exact = FALSE
+)
+
+}
+\seealso{
+\link{EstimatorScore}
+
+\link{PointEstimator} \link{IntervalEstimator}
+
+\link[adestr:plot,EstimatorScoreResultList-method]{plot}
 }
diff --git a/man/evaluate_estimator.Rd b/man/evaluate_estimator.Rd
index 8ebca3c..cd64200 100644
--- a/man/evaluate_estimator.Rd
+++ b/man/evaluate_estimator.Rd
@@ -63,7 +63,48 @@ with the full sampling distribution in a two-armed trial.}
 \code{EstimatorScoreResult} object containing the performance characteristics of the estimator.
 }
 \description{
-Evaluate performance characteristics of an estimator
+This function evaluates an \code{\link{EstimatorScore}} for a \code{\link{PointEstimator}}
+or and \code{\link{IntervalEstimator}} by integrating over the sampling distribution.
+}
+\details{
+\subsection{General}{
+
+First, a functional representation of the integrand is created by combining information
+from the \code{\link{EstimatorScore}} object (\code{score}) and the \code{\link{PointEstimator}} or
+\code{\link{IntervalEstimator}} object (\code{estimator}).
+The sampling distribution of a design is determined by the \code{\link{TwoStageDesign}} object
+(\code{design}) and the \code{\link{DataDistribution}} object (\code{data_distribution}),
+as well as the assumed parameters \eqn{\mu} (mu) and \eqn{\sigma} (sigma).
+The other parameters control various details of the integration problem.
+}
+
+\subsection{Other parameters}{
+
+For a two-armed \code{data_distribution},
+if \code{use_full_twoarm_sampling_distribution} is \code{TRUE}, the sample means
+for both groups are integrated independently. If \code{use_full_twoarm_sampling_distribution}
+is \code{FALSE}, only the difference in sample means is integrated.
+
+\code{true_parameter} controls which parameters is supposed to be estimated. This
+is usually \code{mu}, but could be set to \code{sigma} if one is interested in
+estimating the standard deviation.
+
+If the parameter \code{exact} is set to \code{FALSE}
+(the default), the continuous version of the second-stage sample-size function \code{\link{n2}}
+is used. Otherwise, an integer valued version of that function will be used,
+though this is considerably slower.
+
+The parameters \code{early_futility_part},
+\code{continuation_part} and \code{early_efficacy_part} control which parts
+of the sample-space should be integrated over (all default to \code{TRUE}).
+They can be used in conjunction with the parameter \code{conditional_integral},
+which enables the calculation of the expected value of performance score conditional
+on reaching any of the selected integration regions.
+
+Lastly, the paramters
+\code{tol}, \code{maxEval}, and \code{absError} control the integration accuracy.
+They are handed down to the \code{\link{hcubature}} function.
+}
 }
 \examples{
 evaluate_estimator(
@@ -75,4 +116,22 @@ evaluate_estimator(
   sigma = 1,
   exact = FALSE
 )
+
+evaluate_estimator(
+  score = Coverage(),
+  estimator = StagewiseCombinationFunctionOrderingCI(),
+  data_distribution = Normal(FALSE),
+  design = get_example_design(),
+  mu = c(0, 0.3, 0.6),
+  sigma = 1,
+  exact = FALSE
+)
+
+}
+\seealso{
+\link{EstimatorScore}
+
+\link{PointEstimator} \link{IntervalEstimator}
+
+\link[adestr:plot,EstimatorScoreResultList-method]{plot}
 }
diff --git a/man/evaluate_scenarios_parallel.Rd b/man/evaluate_scenarios_parallel.Rd
index eeaeb8a..8b30e9b 100644
--- a/man/evaluate_scenarios_parallel.Rd
+++ b/man/evaluate_scenarios_parallel.Rd
@@ -65,7 +65,7 @@ and lists lists of various other design parameters. Each possible combination
 of the elements of the respective sublists is then used to create separate
 scenarios.
 These scenarios are than evaluated independelty of each other,
-allowing for parallelization via the \code{\link{future}} framework. For each scenario,
+allowing for parallelization via the \code{\link[future:future]{future}} framework. For each scenario,
 one call to the \code{\link{evaluate_estimator}} function is made.
 }
 \details{
diff --git a/man/figures/README-unnamed-chunk-4-1.png b/man/figures/README-unnamed-chunk-4-1.png
new file mode 100644
index 0000000..477a873
Binary files /dev/null and b/man/figures/README-unnamed-chunk-4-1.png differ
diff --git a/man/get_example_statistics.Rd b/man/get_example_statistics.Rd
index 08e178c..9ac35df 100644
--- a/man/get_example_statistics.Rd
+++ b/man/get_example_statistics.Rd
@@ -24,19 +24,26 @@ a list of estimators and pvalues.
 Generate a list of estimators and p-values to use in examples
 }
 \examples{
-set.seed(321)
+set.seed(123)
 dat <- data.frame(
-  endpoint = c(rnorm(28, .2, 1), rnorm(28, 0, 1),
-               rnorm(23, .2, 1), rnorm(23, 0, 1)),
-  group = factor(rep(c("ctl", "trt", "ctl", "trt"),
-                     c(28,28,23,23))),
-  stage = rep(c(1L, 2L), c(56, 46))
-)
-analyze(
-  data = dat,
-  statistics = c(get_example_statistics()),
-  data_distribution = Normal(),
-  sigma = 1,
-  design = get_example_design()
+  endpoint = c(rnorm(28, 0.3)),
+  stage = rep(1, 28)
 )
+analyze(data = dat,
+        statistics = list(),
+        data_distribution = Normal(FALSE),
+        design = get_example_design(),
+        sigma = 1)
+
+# The results suggest recruiting 32 patients for the second stage
+dat <- rbind(
+  dat,
+  data.frame(
+    endpoint = rnorm(32, mean = 0.3),
+    stage = rep(2, 32)))
+analyze(data = dat,
+        statistics = get_example_statistics(),
+        data_distribution = Normal(FALSE),
+        design = get_example_design(),
+        sigma = 1)
 }
diff --git a/man/get_statistics_from_paper.Rd b/man/get_statistics_from_paper.Rd
index e0f29b9..e9751c1 100644
--- a/man/get_statistics_from_paper.Rd
+++ b/man/get_statistics_from_paper.Rd
@@ -24,19 +24,26 @@ a list of estimators and pvalues.
 Generate a list of estimators and p-values to use in examples
 }
 \examples{
-set.seed(321)
+set.seed(123)
 dat <- data.frame(
-  endpoint = c(rnorm(28, .2, 1), rnorm(28, 0, 1),
-               rnorm(23, .2, 1), rnorm(23, 0, 1)),
-  group = factor(rep(c("ctl", "trt", "ctl", "trt"),
-                     c(28,28,23,23))),
-  stage = rep(c(1L, 2L), c(56, 46))
-)
-analyze(
-  data = dat,
-  statistics = c(get_example_statistics()),
-  data_distribution = Normal(),
-  sigma = 1,
-  design = get_example_design()
+  endpoint = c(rnorm(28, 0.3)),
+  stage = rep(1, 28)
 )
+analyze(data = dat,
+        statistics = list(),
+        data_distribution = Normal(FALSE),
+        design = get_example_design(),
+        sigma = 1)
+
+# The results suggest recruiting 32 patients for the second stage
+dat <- rbind(
+  dat,
+  data.frame(
+    endpoint = rnorm(32, mean = 0.3),
+    stage = rep(2, 32)))
+analyze(data = dat,
+        statistics = get_example_statistics(),
+        data_distribution = Normal(FALSE),
+        design = get_example_design(),
+        sigma = 1)
 }
diff --git a/man/plot-EstimatorScoreResult-method.Rd b/man/plot-EstimatorScoreResult-method.Rd
new file mode 100644
index 0000000..4ed5517
--- /dev/null
+++ b/man/plot-EstimatorScoreResult-method.Rd
@@ -0,0 +1,45 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plot.R
+\name{plot,EstimatorScoreResult-method}
+\alias{plot,EstimatorScoreResult-method}
+\title{Plot performance scores for point and interval estimators}
+\usage{
+\S4method{plot}{EstimatorScoreResult}(x, y, ...)
+}
+\arguments{
+\item{x}{an output object from evaluate_estimator (\code{EstimatorScoreResult}) or a list
+of such objects (\code{EstimatorScoreResultList}).}
+
+\item{y}{unused.}
+
+\item{...}{additional arguments handed down to ggplot.}
+}
+\value{
+a ggplot2 object visualizing the score values.
+}
+\description{
+This function extract the values of mu and the score values and a facet plot with
+one facet per score. If the input argument is a list, the different estimators
+will be displayed in the same facets, differentiated by color.
+}
+\examples{
+score_result1 <- evaluate_estimator(
+  MSE(),
+  estimator = SampleMean(),
+  data_distribution = Normal(FALSE),
+  design = get_example_design(),
+  mu=seq(-.75, 1.32, 0.03),
+  sigma=1)
+# Plotting the result of evaluate_estimator
+plot(score_result1)
+
+score_result2 <- evaluate_estimator(
+  MSE(),
+  estimator = AdaptivelyWeightedSampleMean(w1 = 0.8),
+  data_distribution = Normal(FALSE),
+  design = get_example_design(),
+  mu=seq(-.75, 1.32, 0.03),
+  sigma=1)
+# Plotting a list of different score results
+plot(c(score_result1, score_result2))
+}
diff --git a/man/plot-EstimatorScoreResultList-method.Rd b/man/plot-EstimatorScoreResultList-method.Rd
index ee06159..00d830b 100644
--- a/man/plot-EstimatorScoreResultList-method.Rd
+++ b/man/plot-EstimatorScoreResultList-method.Rd
@@ -7,10 +7,12 @@
 \S4method{plot}{EstimatorScoreResultList}(x, y, ...)
 }
 \arguments{
-\item{...}{additional arguments handed down to ggplot.}
+\item{x}{an output object from evaluate_estimator (\code{EstimatorScoreResult}) or a list
+of such objects (\code{EstimatorScoreResultList}).}
+
+\item{y}{unused.}
 
-\item{EstimatorScoreResultList}{An output object from evaluate_estimator or a list
-of such objects}
+\item{...}{additional arguments handed down to ggplot.}
 }
 \value{
 a ggplot2 object visualizing the score values.
diff --git a/man/plot-list-method.Rd b/man/plot-list-method.Rd
new file mode 100644
index 0000000..5606a11
--- /dev/null
+++ b/man/plot-list-method.Rd
@@ -0,0 +1,45 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plot.R
+\name{plot,list-method}
+\alias{plot,list-method}
+\title{Plot performance scores for point and interval estimators}
+\usage{
+\S4method{plot}{list}(x, y, ...)
+}
+\arguments{
+\item{x}{an output object from evaluate_estimator (\code{EstimatorScoreResult}) or a list
+of such objects (\code{EstimatorScoreResultList}).}
+
+\item{y}{unused.}
+
+\item{...}{additional arguments handed down to ggplot.}
+}
+\value{
+a ggplot2 object visualizing the score values.
+}
+\description{
+This function extract the values of mu and the score values and a facet plot with
+one facet per score. If the input argument is a list, the different estimators
+will be displayed in the same facets, differentiated by color.
+}
+\examples{
+score_result1 <- evaluate_estimator(
+  MSE(),
+  estimator = SampleMean(),
+  data_distribution = Normal(FALSE),
+  design = get_example_design(),
+  mu=seq(-.75, 1.32, 0.03),
+  sigma=1)
+# Plotting the result of evaluate_estimator
+plot(score_result1)
+
+score_result2 <- evaluate_estimator(
+  MSE(),
+  estimator = AdaptivelyWeightedSampleMean(w1 = 0.8),
+  data_distribution = Normal(FALSE),
+  design = get_example_design(),
+  mu=seq(-.75, 1.32, 0.03),
+  sigma=1)
+# Plotting a list of different score results
+plot(c(score_result1, score_result2))
+}
diff --git a/man/plot_p.Rd b/man/plot_p.Rd
index bf328d8..0e1c608 100644
--- a/man/plot_p.Rd
+++ b/man/plot_p.Rd
@@ -52,7 +52,7 @@ The rejection boundary signals the line where
 \examples{
 plot_p(estimator = StagewiseCombinationFunctionOrderingPValue(),
   data_distribution = Normal(FALSE),
-  design = design,
+  design = get_example_design(),
   mu = 0,
   sigma = 1)
 }
diff --git a/vignettes/Introduction.Rmd b/vignettes/Introduction.Rmd
index 3ff7b62..ee7f69e 100644
--- a/vignettes/Introduction.Rmd
+++ b/vignettes/Introduction.Rmd
@@ -66,7 +66,7 @@ You can read more about optimal adaptive designs fitted via the adoptr package h
 [kkmann.github.io/adoptr/articles/adoptr_jss.html](https://kkmann.github.io/adoptr/articles/adoptr_jss.html).
 
 
-# An introductory example
+# Example
 ## Evaluating the mean squared of an estimator
 Now that we have created an optimal adaptive design, we can investigate the performance
 characteristics of various estimators for the mean in that design.
@@ -103,26 +103,13 @@ mse_mle <- evaluate_estimator(
 )
 mse_weighted_sample_means <- evaluate_estimator(
   score = MSE(),
-  estimator = WeightedSampleMean(w1 = .5),
+  estimator = WeightedSampleMean(w1 = .8),
   data_distribution = Normal(two_armed = TRUE),
   design = design,
   mu = seq(-0.75, 1.32, .03),
   sigma = 1
 )
-plot_data <- data.frame(
-  MSE = c(mse_mle@results$MSE, mse_weighted_sample_means@results$MSE),
-  estimator = rep(
-    c(
-      mse_mle@estimator@label,
-      mse_weighted_sample_means@estimator@label
-    ),
-    each = length(mse_mle@results$MSE)
-  ),
-  mu = seq(-0.75, 1.32, .03)
-)
-library(ggplot2)
-ggplot(plot_data, aes(x = mu, y = MSE, color = estimator)) +
-  geom_line()
+plot(c(mse_mle, mse_weighted_sample_means))
 ```
 
 ## Analyzing datasets
@@ -130,16 +117,6 @@ Next, let us look at how to the package can be used to calculate estimates
 after data has been collected.
 
 The first stage data of a trial might look like this:
-set.seed(321)
-dat <- data.frame(
- endpoint = c(rnorm(28, .2, 1), rnorm(28, 0, 1),
-              rnorm(23, .2, 1), rnorm(23, 0, 1)),
- group = factor(rep(c("ctl", "trt", "ctl", "trt"),
-                    c(28,28,23,23))),
- stage = rep(c(1L, 2L), c(56, 46))
-)
-
-
 
 ```{r}
 set.seed(321)
@@ -151,24 +128,16 @@ dat <- data.frame(
 )
 head(dat)
 ```
-The first stage test Z-test statistic is:
 
 ```{r}
-Z <- (mean(dat$endpoint[1:56]) - mean(dat$endpoint[57:112])) * sqrt(56) / sqrt(2 * 1)
-Z
+analyze(data = dat,
+        statistics = list(),
+        data_distribution = Normal(two_armed = TRUE),
+        design = design,
+        sigma = 1)
 ```
+The results suggest recruiting 23 more patients per group for the second stage.
 
-The value of that statistic is inside the continuation region:
-
-```{r}
-c(design@c1f, design@c1e)
-```
-The second stage sample size function of the design can be queried to find the sample size for the
-second batch of patients (per group):
-
-```{r}
-n2(design, Z)
-```
 We will simulate 47 more patients per group:
 
 ```{r}
@@ -189,6 +158,7 @@ analyze(
  statistics = c(
    SampleMean(),
    BiasReduced(),
+   PseudoRaoBlackwell(),
    MedianUnbiasedStagewiseCombinationFunctionOrdering(),
    StagewiseCombinationFunctionOrderingCI(),
    StagewiseCombinationFunctionOrderingPValue()
@@ -205,6 +175,3 @@ Keep in mind that a difference of $\mu=0.3$ was used in the simulation.
 Note that while the median unbiased estimator performs well in this particular example, this
 is not universally true.
 
-# More details on the evaluation of performance characteristics
-
-