diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index ef0ff5e0..e55b6017 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -10,7 +10,7 @@ jobs: fail-fast: false matrix: version: - - '1.6' + - '1.10' - '1' os: - ubuntu-latest diff --git a/Project.toml b/Project.toml index f2910362..fda01335 100644 --- a/Project.toml +++ b/Project.toml @@ -1,10 +1,11 @@ name = "TMLE" uuid = "8afdd2fb-6e73-43df-8b62-b1650cd9c8cf" authors = ["Olivier Labayle"] -version = "0.16.1" +version = "0.17.0" [deps] AbstractDifferentiation = "c29ec348-61ec-40c8-8164-b8c60e9d9f3d" +AutoHashEquals = "15f4f7f2-30c1-5605-9d31-71845cf9641f" CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" @@ -18,6 +19,7 @@ MLJGLMInterface = "caf8df21-4939-456d-ac9c-5fefbfb04c0c" MLJModels = "d491faf4-2d78-11e9-2867-c94bc002c0b7" MetaGraphsNext = "fa8bd995-216d-47f1-8a91-f3b68fbeb377" Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" +OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SplitApplyCombine = "03a91e81-4c3e-53e1-a0a4-9c0c8f19dd66" @@ -46,7 +48,7 @@ JSON = "0.21.4" LogExpFunctions = "0.3" MLJBase = "1.0.1" MLJGLMInterface = "0.3.4" -MLJModels = "0.15, 0.16" +MLJModels = "0.15, 0.16, 0.17" MetaGraphsNext = "0.7" Missings = "1.0" PrecompileTools = "1.1.1" @@ -55,7 +57,9 @@ TableOperations = "1.2" Tables = "1.6" YAML = "0.4.9" Zygote = "0.6.69" -julia = "1.6, 1.7, 1" +OrderedCollections = "1.6.3" +AutoHashEquals = "2.1.0" +julia = "1.10, 1" [extras] JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" diff --git a/docs/src/user_guide/estimands.md b/docs/src/user_guide/estimands.md index 82f5abbd..a393b011 100644 --- a/docs/src/user_guide/estimands.md +++ b/docs/src/user_guide/estimands.md @@ -119,11 +119,7 @@ statisticalΨ = ATE( ) ``` -- Factorial Treatments - -It is possible to generate a `ComposedEstimand` containing all linearly independent IATEs from a set of treatment values or from a dataset. For that purpose, use the `factorialEstimand` function. - -## The Interaction Average Treatment Effect +## The Average Interaction Effect - Causal Question: @@ -136,14 +132,14 @@ For a general higher-order definition, please refer to [Higher-order interaction For two points interaction with both treatment and control levels ``0`` and ``1`` for ease of notation: ```math -IATE_{0 \rightarrow 1, 0 \rightarrow 1}(P) = \mathbb{E}[Y|do(T_1=1, T_2=1)] - \mathbb{E}[Y|do(T_1=1, T_2=0)] \\ +AIE_{0 \rightarrow 1, 0 \rightarrow 1}(P) = \mathbb{E}[Y|do(T_1=1, T_2=1)] - \mathbb{E}[Y|do(T_1=1, T_2=0)] \\ - \mathbb{E}[Y|do(T_1=0, T_2=1)] + \mathbb{E}[Y|do(T_1=0, T_2=0)] ``` - Statistical Estimand (via backdoor adjustment): ```math -IATE_{0 \rightarrow 1, 0 \rightarrow 1}(P) = \mathbb{E}_{\textbf{W}}[\mathbb{E}[Y|T_1=1, T_2=1, \textbf{W}] - \mathbb{E}[Y|T_1=1, T_2=0, \textbf{W}] \\ +AIE_{0 \rightarrow 1, 0 \rightarrow 1}(P) = \mathbb{E}_{\textbf{W}}[\mathbb{E}[Y|T_1=1, T_2=1, \textbf{W}] - \mathbb{E}[Y|T_1=1, T_2=0, \textbf{W}] \\ - \mathbb{E}[Y|T_1=0, T_2=1, \textbf{W}] + \mathbb{E}[Y|T_1=0, T_2=0, \textbf{W}]] ``` @@ -152,7 +148,7 @@ IATE_{0 \rightarrow 1, 0 \rightarrow 1}(P) = \mathbb{E}_{\textbf{W}}[\mathbb{E}[ A causal estimand is given by: ```@example estimands -causalΨ = IATE( +causalΨ = AIE( outcome=:Y, treatment_values=( T₁=(case=1, control=0), @@ -170,7 +166,7 @@ statisticalΨ = identify(causalΨ, scm) or defined directly: ```@example estimands -statisticalΨ = IATE( +statisticalΨ = AIE( outcome=:Y, treatment_values=( T₁=(case=1, control=0), @@ -182,13 +178,11 @@ statisticalΨ = IATE( - Factorial Treatments -It is possible to generate a `ComposedEstimand` containing all linearly independent IATEs from a set of treatment values or from a dataset. For that purpose, use the `factorialEstimand` function. - -## Composed Estimands +It is possible to generate a `JointEstimand` containing all linearly independent AIEs from a set of treatment values or from a dataset. For that purpose, use the `factorialEstimand` function. -As a result of Julia's automatic differentiation facilities, given a set of predefined estimands ``(\Psi_1, ..., \Psi_k)``, we can automatically compute an estimator for $f(\Psi_1, ..., \Psi_k)$. This is done via the `ComposedEstimand` type. +## Joint And Composed Estimands -For example, the difference in ATE for a treatment with 3 levels (0, 1, 2) can be defined as follows: +A `JointEstimand` is simply a list of one dimensional estimands that are grouped together. For instance for a treatment `T` taking three possible values ``(0, 1, 2)`` we can define the two following Average Treatment Effects and a corresponding `JointEstimand`: ```julia ATE₁ = ATE( @@ -201,5 +195,23 @@ ATE₂ = ATE( treatment_values = (T = (control = 1, case = 2),), treatment_confounders = [:W] ) -ATEdiff = ComposedEstimand(-, (ATE₁, ATE₂)) +joint_estimand = JointEstimand(ATE₁, ATE₂) +``` + +You can easily generate joint estimands corresponding to Counterfactual Means, Average Treatment Effects or Average Interaction Effects by using the `factorialEstimand` function. + +To estimate a joint estimand you can use any of the estimators defined in this package exactly as you would do it for a one dimensional estimand. + +There are two main use cases for them that we now describe. + +### Joint Testing + +In some cases, like in factorial analyses where multiple versions of a treatment are tested, it may be of interest to know if any version of the versions has had an effect. This can be done via a Hotelling's T2 Test, which is simply a multivariate generalisation of the Student's T test. This is the default returned by the `significance_test` function provided in TMLE.jl and the result of the test is also printed to the REPL for any joint estimate. + +### Composition + +Once you have estimated a `JointEstimand` and have a `JointEstimate`, you may be interested to ask further questions. For instance whether two treatment versions have the same effect. This question is typically answered by testing if the difference in Average Treatment Effect is 0. Using the Delta Method and Julia's automatic differentiation, you don't need to explicitly define a semi-parametric estimator for it. You can simply call `compose`: + +```julia +ATEdiff = compose(-, joint_estimate) ``` diff --git a/docs/src/user_guide/estimation.md b/docs/src/user_guide/estimation.md index f676f48c..2acb18ed 100644 --- a/docs/src/user_guide/estimation.md +++ b/docs/src/user_guide/estimation.md @@ -110,7 +110,7 @@ Again, required nuisance functions are fitted and stored in the cache. ## Specifying Models -By default, TMLE.jl uses generalized linear models for the estimation of relevant and nuisance factors such as the outcome mean and the propensity score. However, this is not the recommended usage since the estimators' performance is closely related to how well we can estimate these factors. More sophisticated models can be provided using the `models` keyword argument of each estimator which is essentially a `NamedTuple` mapping variables' names to their respective model. +By default, TMLE.jl uses generalized linear models for the estimation of relevant and nuisance factors such as the outcome mean and the propensity score. However, this is not the recommended usage since the estimators' performance is closely related to how well we can estimate these factors. More sophisticated models can be provided using the `models` keyword argument of each estimator which is a `Dict{Symbol, Model}` mapping variables' names to their respective model. Rather than specifying a specific model for each variable it may be easier to override the default models using the `default_models` function: @@ -121,9 +121,9 @@ using MLJXGBoostInterface xgboost_regressor = XGBoostRegressor() xgboost_classifier = XGBoostClassifier() models = default_models( - Q_binary=xgboost_classifier, - Q_continuous=xgboost_regressor, - G=xgboost_classifier + Q_binary = xgboost_classifier, + Q_continuous = xgboost_regressor, + G = xgboost_classifier ) tmle_gboost = TMLEE(models=models) ``` @@ -140,19 +140,18 @@ stack_binary = Stack( lr=lr ) -models = ( - T₁ = with_encoder(xgboost_classifier), # T₁ with XGBoost prepended with a Continuous Encoder - default_models( # For all other variables use the following defaults - Q_binary=stack_binary, # A Super Learner - Q_continuous=xgboost_regressor, # An XGBoost +models = default_models( # For all non-specified variables use the following defaults + Q_binary = stack_binary, # A Super Learner + Q_continuous = xgboost_regressor, # An XGBoost + # T₁ with XGBoost prepended with a Continuous Encoder + T₁ = xgboost_classifier # Unspecified G defaults to Logistic Regression - )... ) tmle_custom = TMLEE(models=models) ``` -Notice that `with_encoder` is simply a shorthand to construct a pipeline with a `ContinuousEncoder` and that the resulting `models` is simply a `NamedTuple`. +Notice that `with_encoder` is simply a shorthand to construct a pipeline with a `ContinuousEncoder` and that the resulting `models` is simply a `Dict`. ## CV-Estimation @@ -196,10 +195,10 @@ result₃ nothing # hide ``` -This time only the model for `Y` is fitted again while reusing the models for `T₁` and `T₂`. Finally, let's see what happens if we estimate the `IATE` between `T₁` and `T₂`. +This time only the model for `Y` is fitted again while reusing the models for `T₁` and `T₂`. Finally, let's see what happens if we estimate the `AIE` between `T₁` and `T₂`. ```@example estimation -Ψ₄ = IATE( +Ψ₄ = AIE( outcome=:Y, treatment_values=( T₁=(case=true, control=false), @@ -218,18 +217,20 @@ nothing # hide All nuisance functions have been reused, only the fluctuation is fitted! -## Composing Estimands +## Joint Estimands and Composition -By leveraging the multivariate Central Limit Theorem and Julia's automatic differentiation facilities, we can estimate any estimand which is a function of already estimated estimands. By default, TMLE.jl will use [Zygote](https://fluxml.ai/Zygote.jl/latest/) but since we are using [AbstractDifferentiation.jl](https://github.com/JuliaDiff/AbstractDifferentiation.jl) you can change the backend to your favorite AD system. +As explained in [Joint And Composed Estimands](@ref), a joint estimand is simply a collection of estimands. Here, we will illustrate that an Average Interaction Effect is also defined as a difference in partial Average Treatment Effects. -For instance, by definition of the ``IATE``, we should be able to retrieve: +More precisely, we would like to see if the left-hand side of this equation is equal to the right-hand side: ```math -IATE_{T_1=0 \rightarrow 1, T_2=0 \rightarrow 1} = ATE_{T_1=0 \rightarrow 1, T_2=0 \rightarrow 1} - ATE_{T_1=0, T_2=0 \rightarrow 1} - ATE_{T_1=0 \rightarrow 1, T_2=0} +AIE_{T_1=0 \rightarrow 1, T_2=0 \rightarrow 1} = ATE_{T_1=0 \rightarrow 1, T_2=0 \rightarrow 1} - ATE_{T_1=0, T_2=0 \rightarrow 1} - ATE_{T_1=0 \rightarrow 1, T_2=0} ``` +For that, we need to define a joint estimand of three components: + ```@example estimation -first_ate = ATE( +ATE₁ = ATE( outcome=:Y, treatment_values=( T₁=(case=true, control=false), @@ -239,9 +240,7 @@ first_ate = ATE( T₂=[:W₂₁, :W₂₂], ), ) -first_ate_result, cache = tmle(first_ate, dataset, cache=cache, verbosity=0); - -second_ate = ATE( +ATE₂ = ATE( outcome=:Y, treatment_values=( T₁=(case=false, control=false), @@ -251,15 +250,27 @@ second_ate = ATE( T₂=[:W₂₁, :W₂₂], ), ) -second_ate_result, cache = tmle(second_ate, dataset, cache=cache, verbosity=0); +joint_estimand = JointEstimand(Ψ₃, ATE₁, ATE₂) +``` -composed_iate_result = compose( - (x, y, z) -> x - y - z, - result₃, first_ate_result, second_ate_result -) +where the interaction `Ψ₃` was defined earlier. This joint estimand can be estimated like any other estimand using our estimator of choice: + +```@example estimation +joint_estimate, cache = tmle(joint_estimand, dataset, cache=cache, verbosity=0); +joint_estimate +``` + +The printed output is the result of a Hotelling's T2 Test which is the multivariate counterpart of the Student's T Test. It tells us whether any of the component of this joint estimand is different from 0. + +Then we can formally test our hypothesis by leveraging the multivariate Central Limit Theorem and Julia's automatic differentiation. + +```@example estimation +composed_result = compose((x, y, z) -> x - y - z, joint_estimate) isapprox( estimate(result₄), - estimate(composed_iate_result), + first(estimate(composed_result)), atol=0.1 ) ``` + +By default, TMLE.jl will use [Zygote](https://fluxml.ai/Zygote.jl/latest/) but since we are using [AbstractDifferentiation.jl](https://github.com/JuliaDiff/AbstractDifferentiation.jl) you can change the backend to your favorite AD system. diff --git a/docs/src/walk_through.md b/docs/src/walk_through.md index 71764c38..224b7e2c 100644 --- a/docs/src/walk_through.md +++ b/docs/src/walk_through.md @@ -108,10 +108,10 @@ marginal_ate_t1 = ATE( ) ``` -- The Interaction Average Treatment Effect: +- The Average Interaction Effect: ```@example walk-through -iate = IATE( +aie = AIE( outcome = :Y, treatment_values = ( T₁=(case=1, control=0), @@ -125,7 +125,7 @@ iate = IATE( Identification is the process by which a Causal Estimand is turned into a Statistical Estimand, that is, a quantity we may estimate from data. This is done via the `identify` function which also takes in the ``SCM``: ```@example walk-through -statistical_iate = identify(iate, scm) +statistical_aie = identify(aie, scm) ``` Alternatively, you can also directly define the statistical parameters (see [Estimands](@ref)). @@ -149,7 +149,7 @@ Statistical Estimands can be estimated without a ``SCM``, let's use the One-Step ```@example walk-through ose = OSE() -result, cache = ose(statistical_iate, dataset) +result, cache = ose(statistical_aie, dataset) result ``` @@ -160,3 +160,5 @@ Both TMLE and OSE asymptotically follow a Normal distribution. It means we can p ```@example walk-through OneSampleTTest(result) ``` + +If the estimate is high-dimensional, a `OneSampleHotellingT2Test` should be performed instead. Alternatively, the `significance_test` function will automatically select the appropriate test for the estimate and return its result. diff --git a/examples/double_robustness.jl b/examples/double_robustness.jl index 64b44997..f9a9bc5a 100644 --- a/examples/double_robustness.jl +++ b/examples/double_robustness.jl @@ -157,9 +157,9 @@ function tmle_inference(data) treatment_values=(Tcat=(case=1.0, control=0.0),), treatment_confounders=(Tcat=[:W],) ) - models = ( - Y = with_encoder(LinearRegressor()), - Tcat = with_encoder(LinearBinaryClassifier()) + models = Dict( + :Y => with_encoder(LinearRegressor()), + :Tcat => with_encoder(LinearBinaryClassifier()) ) tmle = TMLEE(models=models) result, _ = tmle(Ψ, data; verbosity=0) diff --git a/src/TMLE.jl b/src/TMLE.jl index 5510cb5f..b3a4e27d 100644 --- a/src/TMLE.jl +++ b/src/TMLE.jl @@ -20,17 +20,19 @@ using Graphs using MetaGraphsNext using Combinatorics using SplitApplyCombine +using OrderedCollections +using AutoHashEquals # ############################################################################# # EXPORTS # ############################################################################# export SCM, StaticSCM, add_equations!, add_equation!, parents, vertices -export CM, ATE, IATE +export CM, ATE, AIE export AVAILABLE_ESTIMANDS export factorialEstimand, factorialEstimands export TMLEE, OSE, NAIVE -export ComposedEstimand +export JointEstimand, ComposedEstimand export var, estimate, pvalue, confint, emptyIC export significance_test, OneSampleTTest, OneSampleZTest, OneSampleHotellingT2Test export compose @@ -48,8 +50,8 @@ include("utils.jl") include("scm.jl") include("adjustment.jl") include("estimands.jl") -include("estimators.jl") include("estimates.jl") +include("estimators.jl") include("treatment_transformer.jl") include("estimand_ordering.jl") @@ -61,6 +63,6 @@ include("counterfactual_mean_based/clever_covariate.jl") include("counterfactual_mean_based/gradient.jl") include("configuration.jl") - +include("testing.jl") end diff --git a/src/configuration.jl b/src/configuration.jl index 28dc34e3..38987244 100644 --- a/src/configuration.jl +++ b/src/configuration.jl @@ -35,11 +35,11 @@ from_dict!(x) = x from_dict!(v::AbstractVector) = [from_dict!(x) for x in v] """ - from_dict!(d::Dict) + from_dict!(d::AbstractDict) Converts a dictionary to a TMLE struct. """ -function from_dict!(d::Dict{T, Any}) where T +function from_dict!(d::AbstractDict{T, Any}) where T haskey(d, T(:type)) || return Dict(key => from_dict!(val) for (key, val) in d) constructor = eval(Meta.parse(pop!(d, :type))) return constructor(;(key => from_dict!(val) for (key, val) in d)...) diff --git a/src/counterfactual_mean_based/estimands.jl b/src/counterfactual_mean_based/estimands.jl index 884cd2d3..0bc627dd 100644 --- a/src/counterfactual_mean_based/estimands.jl +++ b/src/counterfactual_mean_based/estimands.jl @@ -33,7 +33,7 @@ variables(estimand::CMRelevantFactors) = const ESTIMANDS_DOCS = Dict( :CM => (formula="``CM(Y, T=t) = E[Y|do(T=t)]``",), :ATE => (formula="``ATE(Y, T, case, control) = E[Y|do(T=case)] - E[Y|do(T=control)``",), - :IATE => (formula="``IATE = E[Y|do(T₁=1, T₂=1)] - E[Y|do(T₁=1, T₂=0)] - E[Y|do(T₁=0, T₂=1)] + E[Y|do(T₁=0, T₂=0)]``",) + :AIE => (formula="``AIE = E[Y|do(T₁=1, T₂=1)] - E[Y|do(T₁=1, T₂=0)] - E[Y|do(T₁=0, T₂=1)] + E[Y|do(T₁=0, T₂=0)]``",) ) for (estimand, (formula,)) ∈ ESTIMANDS_DOCS @@ -41,9 +41,9 @@ for (estimand, (formula,)) ∈ ESTIMANDS_DOCS statistical_estimand = Symbol(:Statistical, estimand) ex = quote # Causal Estimand - struct $(causal_estimand) <: Estimand + @auto_hash_equals struct $(causal_estimand) <: Estimand outcome::Symbol - treatment_values::NamedTuple + treatment_values::OrderedDict function $(causal_estimand)(outcome, treatment_values) outcome = Symbol(outcome) @@ -52,17 +52,16 @@ for (estimand, (formula,)) ∈ ESTIMANDS_DOCS end end # Statistical Estimand - struct $(statistical_estimand) <: Estimand + @auto_hash_equals struct $(statistical_estimand) <: Estimand outcome::Symbol - treatment_values::NamedTuple - treatment_confounders::NamedTuple + treatment_values::OrderedDict + treatment_confounders::OrderedDict outcome_extra_covariates::Tuple{Vararg{Symbol}} function $(statistical_estimand)(outcome, treatment_values, treatment_confounders, outcome_extra_covariates) outcome = Symbol(outcome) treatment_values = get_treatment_specs(treatment_values) - treatment_variables = Tuple(keys(treatment_values)) - treatment_confounders = NamedTuple{treatment_variables}([confounders_values(treatment_confounders, T) for T ∈ treatment_variables]) + treatment_confounders = OrderedDict(T => confounders_values(treatment_confounders, T) for T ∈ (keys(treatment_values))) outcome_extra_covariates = unique_sorted_tuple(outcome_extra_covariates) return new(outcome, treatment_values, treatment_confounders, outcome_extra_covariates) end @@ -90,36 +89,39 @@ for (estimand, (formula,)) ∈ ESTIMANDS_DOCS eval(ex) end +const IATE = AIE + CausalCMCompositeEstimands = Union{(eval(Symbol(:Causal, x)) for x in keys(ESTIMANDS_DOCS))...} StatisticalCMCompositeEstimand = Union{(eval(Symbol(:Statistical, x)) for x in keys(ESTIMANDS_DOCS))...} const AVAILABLE_ESTIMANDS = [x[1] for x ∈ ESTIMANDS_DOCS] -indicator_fns(Ψ::StatisticalCM) = Dict(values(Ψ.treatment_values) => 1.) +indicator_fns(Ψ::StatisticalCM) = Dict(Tuple(values(Ψ.treatment_values)) => 1.) function indicator_fns(Ψ::StatisticalATE) case = [] control = [] - for treatment in Ψ.treatment_values + for treatment in values(Ψ.treatment_values) push!(case, treatment.case) push!(control, treatment.control) end return Dict(Tuple(case) => 1., Tuple(control) => -1.) end -ncases(value, Ψ::StatisticalIATE) = sum(value[i] == Ψ.treatment_values[i].case for i in eachindex(value)) +ncases(counterfactual_values, treatments_cases) = sum(counterfactual_values .== treatments_cases) -function indicator_fns(Ψ::StatisticalIATE) - N = length(treatments(Ψ)) +function indicator_fns(Ψ::StatisticalAIE) + N = length(Ψ.treatment_values) key_vals = Pair[] - for cf in Iterators.product((values(Ψ.treatment_values[T]) for T in treatments(Ψ))...) - push!(key_vals, cf => float((-1)^(N - ncases(cf, Ψ)))) + treatments_cases = Tuple(case_control.case for case_control ∈ values(Ψ.treatment_values)) + for cf_values in Iterators.product((values(case_control_nt) for case_control_nt in values(Ψ.treatment_values))...) + push!(key_vals, cf_values => float((-1)^(N - ncases(cf_values, treatments_cases)))) end return Dict(key_vals...) end -outcome_mean(Ψ::StatisticalCMCompositeEstimand) = ExpectedValue(Ψ.outcome, Tuple(union(Ψ.outcome_extra_covariates, keys(Ψ.treatment_confounders), (Ψ.treatment_confounders)...))) +outcome_mean(Ψ::StatisticalCMCompositeEstimand) = ExpectedValue(Ψ.outcome, Tuple(union(Ψ.outcome_extra_covariates, keys(Ψ.treatment_confounders), values(Ψ.treatment_confounders)...))) outcome_mean_key(Ψ::StatisticalCMCompositeEstimand) = variables(outcome_mean(Ψ)) @@ -148,36 +150,27 @@ function Base.show(io::IO, ::MIME"text/plain", Ψ::T) where T <: StatisticalCMCo println(io, param_string) end -function treatment_specs_to_dict(treatment_values::NamedTuple{T, <:Tuple{Vararg{NamedTuple}}}) where T - Dict(key => Dict(pairs(vals)) for (key, vals) in pairs(treatment_values)) -end +case_control_dict(case_control_nt::NamedTuple) = OrderedDict(pairs(case_control_nt)) +case_control_dict(value) = value -treatment_specs_to_dict(treatment_values::NamedTuple) = Dict(pairs(treatment_values)) +treatment_specs_to_dict(treatment_values) = OrderedDict(key => case_control_dict(case_control_nt) for (key, case_control_nt) in treatment_values) treatment_values(d::AbstractDict) = (;d...) treatment_values(d) = d -confounders_values(key_value_iterable::Union{NamedTuple, Dict}, T) = unique_sorted_tuple(key_value_iterable[T]) +confounders_values(key_value_iterable::Union{NamedTuple, AbstractDict}, key) = unique_sorted_tuple(key_value_iterable[key]) -confounders_values(iterable, T) = unique_sorted_tuple(iterable) +confounders_values(iterable, key) = unique_sorted_tuple(iterable) -get_treatment_specs(treatment_specs::NamedTuple{names, }) where names = - NamedTuple{Tuple(sort(collect(names)))}(treatment_specs) +confounders_to_dict(treatment_confounders) = Dict(key => collect(values) for (key, values) in treatment_confounders) -function get_treatment_specs(treatment_specs::NamedTuple{names, <:Tuple{Vararg{NamedTuple}}}) where names - case_control = ((case=v[:case], control=v[:control]) for v in values(treatment_specs)) - treatment_specs = (;zip(keys(treatment_specs), case_control)...) - sorted_names = Tuple(sort(collect(names))) - return NamedTuple{sorted_names}(treatment_specs) -end - -get_treatment_specs(treatment_specs::AbstractDict) = - get_treatment_specs((;(key => treatment_values(val) for (key, val) in treatment_specs)...)) +case_control_to_nt(scalar) = scalar -constructorname(T; prefix="TMLE.Causal") = replace(string(T), prefix => "") +case_control_to_nt(case_control_iter::Union{NamedTuple, AbstractDict}) = (control=case_control_iter[:control], case=case_control_iter[:case]) -treatment_confounders_to_dict(treatment_confounders::NamedTuple) = - Dict(key => collect(vals) for (key, vals) in pairs(treatment_confounders)) +get_treatment_specs(key_value_iterable) = sort(OrderedDict(Symbol(key) => case_control_to_nt(case_control_iter) for (key, case_control_iter) ∈ pairs(key_value_iterable))) + +constructorname(T; prefix="TMLE.Causal") = replace(string(T), prefix => "") """ to_dict(Ψ::T) where T <: CausalCMCompositeEstimands @@ -202,7 +195,7 @@ function to_dict(Ψ::T) where T <: StatisticalCMCompositeEstimand :type => constructorname(T; prefix="TMLE.Statistical"), :outcome => Ψ.outcome, :treatment_values => treatment_specs_to_dict(Ψ.treatment_values), - :treatment_confounders => treatment_confounders_to_dict(Ψ.treatment_confounders), + :treatment_confounders => confounders_to_dict(Ψ.treatment_confounders), :outcome_extra_covariates => collect(Ψ.outcome_extra_covariates) ) end @@ -211,13 +204,10 @@ identify(method, Ψ::StatisticalCMCompositeEstimand, scm) = Ψ function identify(method::BackdoorAdjustment, causal_estimand::T, scm::SCM) where T<:CausalCMCompositeEstimands # Treatment confounders - treatment_names = keys(causal_estimand.treatment_values) + treatment_names = collect(keys(causal_estimand.treatment_values)) treatment_codes = [code_for(scm.graph, treatment) for treatment ∈ treatment_names] confounders_codes = scm.graph.graph.badjlist[treatment_codes] - treatment_confounders = NamedTuple{treatment_names}( - [[scm.graph.vertex_labels[w] for w in confounders_codes[i]] - for i in eachindex(confounders_codes)] - ) + treatment_confounders = Dict(treatment_names[i] => [scm.graph.vertex_labels[w] for w in confounders_codes[i]] for i in eachindex(confounders_codes)) return statistical_type_from_causal_type(T)(; outcome=causal_estimand.outcome, @@ -240,27 +230,98 @@ We ensure that the values are sorted by frequency to maximize the number of estimands passing the positivity constraint. """ unique_treatment_values(dataset, colnames) = - (;(colname => get_treatment_values(dataset, colname) for colname in colnames)...) + sort(OrderedDict(colname => get_treatment_values(dataset, colname) for colname in colnames)) """ Generated from transitive treatment switches to create independent estimands. """ -get_treatment_settings(::Union{typeof(ATE), typeof(IATE)}, treatments_unique_values) = - [collect(zip(vals[1:end-1], vals[2:end])) for vals in values(treatments_unique_values)] +get_treatment_settings(::Union{typeof(ATE), typeof(AIE)}, treatments_unique_values)= + sort(OrderedDict(key => collect(zip(uniquevaluess[1:end-1], uniquevaluess[2:end])) for (key, uniquevaluess) in pairs(treatments_unique_values))) -get_treatment_settings(::typeof(CM), treatments_unique_values) = - values(treatments_unique_values) +get_treatment_settings(::typeof(CM), treatments_unique_values) = sort(OrderedDict(pairs(treatments_unique_values))) get_treatment_setting(combo::Tuple{Vararg{Tuple}}) = [NamedTuple{(:control, :case)}(treatment_control_case) for treatment_control_case ∈ combo] get_treatment_setting(combo) = collect(combo) -FACTORIAL_DOCSTRING = """ -The components of this estimand are generated from the treatment variables contrasts. +""" +If there is no dataset and the treatments_levels are a NamedTuple, then they are assumed correct. +""" +make_or_check_treatment_levels(treatments_levels::Union{AbstractDict, NamedTuple}, dataset::Nothing) = treatments_levels + +""" +If no dataset is provided, then a NamedTuple precising treatment levels is expected +""" +make_or_check_treatment_levels(treatments, dataset::Nothing) = + throw(ArgumentError("No dataset from which to infer treatment levels was provided. Either provide a `dataset` or a NamedTuple `treatments` e.g. (T=[0, 1, 2],)")) + +""" +If a list of treatments is provided as well as a dataset then the treatment_levels are infered from it. +""" +make_or_check_treatment_levels(treatments, dataset) = unique_treatment_values(dataset, treatments) + +""" +If a NamedTuple of treatments_levels is provided as well as a dataset then the treatment_levels are checked from the dataset. +""" +function make_or_check_treatment_levels(treatments_levels::Union{AbstractDict, NamedTuple}, dataset) + for (treatment, treatment_levels) in zip(keys(treatments_levels), values(treatments_levels)) + dataset_treatment_levels = Set(skipmissing(Tables.getcolumn(dataset, treatment))) + missing_levels = setdiff(treatment_levels, dataset_treatment_levels) + length(missing_levels) == 0 || + throw(ArgumentError(string("Not all levels provided for treatment ", treatment, " were found in the dataset: ", missing_levels))) + end + return treatments_levels +end + +function _factorialEstimand( + constructor, + treatments_settings, + outcome; + confounders=nothing, + outcome_extra_covariates=nothing, + freq_table=nothing, + positivity_constraint=nothing, + verbosity=1 + ) + names = keys(treatments_settings) + components = [] + for combo ∈ Iterators.product(values(treatments_settings)...) + Ψ = constructor( + outcome=outcome, + treatment_values=OrderedDict(zip(names, get_treatment_setting(combo))), + treatment_confounders = confounders, + outcome_extra_covariates=outcome_extra_covariates + ) + if satisfies_positivity(Ψ, freq_table; positivity_constraint=positivity_constraint) + push!(components, Ψ) + else + verbosity > 0 && @warn("Sub estimand", Ψ, " did not pass the positivity constraint, skipped.") + end + end + if length(components) == 0 + throw(ArgumentError("No component passed the positivity constraint.")) + end + return JointEstimand(components...) +end + +""" + factorialEstimand( + constructor::Union{typeof(CM), typeof(ATE), typeof(AIE)}, + treatments, outcome; + confounders=nothing, + dataset=nothing, + outcome_extra_covariates=(), + positivity_constraint=nothing, + freq_table=nothing, + verbosity=1 + ) + +Generates a factorial `JointEstimand` with components of type `constructor` (CM, ATE, AIE). + +For the ATE and the AIE, the generated components are restricted to the Cartesian Product of single treatment levels transitions. For example, consider two treatment variables T₁ and T₂ each taking three possible values (0, 1, 2). -For each treatment variable, the marginal transitive contrasts are defined by (0 → 1, 1 → 2). Note that (0 → 2) or (1 → 0) need not -be considered because they are linearly dependent on the other contrasts. Then, the cartesian product of treatment contrasts is taken, -resulting in a 2 x 2 = 4 dimensional joint estimand: +For each treatment variable, the single treatment levels transitions are defined by (0 → 1, 1 → 2). +Then, the Cartesian Product of these transitions is taken, resulting in a 2 x 2 = 4 dimensional joint estimand: - (T₁: 0 → 1, T₂: 0 → 1) - (T₁: 0 → 1, T₂: 1 → 2) @@ -269,120 +330,72 @@ resulting in a 2 x 2 = 4 dimensional joint estimand: # Return -A `ComposedEstimand` with causal or statistical components. +A `JointEstimand` with causal or statistical components. # Args -- `treatments_levels`: A NamedTuple providing the unique levels each treatment variable can take. +- `constructor`: CM, ATE or AIE. +- `treatments`: An AbstractDictionary/NamedTuple of treatment levels (e.g. `(T=(0, 1, 2),)`) or a treatment iterator, then a dataset must be provided to infer the levels from it. - `outcome`: The outcome variable. -- `confounders=nothing`: The generated components will inherit these confounding variables. -If `nothing`, causal estimands are generated. +- `confounders=nothing`: The generated components will inherit these confounding variables. If `nothing`, causal estimands are generated. - `outcome_extra_covariates=()`: The generated components will inherit these `outcome_extra_covariates`. -- `positivity_constraint=nothing`: Only components that pass the positivity constraint are added to the `ComposedEstimand` +- `dataset`: An optional dataset to enforce a positivity constraint and infer treatment levels. +- `positivity_constraint=nothing`: Only components that pass the positivity constraint are added to the `JointEstimand`. A `dataset` must then be provided. +- `freq_table`: This is only to be used by `factorialEstimands` to avoid unecessary computations. - `verbosity=1`: Verbosity level. -""" - -""" - factorialEstimand( - constructor::Union{typeof(ATE), typeof(IATE)}, - treatments_levels::NamedTuple{names}, - outcome; - confounders=nothing, - outcome_extra_covariates=(), - freq_table=nothing, - positivity_constraint=nothing, - verbosity=1 - ) where names - -Generate a `ComposedEstimand` from `treatments_levels`. $FACTORIAL_DOCSTRING # Examples: -Average Treatment Effects: +- An Average Treatment Effect with causal components: ```@example factorialEstimand(ATE, (T₁ = (0, 1), T₂=(0, 1, 2)), :Y₁) ``` +- An Average Interaction Effect with statistical components: + ```@example -factorial(ATE, (T₁ = (0, 1, 2), T₂=(0, 1, 2)), :Y₁, confounders=[:W₁, :W₂]) +factorial(AIE, (T₁ = (0, 1, 2), T₂=(0, 1, 2)), :Y₁, confounders=[:W₁, :W₂]) ``` +- With a dataset, the treatment levels can be infered and a positivity constraint enforced: Interactions: ```@example -factorialEstimand(IATE, (T₁ = (0, 1), T₂=(0, 1, 2)), :Y₁) +factorialEstimand(ATE, [:T₁, :T₂], :Y₁, + confounders=[:W₁, :W₂], + dataset=dataset, + positivity_constraint=0.1 +) ``` -```@example -factorialEstimand(IATE, (T₁ = (0, 1, 2), T₂=(0, 1, 2)), :Y₁, confounders=[:W₁, :W₂]) -""" -function factorialEstimand( - constructor::Union{typeof(CM), typeof(ATE), typeof(IATE)}, - treatments_levels::NamedTuple{names}, - outcome; - confounders=nothing, - outcome_extra_covariates=(), - freq_table=nothing, - positivity_constraint=nothing, - verbosity=1 - ) where names - treatments_settings = get_treatment_settings(constructor, treatments_levels) - components = [] - for combo ∈ Iterators.product(treatments_settings...) - Ψ = constructor( - outcome=outcome, - treatment_values=NamedTuple{names}(get_treatment_setting(combo)), - treatment_confounders = confounders, - outcome_extra_covariates=outcome_extra_covariates - ) - if satisfies_positivity(Ψ, freq_table; positivity_constraint=positivity_constraint) - push!(components, Ψ) - else - verbosity > 0 && @warn("Sub estimand", Ψ, " did not pass the positivity constraint, skipped.") - end - end - return ComposedEstimand(joint_estimand, Tuple(components)) -end - -""" -factorialEstimand( - constructor::Union{typeof(ATE), typeof(IATE)}, - dataset, treatments, outcome; - confounders=nothing, - outcome_extra_covariates=(), - positivity_constraint=nothing, - verbosity=1 - ) - -Identifies `treatment_levels` from `dataset` and construct the -factorialEstimand from it. """ function factorialEstimand( - constructor::Union{typeof(CM), typeof(ATE), typeof(IATE)}, - dataset, treatments, outcome; - confounders=nothing, + constructor::Union{typeof(CM), typeof(ATE), typeof(AIE)}, + treatments, outcome; + confounders=nothing, + dataset=nothing, outcome_extra_covariates=(), positivity_constraint=nothing, + freq_table=nothing, verbosity=1 ) - treatments_levels = unique_treatment_values(dataset, treatments) - freq_table = positivity_constraint !== nothing ? frequency_table(dataset, keys(treatments_levels)) : nothing - return factorialEstimand( - constructor, - treatments_levels, - outcome; - confounders=confounders, - outcome_extra_covariates=outcome_extra_covariates, + treatments_levels = make_or_check_treatment_levels(treatments, dataset) + freq_table = freq_table !== nothing ? freq_table : get_frequency_table(positivity_constraint, dataset, keys(treatments_levels)) + treatments_settings = get_treatment_settings(constructor, treatments_levels) + return _factorialEstimand( + constructor, treatments_settings, outcome; + confounders=confounders, + outcome_extra_covariates=outcome_extra_covariates, freq_table=freq_table, positivity_constraint=positivity_constraint, verbosity=verbosity - ) + ) end """ factorialEstimands( - constructor::Union{typeof(ATE), typeof(IATE)}, + constructor::Union{typeof(ATE), typeof(AIE)}, dataset, treatments, outcomes; confounders=nothing, outcome_extra_covariates=(), @@ -390,31 +403,30 @@ factorialEstimands( verbosity=1 ) -Identifies `treatment_levels` from `dataset` and a factorialEstimand -for each outcome in `outcomes`. +Generates a `JointEstimand` for each outcome in `outcomes`. See `factorialEstimand`. """ function factorialEstimands( - constructor::Union{typeof(CM), typeof(ATE), typeof(IATE)}, - dataset, treatments, outcomes; + constructor::Union{typeof(CM), typeof(ATE), typeof(AIE)}, + treatments, outcomes; + dataset=nothing, confounders=nothing, outcome_extra_covariates=(), positivity_constraint=nothing, verbosity=1 ) + treatments_levels = make_or_check_treatment_levels(treatments, dataset) + freq_table = get_frequency_table(positivity_constraint, dataset, keys(treatments_levels)) + treatments_settings = get_treatment_settings(constructor, treatments_levels) estimands = [] - treatments_levels = unique_treatment_values(dataset, treatments) - freq_table = positivity_constraint !== nothing ? frequency_table(dataset, keys(treatments_levels)) : nothing for outcome in outcomes - Ψ = factorialEstimand( - constructor, - treatments_levels, - outcome; - confounders=confounders, - outcome_extra_covariates=outcome_extra_covariates, + Ψ = _factorialEstimand( + constructor, treatments_settings, outcome; + confounders=confounders, + outcome_extra_covariates=outcome_extra_covariates, freq_table=freq_table, positivity_constraint=positivity_constraint, verbosity=verbosity-1 - ) + ) if length(Ψ.args) > 0 push!(estimands, Ψ) else @@ -427,9 +439,9 @@ function factorialEstimands( return estimands end -joint_levels(Ψ::StatisticalIATE) = Iterators.product(values(Ψ.treatment_values)...) +joint_levels(Ψ::StatisticalAIE) = Iterators.product(values(Ψ.treatment_values)...) joint_levels(Ψ::StatisticalATE) = - (Tuple(Ψ.treatment_values[T][c] for T ∈ keys(Ψ.treatment_values)) for c in (:case, :control)) + (Tuple(Ψ.treatment_values[T][c] for T ∈ keys(Ψ.treatment_values)) for c in (:control, :case)) -joint_levels(Ψ::StatisticalCM) = (values(Ψ.treatment_values),) \ No newline at end of file +joint_levels(Ψ::StatisticalCM) = (Tuple(values(Ψ.treatment_values)),) \ No newline at end of file diff --git a/src/counterfactual_mean_based/estimates.jl b/src/counterfactual_mean_based/estimates.jl index 4f575a4c..c5e7da5c 100644 --- a/src/counterfactual_mean_based/estimates.jl +++ b/src/counterfactual_mean_based/estimates.jl @@ -70,39 +70,9 @@ emptyIC(estimate; pval_threshold=nothing) = emptyIC(estimate, pval_threshold) Retrieves the final estimate: after the TMLE step. """ -Distributions.estimate(est::EICEstimate) = est.estimate +Distributions.estimate(Ψ̂::EICEstimate) = Ψ̂.estimate -""" - var(r::EICEstimate) - -Computes the estimated variance associated with the estimate. -""" -Statistics.var(est::EICEstimate) = - var(est.IC)/size(est.IC, 1) - - -""" - OneSampleZTest(r::EICEstimate, Ψ₀=0) - -Performs a Z test on the EICEstimate. -""" -HypothesisTests.OneSampleZTest(est::EICEstimate, Ψ₀=0) = - OneSampleZTest(est.estimate, est.std, est.n, Ψ₀) - -""" - OneSampleTTest(r::EICEstimate, Ψ₀=0) - -Performs a T test on the EICEstimate. -""" -HypothesisTests.OneSampleTTest(est::EICEstimate, Ψ₀=0) = - OneSampleTTest(est.estimate, est.std, est.n, Ψ₀) - -""" - significance_test(estimate::EICEstimate, Ψ₀=0) - -Performs a TTest -""" -significance_test(estimate::EICEstimate, Ψ₀=0) = OneSampleTTest(estimate, Ψ₀) +Statistics.std(Ψ̂::EICEstimate) = Ψ̂.std -Base.show(io::IO, mime::MIME"text/plain", est::Union{EICEstimate, ComposedEstimate}) = +Base.show(io::IO, mime::MIME"text/plain", est::Union{EICEstimate, JointEstimate, ComposedEstimate}) = show(io, mime, significance_test(est)) \ No newline at end of file diff --git a/src/counterfactual_mean_based/estimators.jl b/src/counterfactual_mean_based/estimators.jl index 2df3944b..d67dd5c7 100644 --- a/src/counterfactual_mean_based/estimators.jl +++ b/src/counterfactual_mean_based/estimators.jl @@ -23,7 +23,7 @@ Base.showerror(io::IO, e::FitFailedError) = print(io, e.msg) struct CMRelevantFactorsEstimator <: Estimator resampling::Union{Nothing, ResamplingStrategy} - models::NamedTuple + models::Dict end CMRelevantFactorsEstimator(;models, resampling=nothing) = @@ -152,7 +152,7 @@ end ##################################################################### mutable struct TMLEE <: Estimator - models::NamedTuple + models::Dict resampling::Union{Nothing, ResamplingStrategy} ps_lowerbound::Union{Float64, Nothing} weighted::Bool @@ -168,7 +168,7 @@ function that can be applied to estimate estimands for a dataset. # Arguments -- models: A NamedTuple{variables}(models) where the `variables` are the outcome variables modeled by the `models`. +- models: A Dict(variable => model, ...) where the `variables` are the outcome variables modeled by the `models`. - resampling: Outer resampling strategy. Setting it to `nothing` (default) falls back to vanilla TMLE while any valid `MLJ.ResamplingStrategy` will result in CV-TMLE. - ps_lowerbound: Lowerbound for the propensity score to avoid division by 0. The special value `nothing` will @@ -237,7 +237,7 @@ gradient_and_estimate(::TMLEE, Ψ, factors, dataset; ps_lowerbound=1e-8) = ##################################################################### mutable struct OSE <: Estimator - models::NamedTuple + models::Dict resampling::Union{Nothing, ResamplingStrategy} ps_lowerbound::Union{Float64, Nothing} machine_cache::Bool @@ -251,7 +251,7 @@ function that can be applied to estimate estimands for a dataset. # Arguments -- models: A NamedTuple{variables}(models) where the `variables` are the outcome variables modeled by the `models`. +- models: A Dict(variable => model, ...) where the `variables` are the outcome variables modeled by the `models`. - resampling: Outer resampling strategy. Setting it to `nothing` (default) falls back to vanilla estimation while any valid `MLJ.ResamplingStrategy` will result in CV-OSE. - ps_lowerbound: Lowerbound for the propensity score to avoid division by 0. The special value `nothing` will @@ -262,7 +262,7 @@ result in a data adaptive definition as described in [here](https://pubmed.ncbi. ```julia using MLJLinearModels -models = (Y = LinearRegressor(), T = LogisticClassifier()) +models = Dict(:Y => LinearRegressor(), :T => LogisticClassifier()) ose = OSE() Ψ̂ₙ, cache = ose(Ψ, dataset) ``` diff --git a/src/estimands.jl b/src/estimands.jl index b41c2afa..f131e54d 100644 --- a/src/estimands.jl +++ b/src/estimands.jl @@ -26,7 +26,7 @@ AbsentLevelError(treatment_name, val, levels) = ArgumentError(string( Checks the case/control values defining the treatment contrast are present in the dataset levels. -Note: This method is for estimands like the ATE or IATE that have case/control treatment settings represented as +Note: This method is for estimands like the ATE or AIE that have case/control treatment settings represented as `NamedTuple`. """ function check_treatment_settings(settings::NamedTuple, levels, treatment_name) @@ -56,10 +56,10 @@ end Makes sure the defined treatment levels are present in the dataset. """ function check_treatment_levels(Ψ::Estimand, dataset) - for treatment_name in treatments(Ψ) - treatment_levels = levels(Tables.getcolumn(dataset, treatment_name)) - treatment_settings = getproperty(Ψ.treatment_values, treatment_name) - check_treatment_settings(treatment_settings, treatment_levels, treatment_name) + for T in treatments(Ψ) + treatment_levels = levels(Tables.getcolumn(dataset, T)) + treatment_settings = Ψ.treatment_values[T] + check_treatment_settings(treatment_settings, treatment_levels, T) end end @@ -103,44 +103,72 @@ a Conditional Distribution because they are estimated in the same way. const ExpectedValue = ConditionalDistribution ##################################################################### -### ComposedEstimand ### +### JointEstimand ### ##################################################################### -struct ComposedEstimand <: Estimand - f::Function +@auto_hash_equals struct JointEstimand <: Estimand args::Tuple + JointEstimand(args...) = new(Tuple(args)) end -ComposedEstimand(f::String, args::AbstractVector) = ComposedEstimand(eval(Meta.parse(f)), Tuple(args)) - -ComposedEstimand(;f, args) = ComposedEstimand(f, args) +JointEstimand(;args) = JointEstimand(args...) -function to_dict(Ψ::ComposedEstimand) - fname = string(nameof(Ψ.f)) - startswith(fname, "#") && - throw(ArgumentError("The function of a ComposedEstimand cannot be anonymous to be converted to a dictionary.")) +function to_dict(Ψ::JointEstimand) return Dict( - :type => string(ComposedEstimand), - :f => fname, + :type => string(JointEstimand), :args => [to_dict(x) for x in Ψ.args] ) end -propensity_score_key(Ψ::ComposedEstimand) = Tuple(unique(Iterators.flatten(propensity_score_key(arg) for arg in Ψ.args))) -outcome_mean_key(Ψ::ComposedEstimand) = Tuple(unique(outcome_mean_key(arg) for arg in Ψ.args)) +propensity_score_key(Ψ::JointEstimand) = Tuple(unique(Iterators.flatten(propensity_score_key(arg) for arg in Ψ.args))) +outcome_mean_key(Ψ::JointEstimand) = Tuple(unique(outcome_mean_key(arg) for arg in Ψ.args)) -n_uniques_nuisance_functions(Ψ::ComposedEstimand) = length(propensity_score_key(Ψ)) + length(outcome_mean_key(Ψ)) +n_uniques_nuisance_functions(Ψ::JointEstimand) = length(propensity_score_key(Ψ)) + length(outcome_mean_key(Ψ)) -nuisance_functions_iterator(Ψ::ComposedEstimand) = +nuisance_functions_iterator(Ψ::JointEstimand) = Iterators.flatten(nuisance_functions_iterator(arg) for arg in Ψ.args) -identify(method::AdjustmentMethod, Ψ::ComposedEstimand, scm) = - ComposedEstimand(Ψ.f, Tuple(identify(method, arg, scm) for arg ∈ Ψ.args)) +identify(method::AdjustmentMethod, Ψ::JointEstimand, scm) = + JointEstimand((identify(method, arg, scm) for arg ∈ Ψ.args)...) -function string_repr(estimand::ComposedEstimand) +function string_repr(estimand::JointEstimand) string( - "Composed Estimand applying function `", estimand.f, "` to: \n", - "-----------------\n- ", + "Joint Estimand:\n", + "--------------\n- ", join((string_repr(arg) for arg in estimand.args), "\n- ") ) end + + +##################################################################### +### Composed Estimand ### +##################################################################### + +@auto_hash_equals struct ComposedEstimand <: Estimand + f::Function + estimand::JointEstimand +end + +ComposedEstimand(;f, estimand) = ComposedEstimand(f, estimand) + +ComposedEstimand(f::String, estimand) = ComposedEstimand(eval(Meta.parse(f)), estimand) + +function to_dict(Ψ::ComposedEstimand) + fname = string(parentmodule(Ψ.f), ".", nameof(Ψ.f)) + occursin("#", fname,) && + throw(ArgumentError("The function of a ComposedEstimand cannot be anonymous to be converted to a dictionary.")) + return Dict( + :type => string(ComposedEstimand), + :f => fname, + :estimand => to_dict(Ψ.estimand) + ) +end + +function string_repr(Ψ::ComposedEstimand) + firstline = string("Composed Estimand applying function `", Ψ.f , "` to :\n") + string( + firstline, + repeat("-", length(firstline)-2), "\n- ", + join((string_repr(arg) for arg in Ψ.estimand.args), "\n- ") + ) +end \ No newline at end of file diff --git a/src/estimates.jl b/src/estimates.jl index 175b5706..95efaa2f 100644 --- a/src/estimates.jl +++ b/src/estimates.jl @@ -148,13 +148,12 @@ function compute_offset(estimate::ConditionalDistributionEstimate, X) end ##################################################################### -### Composed Estimate ### +### Joint Estimate ### ##################################################################### -struct ComposedEstimate{T<:AbstractFloat} <: Estimate - estimand::ComposedEstimand +struct JointEstimate{T<:AbstractFloat} <: Estimate + estimand::JointEstimand estimates::Tuple - estimate::Array{T} cov::Matrix{T} n::Int end @@ -162,84 +161,56 @@ end to_matrix(x::Matrix) = x to_matrix(x) = reduce(hcat, x) -ComposedEstimate(;estimand, estimates, estimate, cov, n) = - ComposedEstimate(estimand, Tuple(estimates), collect(estimate), to_matrix(cov), n) +JointEstimate(;estimand, estimates, cov, n) = + JointEstimate(estimand, Tuple(estimates), to_matrix(cov), n) """ - Distributions.estimate(r::ComposedEstimate) + Distributions.estimate(r::JointEstimate) Retrieves the final estimate: after the TMLE step. """ -Distributions.estimate(est::ComposedEstimate) = - length(est.estimate) == 1 ? est.estimate[1] : est.estimate +Distributions.estimate(Ψ̂::JointEstimate) = [x.estimate for x in Ψ̂.estimates] +Statistics.std(Ψ̂::JointEstimate) = sqrt(only(Ψ̂.cov)) -""" - var(r::ComposedEstimate) - -Computes the estimated variance associated with the estimate. -""" -Statistics.var(est::ComposedEstimate) = - length(est.cov) == 1 ? est.cov[1] / est.n : est.cov ./ est.n - -""" - OneSampleTTest(r::ComposedEstimate, Ψ₀=0) - -Performs a T test on the ComposedEstimate. -""" -function HypothesisTests.OneSampleTTest(estimate::ComposedEstimate, Ψ₀=0) - @assert length(estimate.estimate) == 1 "OneSampleTTest is only implemeted for real-valued statistics." - return OneSampleTTest(estimate.estimate[1], sqrt(estimate.cov[1]), estimate.n, Ψ₀) +function emptyIC(estimate::JointEstimate, pval_threshold) + emptied_estimates = Tuple(emptyIC(e, pval_threshold) for e in estimate.estimates) + JointEstimate(estimate.estimand, emptied_estimates, estimate.cov, estimate.n) end -function HypothesisTests.OneSampleHotellingT2Test(estimate::ComposedEstimate, Ψ₀=zeros(size(estimate.estimate, 1))) - x̄ = estimate.estimate - S = estimate.cov - n, p = estimate.n, length(x̄) - p == length(Ψ₀) || - throw(DimensionMismatch("Number of variables does not match number of means")) - n > 0 || throw(ArgumentError("The input must be non-empty")) - - T² = n * HypothesisTests.At_Binv_A(x̄ .- Ψ₀, S) - F = (n - p) * T² / (p * (n - 1)) - return OneSampleHotellingT2Test(T², F, n, p, Ψ₀, x̄, S) -end +to_dict(estimate::JointEstimate) = Dict( + :type => string(JointEstimate), + :estimand => to_dict(estimate.estimand), + :estimates => [to_dict(e) for e in estimate.estimates], + :cov => estimate.cov, + :n => estimate.n +) -""" - OneSampleZTest(r::ComposedEstimate, Ψ₀=0) -Performs a T test on the ComposedEstimate. -""" -function HypothesisTests.OneSampleZTest(estimate::ComposedEstimate, Ψ₀=0) - @assert length(estimate.estimate) == 1 "OneSampleTTest is only implemeted for real-valued statistics." - return OneSampleZTest(estimate.estimate[1], sqrt(estimate.cov[1]), estimate.n, Ψ₀) +##################################################################### +### Composed Estimate ### +##################################################################### +struct ComposedEstimate{T<:AbstractFloat} <: Estimate + estimand::ComposedEstimand + estimates::Vector{T} + cov::Matrix{T} + n::Int end -""" - significance_test(estimate::ComposedEstimate, Ψ₀=zeros(size(estimate.estimate, 1))) +ComposedEstimate(Ψ, estimate::Real, cov, n) = ComposedEstimate(Ψ, [estimate], cov, n) -Performs a TTest if the estimate is one dimensional and a HotellingT2Test otherwise. -""" -function significance_test(estimate::ComposedEstimate, Ψ₀=zeros(size(estimate.estimate, 1))) - if length(estimate.estimate) == 1 - Ψ₀ = Ψ₀ isa AbstractArray ? first(Ψ₀) : Ψ₀ - return OneSampleTTest(estimate, Ψ₀) - else - return OneSampleHotellingT2Test(estimate, Ψ₀) - end -end +ComposedEstimate(;estimand, estimates, cov, n) = ComposedEstimate(estimand, estimates, cov, n) -function emptyIC(estimate::ComposedEstimate, pval_threshold) - emptied_estimates = Tuple(emptyIC(e, pval_threshold) for e in estimate.estimates) - ComposedEstimate(estimate.estimand, emptied_estimates, estimate.estimate, estimate.cov, estimate.n) -end +Distributions.estimate(Ψ̂::ComposedEstimate) = Ψ̂.estimates +Statistics.std(Ψ̂::ComposedEstimate) = sqrt(only(Ψ̂.cov)) -to_dict(estimate::ComposedEstimate) = Dict( +function to_dict(Ψ̂::ComposedEstimate) + Dict( :type => string(ComposedEstimate), - :estimand => to_dict(estimate.estimand), - :estimates => [to_dict(e) for e in estimate.estimates], - :estimate => estimate.estimate, - :cov => estimate.cov, - :n => estimate.n -) \ No newline at end of file + :estimand => to_dict(Ψ̂.estimand), + :estimates => Ψ̂.estimates, + :cov => Ψ̂.cov, + :n => Ψ̂.n +) +end \ No newline at end of file diff --git a/src/estimators.jl b/src/estimators.jl index a7fb1cbb..c4334929 100644 --- a/src/estimators.jl +++ b/src/estimators.jl @@ -87,21 +87,22 @@ ConditionalDistributionEstimator(train_validation_indices, model) = SampleSplitMLConditionalDistributionEstimator(model, train_validation_indices) ##################################################################### -### ComposedEstimand Estimator ### +### JointEstimand Estimator ### ##################################################################### """ - (estimator::Estimator)(Ψ::ComposedEstimand, dataset; cache=Dict(), verbosity=1) + (estimator::Estimator)(Ψ::JointEstimand, dataset; cache=Dict(), verbosity=1) Estimates all components of Ψ and then Ψ itself. """ -function (estimator::Estimator)(Ψ::ComposedEstimand, dataset; cache=Dict(), verbosity=1, backend=AD.ZygoteBackend()) +function (estimator::Estimator)(Ψ::JointEstimand, dataset; cache=Dict(), verbosity=1) estimates = map(Ψ.args) do estimand estimate, _ = estimator(estimand, dataset; cache=cache, verbosity=verbosity) estimate end - f₀, σ₀, n = _compose(Ψ.f, estimates...; backend=backend) - return ComposedEstimate(Ψ, estimates, f₀, σ₀, n), cache + Σ = covariance_matrix(estimates...) + n = size(first(estimates).IC, 1) + return JointEstimate(Ψ, estimates, Σ, n), cache end """ @@ -157,20 +158,14 @@ f(x, y) = [x^2 - y, y - 3x] compose(f, res₁, res₂) ``` """ -function compose(f, estimates...; backend=AD.ZygoteBackend()) - f₀, σ₀, n = _compose(f, estimates...; backend=backend) - estimand = ComposedEstimand(f, Tuple(e.estimand for e in estimates)) - return ComposedEstimate(estimand, estimates, f₀, σ₀, n) -end - -function _compose(f, estimates...; backend=AD.ZygoteBackend()) - Σ = covariance_matrix(estimates...) - point_estimates = [r.estimate for r in estimates] - f₀, Js = AD.value_and_jacobian(backend, f, point_estimates...) +function compose(f, Ψ̂::JointEstimate; backend=AD.ZygoteBackend()) + point_estimate = estimate(Ψ̂) + Σ = Ψ̂.cov + f₀, Js = AD.value_and_jacobian(backend, f, point_estimate...) J = hcat(Js...) - n = size(first(estimates).IC, 1) σ₀ = J * Σ * J' - return collect(f₀), σ₀, n + estimand = ComposedEstimand(f, Ψ̂.estimand) + return ComposedEstimate(estimand, f₀, σ₀, Ψ̂.n) end function covariance_matrix(estimates...) diff --git a/src/scm.jl b/src/scm.jl index 9bdb393f..97c61b0e 100644 --- a/src/scm.jl +++ b/src/scm.jl @@ -41,7 +41,7 @@ Base.show(io::IO, ::MIME"text/plain", scm::SCM) = println(io, string_repr(scm)) split_outcome_parent_pair(outcome_parents_pair::Pair) = outcome_parents_pair -split_outcome_parent_pair(outcome_parents_pair::Dict{T, Any}) where T = outcome_parents_pair[T(:outcome)], outcome_parents_pair[T(:parents)] +split_outcome_parent_pair(outcome_parents_pair::AbstractDict{T, Any}) where T = outcome_parents_pair[T(:outcome)], outcome_parents_pair[T(:parents)] function add_equations!(scm::SCM, equations...) for outcome_parents_pair in equations diff --git a/src/testing.jl b/src/testing.jl new file mode 100644 index 00000000..66fa8b05 --- /dev/null +++ b/src/testing.jl @@ -0,0 +1,38 @@ +single_dimensional_value(Ψ̂) = only(estimate(Ψ̂)) + +HypothesisTests.OneSampleTTest(Ψ̂, Ψ₀=0) = OneSampleTTest(single_dimensional_value(Ψ̂), std(Ψ̂), Ψ̂.n, Ψ₀) + +HypothesisTests.OneSampleZTest(Ψ̂, Ψ₀=0) = OneSampleZTest(single_dimensional_value(Ψ̂), std(Ψ̂), Ψ̂.n, Ψ₀) + +function HypothesisTests.OneSampleHotellingT2Test(Ψ̂, Ψ₀=zeros(size(Ψ̂.estimates, 1))) + x̄ = estimate(Ψ̂) + S = Ψ̂.cov + n, p = Ψ̂.n, length(x̄) + p == length(Ψ₀) || + throw(DimensionMismatch("Number of variables does not match number of means")) + n > 0 || throw(ArgumentError("The input must be non-empty")) + + T² = n * HypothesisTests.At_Binv_A(x̄ .- Ψ₀, S) + F = (n - p) * T² / (p * (n - 1)) + return OneSampleHotellingT2Test(T², F, n, p, Ψ₀, x̄, S) +end + +""" + significance_test(estimate::EICEstimate, Ψ₀=0) + +Performs a TTest +""" +significance_test(estimate::EICEstimate, Ψ₀=0) = OneSampleTTest(estimate, Ψ₀) + +""" + significance_test(estimate::JointEstimate, Ψ₀=zeros(size(estimate.estimate, 1))) + +Performs a TTest if the estimate is one dimensional and a HotellingT2Test otherwise. +""" +function significance_test(Ψ̂::Union{JointEstimate, ComposedEstimate}, Ψ₀=zeros(size(Ψ̂.estimates, 1))) + if length(Ψ̂.estimates) == 1 + return OneSampleTTest(Ψ̂, only(Ψ₀)) + else + return OneSampleHotellingT2Test(Ψ̂, Ψ₀) + end +end \ No newline at end of file diff --git a/src/utils.jl b/src/utils.jl index c23d9aa1..79434f8b 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -82,7 +82,7 @@ end """ default_models(;Q_binary=LinearBinaryClassifier(), Q_continuous=LinearRegressor(), G=LinearBinaryClassifier()) = ( -Create a NamedTuple containing default models to be used by downstream estimators. +Create a Dictionary containing default models to be used by downstream estimators. Each provided model is prepended (in a `MLJ.Pipeline`) with an `MLJ.ContinuousEncoder`. By default: @@ -96,17 +96,18 @@ The following changes the default `Q_binary` to a `LogisticClassifier` and provi ```julia using MLJLinearModels -models = ( - special_y = RidgeRegressor(), - default_models(Q_binary=LogisticClassifier())... +models = default_models( + Q_binary = LogisticClassifier(), + special_y = RidgeRegressor() ) ``` """ -default_models(;Q_binary=LinearBinaryClassifier(), Q_continuous=LinearRegressor(), G=LinearBinaryClassifier()) = ( - Q_binary_default = with_encoder(Q_binary), - Q_continuous_default = with_encoder(Q_continuous), - G_default = with_encoder(G) +default_models(;Q_binary=LinearBinaryClassifier(), Q_continuous=LinearRegressor(), G=LinearBinaryClassifier(), kwargs...) = Dict( + :Q_binary_default => with_encoder(Q_binary), + :Q_continuous_default => with_encoder(Q_continuous), + :G_default => with_encoder(G), + (key => with_encoder(val) for (key, val) in kwargs)... ) is_binary(dataset, columnname) = Set(skipmissing(Tables.getcolumn(dataset, columnname))) == Set([0, 1]) @@ -122,7 +123,16 @@ end satisfies_positivity(Ψ, freq_table::Nothing; positivity_constraint=nothing) = true -function frequency_table(dataset, colnames) +get_frequency_table(positivity_constraint::Nothing, dataset::Nothing, colnames) = nothing + +get_frequency_table(positivity_constraint::Nothing, dataset, colnames) = nothing + +get_frequency_table(positivity_constraint, dataset::Nothing, colnames) = + throw(ArgumentError("A dataset should be provided to enforce a positivity constraint.")) + +get_frequency_table(positivity_constraint, dataset, colnames) = get_frequency_table(dataset, colnames) + +function get_frequency_table(dataset, colnames) iterator = zip((Tables.getcolumn(dataset, colname) for colname in sort(collect(colnames)))...) counts = groupcount(x -> x, iterator) for key in keys(counts) diff --git a/test/Project.toml b/test/Project.toml index e175e2de..8fa84d0a 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -11,6 +11,7 @@ MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d" MLJGLMInterface = "caf8df21-4939-456d-ac9c-5fefbfb04c0c" MLJLinearModels = "6ee0df7b-362f-4a72-a706-9e79364fb692" MLJModels = "d491faf4-2d78-11e9-2867-c94bc002c0b7" +OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3" diff --git a/test/adjustment.jl b/test/adjustment.jl index 04dac9cd..015ab18b 100644 --- a/test/adjustment.jl +++ b/test/adjustment.jl @@ -13,7 +13,7 @@ using TMLE CM(outcome=:Y₁, treatment_values=(T₁=1,)), ATE(outcome=:Y₁, treatment_values=(T₁=(case=1, control=0),)), ATE(outcome=:Y₁, treatment_values=(T₁=(case=1, control=0), T₂=(case=1, control=0))), - IATE(outcome=:Y₁, treatment_values=(T₁=(case=1, control=0), T₂=(case=1, control=0))), + AIE(outcome=:Y₁, treatment_values=(T₁=(case=1, control=0), T₂=(case=1, control=0))), ] method = BackdoorAdjustment(outcome_extra_covariates=[:C]) statistical_estimands = [identify(method, estimand, scm) for estimand in causal_estimands] @@ -23,10 +23,10 @@ using TMLE @test statistical_estimand.treatment_values == causal_estimand.treatment_values @test statistical_estimand.outcome_extra_covariates == (:C,) end - @test statistical_estimands[1].treatment_confounders == (T₁=(:W₁, :W₂),) - @test statistical_estimands[2].treatment_confounders == (T₁=(:W₁, :W₂),) - @test statistical_estimands[3].treatment_confounders == (T₁=(:W₁, :W₂), T₂=(:W₁, :W₂)) - @test statistical_estimands[4].treatment_confounders == (T₁=(:W₁, :W₂), T₂=(:W₁, :W₂)) + @test statistical_estimands[1].treatment_confounders == Dict(:T₁ => (:W₁, :W₂),) + @test statistical_estimands[2].treatment_confounders == Dict(:T₁ => (:W₁, :W₂),) + @test statistical_estimands[3].treatment_confounders == Dict(:T₁ => (:W₁, :W₂), :T₂ => (:W₁, :W₂)) + @test statistical_estimands[4].treatment_confounders == Dict(:T₁ => (:W₁, :W₂), :T₂ => (:W₁, :W₂)) end @testset "Test TMLE.to_dict" begin diff --git a/test/composition.jl b/test/composition.jl index b9688e71..cbdb90bb 100644 --- a/test/composition.jl +++ b/test/composition.jl @@ -38,63 +38,37 @@ end @test Σ == cov(X) end -@testset "Test to_dict and from_dict! ComposedEstimand" begin - ATE₁ = ATE( - outcome=:Y, - treatment_values = (T=(case=1, control=0),), - treatment_confounders = (T=[:W],) - ) - ATE₂ = ATE( - outcome=:Y, - treatment_values = (T=(case=2, control=1),), - treatment_confounders = (T=[:W],) - ) - diff = ComposedEstimand(-, (ATE₁, ATE₂)) - d = TMLE.to_dict(diff) - diff_from_dict = TMLE.from_dict!(d) - @test diff_from_dict == diff - - # Anonymous function will raise - anonymousdiff = ComposedEstimand((x,y) -> x - y, (ATE₁, ATE₂)) - @test_throws ArgumentError TMLE.to_dict(anonymousdiff) -end @testset "Test composition CM(1) - CM(0) = ATE(1,0)" begin dataset = make_dataset(;n=1000) - # Counterfactual Mean T = 1 + CM₀ = CM( + outcome = :Y, + treatment_values = (T=0,), + treatment_confounders = (T=[:W],) + ) CM₁ = CM( outcome = :Y, treatment_values = (T=1,), treatment_confounders = (T=[:W],) ) - models = ( - Y = with_encoder(LinearRegressor()), - T = LogisticClassifier(lambda=0) + mydiff(x, y) = y - x + + jointestimand = JointEstimand(CM₀, CM₁) + models = Dict( + :Y => with_encoder(LinearRegressor()), + :T => LogisticClassifier(lambda=0) ) tmle = TMLEE(models=models) ose = OSE(models=models) cache = Dict() - CM_tmle_result₁, cache = tmle(CM₁, dataset; cache=cache, verbosity=0) - CM_ose_result₁, cache = ose(CM₁, dataset; cache=cache, verbosity=0) - # Counterfactual Mean T = 0 - CM₀ = CM( - outcome = :Y, - treatment_values = (T=0,), - treatment_confounders = (T=[:W],) - ) - # Estimate Individually - CM_tmle_result₀, cache = tmle(CM₀, dataset; cache=cache, verbosity=0) - CM_ose_result₀, cache = ose(CM₀, dataset; cache=cache, verbosity=0) - # Compose estimates - CM_result_composed_tmle = compose(-, CM_tmle_result₁, CM_tmle_result₀); - CM_result_composed_ose = compose(-, CM_ose_result₁, CM_ose_result₀); - # Estimate via ComposedEstimand - composed_estimand = ComposedEstimand(-, (CM₁, CM₀)) - composed_estimate, cache = tmle(composed_estimand, dataset; cache=cache, verbosity=0) - @test composed_estimate.estimand == CM_result_composed_tmle.estimand - @test CM_result_composed_tmle.estimate == composed_estimate.estimate - @test CM_result_composed_tmle.cov == composed_estimate.cov - @test CM_result_composed_tmle.n == composed_estimate.n + # Via Composition + joint_tmle, cache = tmle(jointestimand, dataset; cache=cache, verbosity=0) + diff_tmle = compose(mydiff, joint_tmle) + @test significance_test(diff_tmle) isa TMLE.OneSampleTTest + joint_ose, cache = ose(jointestimand, dataset; cache=cache, verbosity=0) + diff_ose = compose(mydiff, joint_ose) + @test significance_test(diff_ose) isa TMLE.OneSampleTTest + # Via ATE ATE₁₀ = ATE( outcome = :Y, @@ -102,61 +76,68 @@ end treatment_confounders = (T=[:W],) ) # Check composed TMLE - ATE_tmle_result₁₀, cache = tmle(ATE₁₀, dataset; cache=cache, verbosity=0) - @test estimate(ATE_tmle_result₁₀) ≈ estimate(CM_result_composed_tmle) atol = 1e-7 + ATE_tmle, cache = tmle(ATE₁₀, dataset; cache=cache, verbosity=0) + @test estimate(ATE_tmle) ≈ first(estimate(diff_tmle)) atol = 1e-7 # T Test - composed_confint = collect(confint(OneSampleTTest(CM_result_composed_tmle))) - tmle_confint = collect(confint(OneSampleTTest(ATE_tmle_result₁₀))) - @test tmle_confint ≈ composed_confint atol=1e-4 + diff_confint = collect(confint(OneSampleTTest(diff_tmle))) + ATE_confint = collect(confint(OneSampleTTest(ATE_tmle))) + @test ATE_confint ≈ diff_confint atol=1e-4 # Z Test - composed_confint = collect(confint(OneSampleZTest(CM_result_composed_tmle))) - tmle_confint = collect(confint(OneSampleZTest(ATE_tmle_result₁₀))) - @test tmle_confint ≈ composed_confint atol=1e-4 - # Variance - @test var(ATE_tmle_result₁₀) ≈ var(CM_result_composed_tmle) atol = 1e-3 + diff_confint = collect(confint(OneSampleZTest(diff_tmle))) + ATE_confint = collect(confint(OneSampleZTest(ATE_tmle))) + @test ATE_confint ≈ diff_confint atol=1e-4 # Check composed OSE - ATE_ose_result₁₀, cache = ose(ATE₁₀, dataset; cache=cache, verbosity=0) - @test estimate(ATE_ose_result₁₀) ≈ estimate(CM_result_composed_ose) atol = 1e-7 + ATE_ose, cache = ose(ATE₁₀, dataset; cache=cache, verbosity=0) + @test estimate(ATE_ose) ≈ only(estimate(diff_ose)) atol = 1e-7 # T Test - composed_confint = collect(confint(OneSampleTTest(CM_result_composed_ose))) - ose_confint = collect(confint(OneSampleTTest(ATE_ose_result₁₀))) - @test ose_confint ≈ composed_confint atol=1e-4 + diff_confint = collect(confint(OneSampleTTest(diff_ose))) + ATE_confint = collect(confint(OneSampleTTest(ATE_ose))) + @test ATE_confint ≈ diff_confint atol=1e-4 # Z Test - composed_confint = collect(confint(OneSampleZTest(CM_result_composed_ose))) - ose_confint = collect(confint(OneSampleZTest(ATE_ose_result₁₀))) - @test ose_confint ≈ composed_confint atol=1e-4 - # Variance - @test var(ATE_ose_result₁₀) ≈ var(CM_result_composed_ose) atol = 1e-3 + diff_confint = collect(confint(OneSampleZTest(diff_ose))) + ATE_confint = collect(confint(OneSampleZTest(ATE_ose))) + @test ATE_confint ≈ diff_confint atol=1e-4 end @testset "Test compose multidimensional function" begin dataset = make_dataset(;n=1000) - models = ( - Y = with_encoder(LinearRegressor()), - T = LogisticClassifier(lambda=0) + models = Dict( + :Y => with_encoder(LinearRegressor()), + :T => LogisticClassifier(lambda=0) ) tmle = TMLEE(models=models) cache = Dict() - CM₁ = CM( + joint = JointEstimand( + CM( outcome = :Y, treatment_values = (T=1,), treatment_confounders = (T=[:W],) - ) - CM_result₁, cache = tmle(CM₁, dataset; cache=cache, verbosity=0) - - CM₀ = CM( + ), + CM( outcome = :Y, treatment_values = (T=0,), - treatment_confounders = (T=[:W],) + treatment_confounders = (T=[:W],)) ) - CM_result₀, cache = tmle(CM₀, dataset; cache=cache, verbosity=0) - f(x, y) = [x^2 - y, x/y, 2x + 3y] - CM_result_composed = compose(f, CM_result₁, CM_result₀) - @test estimate(CM_result_composed) == f(estimate(CM_result₁), TMLE.estimate(CM_result₀)) - @test size(var(CM_result_composed)) == (3, 3) + joint_estimate, cache = tmle(joint, dataset; cache=cache, verbosity=0) + + Main.eval(:(f(x, y) = [x^2 - y, 2x + 3y])) + + composed_estimate = compose(Main.f, joint_estimate) + @test significance_test(composed_estimate) isa TMLE.OneSampleHotellingT2Test + @test estimate(composed_estimate) == Main.f(estimate(joint_estimate)...) + @test size(composed_estimate.cov) == (2, 2) + + composed_estimate_dict = TMLE.to_dict(composed_estimate) + println(composed_estimate_dict[:estimand]) + @test composed_estimate_dict isa Dict + composed_estimate_from_dict = TMLE.from_dict!(composed_estimate_dict) + @test composed_estimate_from_dict.estimand == composed_estimate.estimand + @test composed_estimate_from_dict.estimates == composed_estimate.estimates + @test composed_estimate_from_dict.cov == composed_estimate.cov + @test composed_estimate_from_dict.n == composed_estimate.n end @testset "Test Joint Interaction" begin @@ -181,27 +162,28 @@ end T₂ = categorical(T₂), Y = Y ) - IATE₁ = IATE( + AIE₁ = AIE( outcome = :Y, treatment_values = (T₁=(case=2, control=1), T₂=(case=2, control=1)), treatment_confounders = (T₁ = [:W], T₂ = [:W]) ) - IATE₂ = IATE( + AIE₂ = AIE( outcome = :Y, treatment_values = (T₁=(case=3, control=1), T₂=(case=3, control=1)), treatment_confounders = (T₁ = [:W], T₂ = [:W]) ) - IATE₃ = IATE( + AIE₃ = AIE( outcome = :Y, treatment_values = (T₁=(case=3, control=2), T₂=(case=3, control=2)), treatment_confounders = (T₁ = [:W], T₂ = [:W]) ) - jointIATE = ComposedEstimand(TMLE.joint_estimand, (IATE₁, IATE₂, IATE₃)) + jointAIE = JointEstimand(AIE₁, AIE₂, AIE₃) + ose = OSE(models=TMLE.default_models(G=LogisticClassifier(), Q_continuous=LinearRegressor())) - jointEstimate, _ = ose(jointIATE, dataset, verbosity=0) + jointEstimate, _ = ose(jointAIE, dataset, verbosity=0) testres = significance_test(jointEstimate) - @test testres.x̄ ≈ jointEstimate.estimate + @test testres.x̄ ≈ estimate(jointEstimate) @test pvalue(testres) < 1e-10 emptied_estimate = TMLE.emptyIC(jointEstimate) @@ -229,7 +211,7 @@ end @test jointEstimate.estimand == jointEstimate_fromdict.estimand @test jointEstimate.cov == jointEstimate_fromdict.cov - @test jointEstimate.estimate == jointEstimate_fromdict.estimate + @test estimate(jointEstimate) == estimate(jointEstimate_fromdict) @test jointEstimate.n == jointEstimate_fromdict.n @test length(jointEstimate_fromdict.estimates) == 3 @@ -240,7 +222,7 @@ end from_json = TMLE.read_json(filename, use_mmap=false) @test jointEstimate.estimand == from_json.estimand @test jointEstimate.cov == from_json.cov - @test jointEstimate.estimate == from_json.estimate + @test estimate(jointEstimate) == estimate(from_json) @test jointEstimate.n == from_json.n @test length(jointEstimate_fromdict.estimates) == 3 end diff --git a/test/configuration.jl b/test/configuration.jl index fae25097..96b0f016 100644 --- a/test/configuration.jl +++ b/test/configuration.jl @@ -22,7 +22,7 @@ end yamlfilename = mktemp()[1] jsonfilename = mktemp()[1] estimands = [ - IATE( + AIE( outcome=:Y1, treatment_values= ( T1 = (case = 1, control = 0), @@ -81,7 +81,7 @@ end jsonfilename = mktemp()[1] # With a StaticSCM, some Causal estimands and an Adjustment Method estimands = [ - IATE( + AIE( outcome=:Y1, treatment_values= ( T1 = (case = 1, control = 0), @@ -140,7 +140,7 @@ end treatment_values = (T=(case=2, control=1),), treatment_confounders = (T=[:W],) ) - diff = ComposedEstimand(-, (ATE₁, ATE₂)) + diff = JointEstimand(-, (ATE₁, ATE₂)) estimands = [ATE₁, ATE₂, diff] jlsfile = mktemp()[1] serialize(jlsfile, estimands) diff --git a/test/counterfactual_mean_based/3points_interactions.jl b/test/counterfactual_mean_based/3points_interactions.jl index 9f032841..8a12a93c 100644 --- a/test/counterfactual_mean_based/3points_interactions.jl +++ b/test/counterfactual_mean_based/3points_interactions.jl @@ -27,7 +27,7 @@ end @testset "Test 3-points interactions" begin dataset, Ψ₀ = dataset_scm_and_truth(;n=1000) - Ψ = IATE( + Ψ = AIE( outcome = :Y, treatment_values = ( T₁=(case=true, control=false), @@ -36,11 +36,11 @@ end ), treatment_confounders = (T₁=[:W], T₂=[:W], T₃=[:W]) ) - models = ( - Y = with_encoder(InteractionTransformer(order=3) |> LinearRegressor()), - T₁ = LogisticClassifier(lambda=0), - T₂ = LogisticClassifier(lambda=0), - T₃ = LogisticClassifier(lambda=0) + models = Dict( + :Y => with_encoder(InteractionTransformer(order=3) |> LinearRegressor()), + :T₁ => LogisticClassifier(lambda=0), + :T₂ => LogisticClassifier(lambda=0), + :T₃ => LogisticClassifier(lambda=0) ) tmle = TMLEE(models=models, machine_cache=true) diff --git a/test/counterfactual_mean_based/clever_covariate.jl b/test/counterfactual_mean_based/clever_covariate.jl index 2b7e131d..41d9c4fa 100644 --- a/test/counterfactual_mean_based/clever_covariate.jl +++ b/test/counterfactual_mean_based/clever_covariate.jl @@ -53,7 +53,7 @@ end end @testset "Test clever_covariate_and_weights: 2 treatments" begin - Ψ = IATE( + Ψ = AIE( outcome = :Y, treatment_values=( T₁=(case=1, control=0), @@ -90,7 +90,7 @@ end @testset "Test compute_offset, clever_covariate_and_weights: 3 treatments" begin ## Third case: 3 Treatment variables - Ψ = IATE( + Ψ = AIE( outcome =:Y, treatment_values=( T₁=(case="a", control="b"), diff --git a/test/counterfactual_mean_based/double_robustness_ate.jl b/test/counterfactual_mean_based/double_robustness_ate.jl index acc73e4d..c043cb2e 100644 --- a/test/counterfactual_mean_based/double_robustness_ate.jl +++ b/test/counterfactual_mean_based/double_robustness_ate.jl @@ -113,9 +113,9 @@ end ) # When Q is misspecified but G is well specified - models = ( - Y = with_encoder(MLJModels.DeterministicConstantRegressor()), - T = with_encoder(LogisticClassifier(lambda=0)) + models = Dict( + :Y => with_encoder(MLJModels.DeterministicConstantRegressor()), + :T => with_encoder(LogisticClassifier(lambda=0)) ) dr_estimators = double_robust_estimators(models) results, cache = test_coverage_and_get_results(dr_estimators, Ψ, Ψ₀, dataset; verbosity=0) @@ -127,14 +127,14 @@ end @test emptyIC(results.tmle, pval_threshold=0.9pval).IC == [] @test emptyIC(results.tmle, pval_threshold=1.1pval) === results.tmle # The initial estimate is far away - naive = NAIVE(models.Y) + naive = NAIVE(models[:Y]) naive_result, cache = naive(Ψ, dataset; cache=cache, verbosity=0) @test naive_result == 0 # When Q is well specified but G is misspecified - models = ( - Y = with_encoder(TreatmentTransformer() |> LinearRegressor()), - T = with_encoder(ConstantClassifier()) + models = Dict( + :Y => with_encoder(TreatmentTransformer() |> LinearRegressor()), + :T => with_encoder(ConstantClassifier()) ) dr_estimators = double_robust_estimators(models) results, cache = test_coverage_and_get_results(dr_estimators, Ψ, Ψ₀, dataset; verbosity=0) @@ -149,22 +149,22 @@ end treatment_confounders = (T=[:W],) ) # When Q is misspecified but G is well specified - models = ( - Y = with_encoder(ConstantClassifier()), - T = with_encoder(LogisticClassifier(lambda=0)) + models = Dict( + :Y => with_encoder(ConstantClassifier()), + :T => with_encoder(LogisticClassifier(lambda=0)) ) dr_estimators = double_robust_estimators(models, resampling=StratifiedCV()) results, cache = test_coverage_and_get_results(dr_estimators, Ψ, Ψ₀, dataset; verbosity=0) test_mean_inf_curve_almost_zero(results.tmle; atol=1e-6) test_mean_inf_curve_almost_zero(results.ose; atol=1e-6) # The initial estimate is far away - naive = NAIVE(models.Y) + naive = NAIVE(models[:Y]) naive_result, cache = naive(Ψ, dataset; cache=cache, verbosity=0) @test naive_result == 0 # When Q is well specified but G is misspecified - models = ( - Y = with_encoder(LogisticClassifier(lambda=0)), - T = with_encoder(ConstantClassifier()) + models = Dict( + :Y => with_encoder(LogisticClassifier(lambda=0)), + :T => with_encoder(ConstantClassifier()) ) dr_estimators = double_robust_estimators(models, resampling=StratifiedCV()) results, cache = test_coverage_and_get_results(dr_estimators, Ψ, Ψ₀, dataset; verbosity=0) @@ -180,9 +180,9 @@ end treatment_confounders = (T=[:W₁, :W₂, :W₃],) ) # When Q is misspecified but G is well specified - models = ( - Y = with_encoder(MLJModels.DeterministicConstantRegressor()), - T = with_encoder(LogisticClassifier(lambda=0)) + models = Dict( + :Y => with_encoder(MLJModels.DeterministicConstantRegressor()), + :T => with_encoder(LogisticClassifier(lambda=0)) ) dr_estimators = double_robust_estimators(models) results, cache = test_coverage_and_get_results(dr_estimators, Ψ, Ψ₀, dataset; verbosity=0) @@ -190,14 +190,14 @@ end test_mean_inf_curve_almost_zero(results.tmle; atol=1e-10) test_mean_inf_curve_almost_zero(results.ose; atol=1e-10) # The initial estimate is far away - naive = NAIVE(models.Y) + naive = NAIVE(models[:Y]) naive_result, cache = naive(Ψ, dataset; cache=cache, verbosity=0) @test naive_result == 0 # When Q is well specified but G is misspecified - models = ( - Y = with_encoder(LinearRegressor()), - T = with_encoder(ConstantClassifier()) + models = Dict( + :Y => with_encoder(LinearRegressor()), + :T => with_encoder(ConstantClassifier()) ) dr_estimators = double_robust_estimators(models) results, cache = test_coverage_and_get_results(dr_estimators, Ψ, Ψ₀, dataset; verbosity=0) @@ -219,20 +219,20 @@ end ) ) # When Q is misspecified but G is well specified - models = ( - Y = with_encoder(MLJModels.DeterministicConstantRegressor()), - T₁ = with_encoder(LogisticClassifier(lambda=0)), - T₂ = with_encoder(LogisticClassifier(lambda=0)) + models = Dict( + :Y => with_encoder(MLJModels.DeterministicConstantRegressor()), + :T₁ => with_encoder(LogisticClassifier(lambda=0)), + :T₂ => with_encoder(LogisticClassifier(lambda=0)) ) dr_estimators = double_robust_estimators(models) results, cache = test_coverage_and_get_results(dr_estimators, Ψ, ATE₁₁₋₀₁, dataset; verbosity=0) test_mean_inf_curve_almost_zero(results.tmle; atol=1e-10) test_mean_inf_curve_almost_zero(results.ose; atol=1e-10) # When Q is well specified but G is misspecified - models = ( - Y = with_encoder(LinearRegressor()), - T₁ = with_encoder(ConstantClassifier()), - T₂ = with_encoder(ConstantClassifier()) + models = Dict( + :Y => with_encoder(LinearRegressor()), + :T₁ => with_encoder(ConstantClassifier()), + :T₂ => with_encoder(ConstantClassifier()) ) dr_estimators = double_robust_estimators(models) results, cache = test_coverage_and_get_results(dr_estimators, Ψ, ATE₁₁₋₀₁, dataset; verbosity=0) @@ -256,10 +256,10 @@ end test_mean_inf_curve_almost_zero(results.ose; atol=1e-10) # When Q is well specified but G is misspecified - models = ( - Y = with_encoder(MLJModels.DeterministicConstantRegressor()), - T₁ = with_encoder(LogisticClassifier(lambda=0)), - T₂ = with_encoder(LogisticClassifier(lambda=0)), + models = Dict( + :Y => with_encoder(MLJModels.DeterministicConstantRegressor()), + :T₁ => with_encoder(LogisticClassifier(lambda=0)), + :T₂ => with_encoder(LogisticClassifier(lambda=0)), ) dr_estimators = double_robust_estimators(models) results, cache = test_coverage_and_get_results(dr_estimators, Ψ, ATE₁₁₋₀₀, dataset; verbosity=0) diff --git a/test/counterfactual_mean_based/double_robustness_iate.jl b/test/counterfactual_mean_based/double_robustness_iate.jl index 9afabba5..823e134b 100644 --- a/test/counterfactual_mean_based/double_robustness_iate.jl +++ b/test/counterfactual_mean_based/double_robustness_iate.jl @@ -39,7 +39,7 @@ function binary_outcome_binary_treatment_pb(;n=100) W = float(W) dataset = (T₁=T₁, T₂=T₂, W₁=W[:, 1], W₂=W[:, 2], W₃=W[:, 3], Y=y) dataset = coerce(dataset, autotype(dataset)) - # Compute the theoretical IATE + # Compute the theoretical AIE Wcomb = [1 1 1; 1 1 0; 1 0 0; @@ -48,16 +48,16 @@ function binary_outcome_binary_treatment_pb(;n=100) 0 0 0; 0 0 1; 0 1 1] - IATE = 0 + AIE = 0 for i in 1:8 w = reshape(Wcomb[i, :], 1, 3) temp = μy_fn(w, [1], [1])[1] temp += μy_fn(w, [0], [0])[1] temp -= μy_fn(w, [1], [0])[1] temp -= μy_fn(w, [0], [1])[1] - IATE += temp*0.5*0.5*0.5 + AIE += temp*0.5*0.5*0.5 end - return dataset, IATE + return dataset, AIE end @@ -94,7 +94,7 @@ function binary_outcome_categorical_treatment_pb(;n=100) μy = μy_fn(W, T, Hmach) y = [rand(rng, Bernoulli(μy[i])) for i in 1:n] dataset = (T₁=T.T₁, T₂=T.T₂, W₁=W[:, 1], W₂=W[:, 2], W₃=W[:, 3], Y=categorical(y)) - # Compute the theoretical IATE for the query + # Compute the theoretical AIE for the query # (CC, AT) against (CG, AA) Wcomb = [1 1 1; 1 1 0; @@ -104,7 +104,7 @@ function binary_outcome_categorical_treatment_pb(;n=100) 0 0 0; 0 0 1; 0 1 1] - IATE = 0 + AIE = 0 levels₁ = levels(T.T₁) levels₂ = levels(T.T₂) for i in 1:8 @@ -113,9 +113,9 @@ function binary_outcome_categorical_treatment_pb(;n=100) temp += μy_fn(w, (T₁=categorical(["CG"], levels=levels₁), T₂=categorical(["AA"], levels=levels₂)), Hmach)[1] temp -= μy_fn(w, (T₁=categorical(["CC"], levels=levels₁), T₂=categorical(["AA"], levels=levels₂)), Hmach)[1] temp -= μy_fn(w, (T₁=categorical(["CG"], levels=levels₁), T₂=categorical(["AT"], levels=levels₂)), Hmach)[1] - IATE += temp*0.5*0.5*0.5 + AIE += temp*0.5*0.5*0.5 end - return dataset, IATE + return dataset, AIE end @@ -151,21 +151,21 @@ function continuous_outcome_binary_treatment_pb(;n=100) 0 0 0; 0 0 1; 0 1 1] - IATE = 0 + AIE = 0 for i in 1:8 w = reshape(Wcomb[i, :], 1, 3) temp = μy_fn(w, [1], [1])[1] temp += μy_fn(w, [0], [0])[1] temp -= μy_fn(w, [1], [0])[1] temp -= μy_fn(w, [0], [1])[1] - IATE += temp*0.5*0.5*0.5 + AIE += temp*0.5*0.5*0.5 end - return dataset, IATE + return dataset, AIE end -@testset "Test Double Robustness IATE on binary_outcome_binary_treatment_pb" begin +@testset "Test Double Robustness AIE on binary_outcome_binary_treatment_pb" begin dataset, Ψ₀ = binary_outcome_binary_treatment_pb(n=10_000) - Ψ = IATE( + Ψ = AIE( outcome=:Y, treatment_values = ( T₁=(case=true, control=false), @@ -177,39 +177,39 @@ end ) ) # When Q is misspecified but G is well specified - models = ( - Y = with_encoder(ConstantClassifier()), - T₁ = with_encoder(LogisticClassifier(lambda=0)), - T₂ = with_encoder(LogisticClassifier(lambda=0)), + models = Dict( + :Y => with_encoder(ConstantClassifier()), + :T₁ => with_encoder(LogisticClassifier(lambda=0)), + :T₂ => with_encoder(LogisticClassifier(lambda=0)), ) dr_estimators = double_robust_estimators(models, resampling=StratifiedCV()) results, cache = test_coverage_and_get_results(dr_estimators, Ψ, Ψ₀, dataset; verbosity=0) test_mean_inf_curve_almost_zero(results.tmle; atol=1e-9) test_mean_inf_curve_almost_zero(results.ose; atol=1e-9) # The initial estimate is far away - naive = NAIVE(models.Y) + naive = NAIVE(models[:Y]) naive_result, cache = naive(Ψ, dataset; cache=cache, verbosity=0) @test naive_result == 0 # When Q is well specified but G is misspecified - models = ( - Y = with_encoder(LogisticClassifier(lambda=0)), - T₁ = with_encoder(ConstantClassifier()), - T₂ = with_encoder(ConstantClassifier()), + models = Dict( + :Y => with_encoder(LogisticClassifier(lambda=0)), + :T₁ => with_encoder(ConstantClassifier()), + :T₂ => with_encoder(ConstantClassifier()), ) dr_estimators = double_robust_estimators(models, resampling=StratifiedCV()) results, cache = test_coverage_and_get_results(dr_estimators, Ψ, Ψ₀, dataset; verbosity=0) test_mean_inf_curve_almost_zero(results.tmle; atol=1e-9) test_mean_inf_curve_almost_zero(results.ose; atol=1e-9) # The initial estimate is far away - naive = NAIVE(models.Y) + naive = NAIVE(models[:Y]) naive_result, cache = naive(Ψ, dataset; cache=cache, verbosity=0) @test naive_result ≈ -0.0 atol=1e-1 end -@testset "Test Double Robustness IATE on continuous_outcome_binary_treatment_pb" begin +@testset "Test Double Robustness AIE on continuous_outcome_binary_treatment_pb" begin dataset, Ψ₀ = continuous_outcome_binary_treatment_pb(n=10_000) - Ψ = IATE( + Ψ = AIE( outcome = :Y, treatment_values = ( T₁=(case=true, control=false), @@ -221,10 +221,10 @@ end ) ) # When Q is misspecified but G is well specified - models = ( - Y = with_encoder(MLJModels.DeterministicConstantRegressor()), - T₁ = with_encoder(LogisticClassifier(lambda=0)), - T₂ = with_encoder(LogisticClassifier(lambda=0)), + models = Dict( + :Y => with_encoder(MLJModels.DeterministicConstantRegressor()), + :T₁ => with_encoder(LogisticClassifier(lambda=0)), + :T₂ => with_encoder(LogisticClassifier(lambda=0)), ) dr_estimators = double_robust_estimators(models) @@ -232,15 +232,15 @@ end test_mean_inf_curve_almost_zero(results.tmle; atol=1e-10) test_mean_inf_curve_almost_zero(results.ose; atol=1e-10) # The initial estimate is far away - naive = NAIVE(models.Y) + naive = NAIVE(models[:Y]) naive_result, cache = naive(Ψ, dataset; cache=cache, verbosity=0) @test naive_result == 0 # When Q is well specified but G is misspecified - models = ( - Y = with_encoder(cont_interacter), - T₁ = with_encoder(ConstantClassifier()), - T₂ = with_encoder(ConstantClassifier()), + models = Dict( + :Y => with_encoder(cont_interacter), + :T₁ => with_encoder(ConstantClassifier()), + :T₂ => with_encoder(ConstantClassifier()), ) dr_estimators = double_robust_estimators(models) results, cache = test_coverage_and_get_results(dr_estimators, Ψ, Ψ₀, dataset; verbosity=0) @@ -248,9 +248,9 @@ end end -@testset "Test Double Robustness IATE on binary_outcome_categorical_treatment_pb" begin +@testset "Test Double Robustness AIE on binary_outcome_categorical_treatment_pb" begin dataset, Ψ₀ = binary_outcome_categorical_treatment_pb(n=30_000) - Ψ = IATE( + Ψ = AIE( outcome=:Y, treatment_values= ( T₁=(case="CC", control="CG"), @@ -262,10 +262,10 @@ end ) ) # When Q is misspecified but G is well specified - models = ( - Y = with_encoder(ConstantClassifier()), - T₁ = with_encoder(LogisticClassifier(lambda=0)), - T₂ = with_encoder(LogisticClassifier(lambda=0)) + models = Dict( + :Y => with_encoder(ConstantClassifier()), + :T₁ => with_encoder(LogisticClassifier(lambda=0)), + :T₂ => with_encoder(LogisticClassifier(lambda=0)) ) dr_estimators = double_robust_estimators(models, resampling=StratifiedCV()) results, cache = test_coverage_and_get_results(dr_estimators, Ψ, Ψ₀, dataset; verbosity=0) @@ -273,15 +273,15 @@ end test_mean_inf_curve_almost_zero(results.ose; atol=1e-10) # The initial estimate is far away - naive = NAIVE(models.Y) + naive = NAIVE(models[:Y]) naive_result, cache = naive(Ψ, dataset; cache=cache, verbosity=0) @test naive_result == 0 # When Q is well specified but G is misspecified - models = ( - Y = with_encoder(cat_interacter), - T₁ = with_encoder(ConstantClassifier()), - T₂ = with_encoder(ConstantClassifier()), + models = Dict( + :Y => with_encoder(cat_interacter), + :T₁ => with_encoder(ConstantClassifier()), + :T₂ => with_encoder(ConstantClassifier()), ) dr_estimators = double_robust_estimators(models, resampling=StratifiedCV()) results, cache = test_coverage_and_get_results(dr_estimators, Ψ, Ψ₀, dataset; verbosity=0) @@ -289,7 +289,7 @@ end test_mean_inf_curve_almost_zero(results.ose; atol=1e-10) # The initial estimate is far away - naive = NAIVE(models.Y) + naive = NAIVE(models[:Y]) naive_result, cache = naive(Ψ, dataset; cache=cache, verbosity=0) @test naive_result ≈ -0.02 atol=1e-2 end diff --git a/test/counterfactual_mean_based/estimands.jl b/test/counterfactual_mean_based/estimands.jl index bbabe480..bb78da40 100644 --- a/test/counterfactual_mean_based/estimands.jl +++ b/test/counterfactual_mean_based/estimands.jl @@ -2,6 +2,7 @@ module TestEstimands using Test using TMLE +using OrderedCollections @testset "Test StatisticalCMCompositeEstimand" begin dataset = ( W = [1, 2, 3, 4, 5, 6, 7, 8], @@ -13,8 +14,8 @@ using TMLE # Counterfactual Mean Ψ = CM( outcome=:Y, - treatment_values=(T₁="A", T₂=1), - treatment_confounders=(T₁=[:W], T₂=[:W]) + treatment_values=Dict("T₁" => "A", :T₂ => 1), + treatment_confounders=(T₁=[:W], T₂=["W"]) ) indicator_fns = TMLE.indicator_fns(Ψ) @test indicator_fns == Dict(("A", 1) => 1.) @@ -36,14 +37,14 @@ using TMLE ) indic_values = TMLE.indicator_values(indicator_fns, TMLE.selectcols(dataset, TMLE.treatments(Ψ))) @test indic_values == [0.0, -1.0, 1.0, 0.0, 0.0, -1.0, 1.0, 0.0] - # 2-points IATE - Ψ = IATE( + # 2-points AIE + Ψ = AIE( outcome=:Y, - treatment_values=( - T₁=(case="A", control="B"), - T₂=(case=1, control=0) + treatment_values=Dict( + :T₁ => (case="A", control="B"), + :T₂ => (case=1, control=0) ), - treatment_confounders=(T₁=[:W], T₂=[:W]) + treatment_confounders=Dict(:T₁ => [:W], :T₂ => [:W]) ) indicator_fns = TMLE.indicator_fns(Ψ) @test indicator_fns == Dict( @@ -54,8 +55,8 @@ using TMLE ) indic_values = TMLE.indicator_values(indicator_fns, TMLE.selectcols(dataset, TMLE.treatments(Ψ))) @test indic_values == [-1.0, 1.0, 1.0, -1.0, -1.0, 1.0, 1.0, -1.0] - # 3-points IATE - Ψ = IATE( + # 3-points AIE + Ψ = AIE( outcome=:Y, treatment_values=( T₁=(case="A", control="B"), @@ -92,17 +93,20 @@ end ) Ψ₂ = ATE( outcome=:Y, - treatment_values = (T₁=(case=1, control=0), T₂=(case=0, control=1)), - treatment_confounders = ( - T₁ = [:W₀], - T₂ = [:W₂, :W₁] + treatment_values = Dict("T₁"=>(case=1, control=0), "T₂"=>(case=0, control=1)), + treatment_confounders = Dict( + :T₁ => [:W₀], + :T₂ => [:W₂, :W₁] ), outcome_extra_covariates = [:A, :Z] ) @test Ψ₁ == Ψ₂ @test Ψ₁.outcome == :Y - @test Ψ₁.treatment_values == (T₁=(case=1, control=0), T₂=(case=0, control=1)) - @test Ψ₁.treatment_confounders == (T₁ = (:W₀,), T₂ = (:W₁, :W₂)) + @test Ψ₁.treatment_values == OrderedDict( + :T₁ => (control=0, case=1), + :T₂ => (control=1, case=0) + ) + @test Ψ₁.treatment_confounders == OrderedDict(:T₁ => (:W₀,), :T₂ => (:W₁, :W₂)) @test Ψ₁.outcome_extra_covariates == (:A, :Z) # CM @@ -118,8 +122,8 @@ end ) @test Ψ₁ == Ψ₂ @test Ψ₁.outcome == :Y - @test Ψ₁.treatment_values == (T₁=1, T₂=0) - @test Ψ₁.treatment_confounders == (T₁ = (:W₀,), T₂ = (:W₁, :W₂)) + @test Ψ₁.treatment_values == OrderedDict(:T₁ => 1, :T₂ => 0) + @test Ψ₁.treatment_confounders == OrderedDict(:T₁ => (:W₀,), :T₂ => (:W₁, :W₂)) @test Ψ₁.outcome_extra_covariates == () end @@ -138,7 +142,7 @@ end d = TMLE.to_dict(Ψ) @test d == Dict( :type => "CM", - :treatment_values => Dict(:T₁=>1, :T₂=>"AC"), + :treatment_values => Dict(:T₁ => 1, :T₂ => "AC"), :outcome => :y ) Ψreconstructed = TMLE.from_dict!(d) @@ -146,10 +150,10 @@ end # Causal ATE Ψ = ATE( outcome=:y, - treatment_values=( - T₁=(case="A", control="B"), - T₂=(case=1, control=0), - T₃=(case="C", control="D") + treatment_values=Dict( + "T₁" => (case="A", control="B"), + "T₂" => (case=1, control=0), + "T₃" => (case="C", control="D") ), ) d = TMLE.to_dict(Ψ) @@ -182,8 +186,8 @@ end Ψreconstructed = TMLE.from_dict!(d) @test Ψreconstructed == Ψ - # Statistical IATE - Ψ = IATE( + # Statistical AIE + Ψ = AIE( outcome=:y, treatment_values = (T₁=1, T₂="AC"), treatment_confounders = (T₁=[:W₁₁, :W₁₂], T₂=[:W₁₂, :W₂₂]), @@ -192,7 +196,7 @@ end d = TMLE.to_dict(Ψ) @test d == Dict( :outcome_extra_covariates => [:C], - :type => "IATE", + :type => "AIE", :treatment_values => Dict(:T₁=>1, :T₂=>"AC"), :outcome => :y, :treatment_confounders => Dict(:T₁=>[:W₁₁, :W₁₂], :T₂=>[:W₁₂, :W₂₂]) @@ -203,13 +207,22 @@ end @testset "Test control_case_settings" begin treatments_unique_values = (T₁=(1, 0, 2),) - @test TMLE.get_treatment_settings(ATE, treatments_unique_values) == [[(1, 0), (0, 2)]] - @test TMLE.get_treatment_settings(IATE, treatments_unique_values) == [[(1, 0), (0, 2)]] - @test TMLE.get_treatment_settings(CM, treatments_unique_values) == ((1, 0, 2), ) - treatments_unique_values = (T₁=(1, 0, 2), T₂=["AC", "CC"]) - @test TMLE.get_treatment_settings(ATE, treatments_unique_values) == [[(1, 0), (0, 2)], [("AC", "CC")]] - @test TMLE.get_treatment_settings(IATE, treatments_unique_values) == [[(1, 0), (0, 2)], [("AC", "CC")]] - @test TMLE.get_treatment_settings(CM, treatments_unique_values) == ((1, 0, 2), ["AC", "CC"]) + @test TMLE.get_treatment_settings(ATE, treatments_unique_values) == OrderedDict(:T₁ => [(1, 0), (0, 2)],) + @test TMLE.get_treatment_settings(AIE, treatments_unique_values) == OrderedDict(:T₁ => [(1, 0), (0, 2)],) + @test TMLE.get_treatment_settings(CM, treatments_unique_values) == OrderedDict(:T₁ => (1, 0, 2), ) + treatments_unique_values = Dict(:T₁ => (1, 0, 2), :T₂ => ["AC", "CC"]) + @test TMLE.get_treatment_settings(ATE, treatments_unique_values) == OrderedDict( + :T₁ => [(1, 0), (0, 2)], + :T₂ => [("AC", "CC")] + ) + @test TMLE.get_treatment_settings(AIE, treatments_unique_values) == OrderedDict( + :T₁ => [(1, 0), (0, 2)], + :T₂ => [("AC", "CC")] + ) + @test TMLE.get_treatment_settings(CM, treatments_unique_values) == OrderedDict( + :T₁ => (1, 0, 2), + :T₂ => ["AC", "CC"] + ) end @testset "Test unique_treatment_values" begin @@ -217,10 +230,32 @@ end T₁ = ["AC", missing, "AC", "CC", "CC", "AA", "CC"], T₂ = [1, missing, 1, 2, 2, 3, 2] ) - # most frequent to least frequent - @test TMLE.unique_treatment_values(dataset, (:T₁, :T₂)) == ( - T₁ = ["CC", "AC", "AA"], - T₂ = [2, 1, 3], + # most frequent to least frequent and sorted by keys + infered_values = TMLE.unique_treatment_values(dataset, (:T₂, :T₁)) + @test infered_values == OrderedDict( + :T₁ => ["CC", "AC", "AA"], + :T₂ => [2, 1, 3], + ) + @test collect(keys(infered_values)) == [:T₁, :T₂] +end + +@testset "factorialEstimand errors" begin + dataset = ( + T₁ = [0, 1, 2, missing], + T₂ = ["AC", "CC", missing, "AA"], + ) + # Levels not in dataset + msg = "Not all levels provided for treatment T₁ were found in the dataset: [3]" + @test_throws ArgumentError(msg) factorialEstimand( + CM, (T₁=(0, 3),), :Y₁, + dataset=dataset, + verbosity=0 + ) + # No dataset and no levels + msg = "No dataset from which to infer treatment levels was provided. Either provide a `dataset` or a NamedTuple `treatments` e.g. (T=[0, 1, 2],)" + @test_throws ArgumentError(msg) factorialEstimand( + CM, (:T₁, :T₂), :Y₁, + verbosity=0 ) end @@ -234,20 +269,15 @@ end Y₁ = [1, 2, 3, 4], Y₂ = [1, 2, 3, 4] ) - composedCM = factorialEstimand(CM, dataset, [:T₁], :Y₁, verbosity=0) - @test composedCM == TMLE.ComposedEstimand( - TMLE.joint_estimand, - ( + jointCM = factorialEstimand(CM, [:T₁], :Y₁, dataset=dataset, verbosity=0) + @test jointCM == TMLE.JointEstimand( TMLE.CausalCM(:Y₁, (T₁ = 0,)), TMLE.CausalCM(:Y₁, (T₁ = 1,)), TMLE.CausalCM(:Y₁, (T₁ = 2,)) - ) ) - composedCM = factorialEstimand(CM, dataset, [:T₁, :T₂], :Y₁, verbosity=0) - @test composedCM == TMLE.ComposedEstimand( - TMLE.joint_estimand, - ( + jointCM = factorialEstimand(CM, (:T₁, :T₂), :Y₁, dataset=dataset, verbosity=0) + @test jointCM == TMLE.JointEstimand( TMLE.CausalCM(:Y₁, (T₁ = 0, T₂ = "AC")), TMLE.CausalCM(:Y₁, (T₁ = 1, T₂ = "AC")), TMLE.CausalCM(:Y₁, (T₁ = 2, T₂ = "AC")), @@ -257,7 +287,6 @@ end TMLE.CausalCM(:Y₁, (T₁ = 0, T₂ = "AA")), TMLE.CausalCM(:Y₁, (T₁ = 1, T₂ = "AA")), TMLE.CausalCM(:Y₁, (T₁ = 2, T₂ = "AA")) - ) ) end @@ -272,24 +301,20 @@ end Y₂ = [1, 2, 3, 4] ) # No confounders, 1 treatment, no extra covariate: 3 causal ATEs - composedATE = factorialEstimand(ATE, dataset, [:T₁], :Y₁, verbosity=0) - @test composedATE == ComposedEstimand( - TMLE.joint_estimand, - ( - TMLE.CausalATE(:Y₁, (T₁ = (case = 1, control = 0),)), - TMLE.CausalATE(:Y₁, (T₁ = (case = 2, control = 1),)) - ) + jointATE = factorialEstimand(ATE, [:T₁], :Y₁, dataset=dataset, verbosity=0) + @test jointATE == JointEstimand( + TMLE.CausalATE(:Y₁, (T₁ = (case = 1, control = 0),)), + TMLE.CausalATE(:Y₁, (T₁ = (case = 2, control = 1),)) ) # 2 treatments - composedATE = factorialEstimand(ATE, dataset, [:T₁, :T₂], :Y₁; + jointATE = factorialEstimand(ATE, [:T₁, :T₂], :Y₁; + dataset=dataset, confounders=[:W₁, :W₂], outcome_extra_covariates=[:C], verbosity=0 ) ## 4 expected different treatment settings - @test composedATE == ComposedEstimand( - TMLE.joint_estimand, - ( + @test jointATE == JointEstimand( TMLE.StatisticalATE( outcome = :Y₁, treatment_values = (T₁ = (case = 1, control = 0), T₂ = (case = "CC", control = "AC")), @@ -314,19 +339,19 @@ end treatment_confounders = (:W₁, :W₂), outcome_extra_covariates=[:C] ), - ) ) # positivity constraint - composedATE = factorialEstimand(ATE, dataset, [:T₁, :T₂], :Y₁; + jointATE = factorialEstimand(ATE, [:T₁, :T₂], :Y₁; + dataset=dataset, confounders=[:W₁, :W₂], outcome_extra_covariates=[:C], positivity_constraint=0.1, verbosity=0 ) - @test length(composedATE.args) == 1 + @test length(jointATE.args) == 1 end -@testset "Test factorial IATE" begin +@testset "Test factorial AIE" begin dataset = ( T₁ = [0, 1, 2, missing], T₂ = ["AC", "CC", missing, "AA"], @@ -337,72 +362,68 @@ end Y₂ = [1, 2, 3, 4] ) # From dataset - composedIATE = factorialEstimand(IATE, dataset, [:T₁, :T₂], :Y₁, + jointAIE = factorialEstimand(AIE, [:T₁, :T₂], :Y₁; + dataset=dataset, confounders=[:W₁], outcome_extra_covariates=[:C], verbosity=0 ) - @test composedIATE == ComposedEstimand( - TMLE.joint_estimand, - ( - TMLE.StatisticalIATE( + @test jointAIE == JointEstimand( + TMLE.StatisticalAIE( outcome = :Y₁, treatment_values = (T₁ = (case = 1, control = 0), T₂ = (case = "CC", control = "AC")), treatment_confounders = (:W₁,), outcome_extra_covariates = (:C,) ), - TMLE.StatisticalIATE( + TMLE.StatisticalAIE( outcome = :Y₁, treatment_values = (T₁ = (case = 2, control = 1), T₂ = (case = "CC", control = "AC")), treatment_confounders = (:W₁,), outcome_extra_covariates = (:C,) ), - TMLE.StatisticalIATE( + TMLE.StatisticalAIE( outcome = :Y₁, treatment_values = (T₁ = (case = 1, control = 0), T₂ = (case = "AA", control = "CC")), treatment_confounders = (:W₁,), outcome_extra_covariates = (:C,) ), - TMLE.StatisticalIATE( + TMLE.StatisticalAIE( outcome = :Y₁, treatment_values = (T₁ = (case = 2, control = 1), T₂ = (case = "AA", control = "CC")), treatment_confounders = (:W₁,), outcome_extra_covariates = (:C,) ) - ) ) # From unique values - composedIATE = factorialEstimand(IATE, (T₁ = (0, 1), T₂=(0, 1, 2), T₃=(0, 1, 2)), :Y₁, verbosity=0) - @test composedIATE == ComposedEstimand( - TMLE.joint_estimand, - ( - TMLE.CausalIATE( + jointAIE_from_dict = factorialEstimand(AIE, Dict(:T₁ => (0, 1), :T₂ => (0, 1, 2), :T₃ => (0, 1, 2)), :Y₁, verbosity=0) + jointAIE_from_nt = factorialEstimand(AIE, (T₁ = (0, 1), T₂ = (0, 1, 2), T₃ = (0, 1, 2)), :Y₁, verbosity=0) + @test jointAIE_from_dict == jointAIE_from_nt == JointEstimand( + TMLE.CausalAIE( outcome = :Y₁, - treatment_values = (T₁ = (case = 1, control = 0), T₂ = (case = 1, control = 0), T₃ = (case = 1, control = 0)) + treatment_values = (T₁ = (control = 0, case = 1), T₂ = (control = 0, case = 1), T₃ = (control = 0, case = 1)) ), - TMLE.CausalIATE( + TMLE.CausalAIE( outcome = :Y₁, - treatment_values = (T₁ = (case = 1, control = 0), T₂ = (case = 2, control = 1), T₃ = (case = 1, control = 0)) + treatment_values = (T₁ = (control = 0, case = 1), T₂ = (control = 1, case = 2), T₃ = (control = 0, case = 1)) ), - TMLE.CausalIATE( + TMLE.CausalAIE( outcome = :Y₁, - treatment_values = (T₁ = (case = 1, control = 0), T₂ = (case = 1, control = 0), T₃ = (case = 2, control = 1)) + treatment_values = (T₁ = (control = 0, case = 1), T₂ = (control = 0, case = 1), T₃ = (control = 1, case = 2)) ), - TMLE.CausalIATE( + TMLE.CausalAIE( outcome = :Y₁, - treatment_values = (T₁ = (case = 1, control = 0), T₂ = (case = 2, control = 1), T₃ = (case = 2, control = 1)) + treatment_values = (T₁ = (control = 0, case = 1), T₂ = (control = 1, case = 2), T₃ = (control = 1, case = 2)) ) - ) ) # positivity constraint - composedIATE = factorialEstimand(IATE, dataset, [:T₁, :T₂], :Y₁, + @test_throws ArgumentError("No component passed the positivity constraint.") factorialEstimand(AIE, [:T₁, :T₂], :Y₁; + dataset=dataset, confounders=[:W₁], outcome_extra_covariates=[:C], positivity_constraint=0.1, verbosity=0 ) - @test length(composedIATE.args) == 0 end @testset "Test factorialEstimands" begin @@ -415,7 +436,8 @@ end Y₁ = [1, 2, 3, 4], Y₂ = [1, 2, 3, 4] ) - factorial_ates = factorialEstimands(ATE, dataset, [:T₁, :T₂], [:Y₁, :Y₂], + factorial_ates = factorialEstimands(ATE, [:T₁, :T₂], [:Y₁, :Y₂], + dataset=dataset, confounders=[:W₁, :W₂], outcome_extra_covariates=[:C], positivity_constraint=0.1, @@ -423,13 +445,13 @@ end ) @test length(factorial_ates) == 2 # Nothing passes the threshold - factorial_ates = factorialEstimands(ATE, dataset, [:T₁, :T₂], [:Y₁, :Y₂], + @test_throws ArgumentError("No component passed the positivity constraint.") factorialEstimands(ATE, [:T₁, :T₂], [:Y₁, :Y₂], + dataset=dataset, confounders=[:W₁, :W₂], outcome_extra_covariates=[:C], positivity_constraint=0.3, verbosity=0 ) - @test length(factorial_ates) == 0 end end diff --git a/test/counterfactual_mean_based/estimators_and_estimates.jl b/test/counterfactual_mean_based/estimators_and_estimates.jl index 4e4e6858..8154f4c8 100644 --- a/test/counterfactual_mean_based/estimators_and_estimates.jl +++ b/test/counterfactual_mean_based/estimators_and_estimates.jl @@ -30,9 +30,9 @@ end G = (TMLE.ConditionalDistribution(:T₁, [:W]),) η = TMLE.CMRelevantFactors(outcome_mean=Q, propensity_score=G) # Estimator - models = ( - Y = with_encoder(LinearRegressor()), - T₁ = LogisticClassifier() + models = Dict( + :Y => with_encoder(LinearRegressor()), + :T₁ => LogisticClassifier() ) η̂ = TMLE.CMRelevantFactorsEstimator(models=models) # Estimate @@ -50,18 +50,18 @@ end @test fitted_params(η̂ₙ.propensity_score[1].machine) isa NamedTuple # Both models unchanged, η̂ₙ is fully reused - new_models = ( - Y = with_encoder(LinearRegressor()), - T₁ = LogisticClassifier() + new_models = Dict( + :Y => with_encoder(LinearRegressor()), + :T₁ => LogisticClassifier() ) new_η̂ = TMLE.CMRelevantFactorsEstimator(models=new_models) @test TMLE.key(η, new_η̂) == TMLE.key(η, η̂) full_reuse_log = (:info, TMLE.reuse_string(η)) @test_logs full_reuse_log new_η̂(η, dataset; cache=cache, verbosity=1) # Changing one model, only the other one is refitted - new_models = ( - Y = with_encoder(LinearRegressor()), - T₁ = LogisticClassifier(fit_intercept=false) + new_models = Dict( + :Y => with_encoder(LinearRegressor()), + :T₁ => LogisticClassifier(fit_intercept=false) ) new_η̂ = TMLE.CMRelevantFactorsEstimator(models=new_models) @test TMLE.key(η, new_η̂) != TMLE.key(η, η̂) @@ -88,9 +88,9 @@ end G = (TMLE.ConditionalDistribution(:T₁, [:W]),) η = TMLE.CMRelevantFactors(outcome_mean=Q, propensity_score=G) # Propensity score model is ill-defined - models = ( - Y = with_encoder(LinearRegressor()), - T₁ = LinearRegressor() + models = Dict( + :Y => with_encoder(LinearRegressor()), + :T₁ => LinearRegressor() ) η̂ = TMLE.CMRelevantFactorsEstimator(models=models) try @@ -102,9 +102,9 @@ end @test e.msg == TMLE.propensity_score_fit_error_msg(G[1]) end # Outcome Mean model is ill-defined - models = ( - Y = LogisticClassifier(), - T₁ = LogisticClassifier(fit_intercept=false) + models = Dict( + :Y => LogisticClassifier(), + :T₁ => LogisticClassifier(fit_intercept=false) ) η̂ = TMLE.CMRelevantFactorsEstimator(models=models) try diff --git a/test/counterfactual_mean_based/fluctuation.jl b/test/counterfactual_mean_based/fluctuation.jl index 6f263c33..e7ca4255 100644 --- a/test/counterfactual_mean_based/fluctuation.jl +++ b/test/counterfactual_mean_based/fluctuation.jl @@ -25,7 +25,10 @@ using DataFrames ) η̂ = TMLE.CMRelevantFactorsEstimator( nothing, - (Y=with_encoder(ConstantRegressor()), T = ConstantClassifier()) + Dict( + :Y => with_encoder(ConstantRegressor()), + :T => ConstantClassifier() + ) ) η̂ₙ = η̂(η, dataset, verbosity = 0) X = dataset[!, collect(η̂ₙ.outcome_mean.estimand.parents)] @@ -66,7 +69,7 @@ using DataFrames end @testset "Test Fluctuation with 2 Treatments" begin - Ψ = IATE( + Ψ = AIE( outcome =:Y, treatment_values=( T₁=(case=1, control=0), @@ -94,10 +97,10 @@ end ) η̂ = TMLE.CMRelevantFactorsEstimator( nothing, - ( - Y = with_encoder(ConstantClassifier()), - T₁ = ConstantClassifier(), - T₂ = ConstantClassifier() + Dict( + :Y => with_encoder(ConstantClassifier()), + :T₁ => ConstantClassifier(), + :T₂ => ConstantClassifier() ) ) η̂ₙ = η̂(η, dataset, verbosity = 0) diff --git a/test/counterfactual_mean_based/gradient.jl b/test/counterfactual_mean_based/gradient.jl index 8a2e7f7a..95c5dfda 100644 --- a/test/counterfactual_mean_based/gradient.jl +++ b/test/counterfactual_mean_based/gradient.jl @@ -38,7 +38,9 @@ end ) η̂ = TMLE.CMRelevantFactorsEstimator( nothing, - (Y=with_encoder(InteractionTransformer(order=2) |> LinearRegressor()), T = LogisticClassifier()) + Dict( + :Y => with_encoder(InteractionTransformer(order=2) |> LinearRegressor()), + :T => LogisticClassifier()) ) η̂ₙ = η̂(η, dataset, verbosity = 0) # Retrieve conditional distributions and fitted_params diff --git a/test/data/parameters.yaml b/test/data/parameters.yaml index 9a1fd785..06b2843b 100644 --- a/test/data/parameters.yaml +++ b/test/data/parameters.yaml @@ -10,7 +10,7 @@ Y: - Y2 Estimands: - - name: IATE + - name: AIE T1: case: 1 control: 0 diff --git a/test/data/parameters_with_covariates.yaml b/test/data/parameters_with_covariates.yaml index 6d678d2b..654e5183 100644 --- a/test/data/parameters_with_covariates.yaml +++ b/test/data/parameters_with_covariates.yaml @@ -11,7 +11,7 @@ Y: - Y1 Estimands: - - name: IATE + - name: AIE T1: case: 2 control: 1 diff --git a/test/estimand_ordering.jl b/test/estimand_ordering.jl index d7bbc430..9e1cc85a 100644 --- a/test/estimand_ordering.jl +++ b/test/estimand_ordering.jl @@ -28,7 +28,7 @@ causal_estimands = [ outcome=:Y₁, treatment_values=(T₂=(case=1, control=0),) ), - IATE( + AIE( outcome=:Y₁, treatment_values=(T₁=(case=2, control=0), T₂=(case=1, control=0)) ), @@ -106,7 +106,7 @@ end @test TMLE.evaluate_proxy_costs(ordering_from_groups_with_brute_force, η_counts) == (3, 9) end -@testset "Test ordering strategies with Composed Estimands" begin +@testset "Test ordering strategies with Joint Estimands" begin ATE₁ = ATE( outcome=:Y₁, treatment_values=(T₁=(case=1, control=0),) @@ -115,12 +115,12 @@ end outcome=:Y₁, treatment_values=(T₁=(case=2, control=1),) ) - diff = ComposedEstimand(-, (ATE₁, ATE₂)) + joint = JointEstimand(ATE₁, ATE₂) ATE₃ = ATE( outcome=:Y₁, treatment_values=(T₂=(case=1, control=0),) ) - estimands = [identify(x, scm) for x in [ATE₁, ATE₃, diff, ATE₂]] + estimands = [identify(x, scm) for x in [ATE₁, ATE₃, joint, ATE₂]] η_counts = TMLE.nuisance_function_counts(estimands) η_counts == Dict( TMLE.ConditionalDistribution(:Y₁, (:T₂, :W₂)) => 1, diff --git a/test/estimands.jl b/test/estimands.jl index cf91c52e..cc52bdc9 100644 --- a/test/estimands.jl +++ b/test/estimands.jl @@ -27,7 +27,7 @@ end @test TMLE.variables(η) == (:Y, :T, :W, :T₁, :W₁, :T₂, :W₂₁, :W₂₂) end -@testset "Test ComposedEstimand" begin +@testset "Test JointEstimand and ComposedEstimand" begin ATE₁ = ATE( outcome=:Y, treatment_values = (T₁=(case=1, control=0), T₂=(case=1, control=0)), @@ -38,10 +38,27 @@ end treatment_values = (T₁=(case=2, control=1), T₂=(case=2, control=1)), treatment_confounders = (T₁=[:W], T₂=[:W]) ) - diff = ComposedEstimand(-, (ATE₁, ATE₂)) + # JointEstimand + joint = JointEstimand(ATE₁, ATE₂) - @test TMLE.propensity_score_key(diff) == ((:T₁, :W), (:T₂, :W)) - @test TMLE.outcome_mean_key(diff) == ((:Y, :T₁, :T₂, :W),) + @test TMLE.propensity_score_key(joint) == ((:T₁, :W), (:T₂, :W)) + @test TMLE.outcome_mean_key(joint) == ((:Y, :T₁, :T₂, :W),) + + joint_dict = TMLE.to_dict(joint) + joint_from_dict = TMLE.from_dict!(joint_dict) + @test joint_from_dict == joint + + # ComposedEstimand + Main.eval(:(difference(x, y) = x - y)) + composed = ComposedEstimand(Main.difference, joint) + composed_dict = TMLE.to_dict(composed) + composed_from_dict = TMLE.from_dict!(composed_dict) + @test composed_from_dict == composed + + # Anonymous function will raise + diff = ComposedEstimand((x,y) -> x - y, joint) + msg = "The function of a ComposedEstimand cannot be anonymous to be converted to a dictionary." + @test_throws ArgumentError(msg) TMLE.to_dict(diff) end diff --git a/test/estimators_and_estimates.jl b/test/estimators_and_estimates.jl index f0c146f7..b5bb3838 100644 --- a/test/estimators_and_estimates.jl +++ b/test/estimators_and_estimates.jl @@ -66,7 +66,7 @@ end ## Deterministic Model model = MLJLinearModels.LinearRegressor() estimator = TMLE.MLConditionalDistributionEstimator(model) - conditional_density_estimate = estimator(estimand, continuous_dataset; cache=Dict(), verbosity=verbosity) + conditional_density_estimate = estimator(estimand, continuous_dataset; cache=Dict(), verbosity=0) ŷ = predict(conditional_density_estimate, continuous_dataset) @test ŷ isa Vector{Float64} μ̂ = TMLE.expected_value(conditional_density_estimate, continuous_dataset) @@ -137,7 +137,7 @@ end model, train_validation_indices ) - conditional_density_estimate = estimator(estimand, continuous_dataset; verbosity=verbosity) + conditional_density_estimate = estimator(estimand, continuous_dataset; verbosity=0) ŷ = predict(conditional_density_estimate, continuous_dataset) @test ŷ isa Vector{Distributions.Normal{Float64}} μ̂ = TMLE.expected_value(conditional_density_estimate, continuous_dataset) @@ -152,7 +152,7 @@ end model, train_validation_indices ) - conditional_density_estimate = estimator(estimand, continuous_dataset; verbosity=verbosity) + conditional_density_estimate = estimator(estimand, continuous_dataset; verbosity=0) ŷ = predict(conditional_density_estimate, continuous_dataset) @test ŷ isa Vector{Float64} μ̂ = TMLE.expected_value(conditional_density_estimate, continuous_dataset) diff --git a/test/missing_management.jl b/test/missing_management.jl index 82491a3a..ca6dff33 100644 --- a/test/missing_management.jl +++ b/test/missing_management.jl @@ -58,7 +58,7 @@ end outcome=:Y, treatment_values=(T=(case=1, control=0),), treatment_confounders=(T=[:W],)) - models=(Y=with_encoder(LinearRegressor()), T=LogisticClassifier(lambda=0)) + models = Dict(:Y => with_encoder(LinearRegressor()), :T => LogisticClassifier(lambda=0)) tmle = TMLEE(models=models, machine_cache=true) tmle_result, cache = tmle(Ψ, dataset; verbosity=0) test_coverage(tmle_result, 1) diff --git a/test/utils.jl b/test/utils.jl index db4caa22..b3244fcd 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -51,13 +51,20 @@ end @test !isordered(cfT.T₂) end -@testset "Test positivity_constraint" begin +@testset "Test positivity_constraint & get_frequency_table" begin + # get_frequency_table + ## When no positivity constraint is provided then get_frequency_table returns nothing + @test TMLE.get_frequency_table(nothing, nothing, [1, 2]) === nothing + @test TMLE.get_frequency_table(nothing, "toto", [1, 2]) === nothing + ## An error is thrown if no dataset is provided but a positivity constraint is given + @test_throws ArgumentError("A dataset should be provided to enforce a positivity constraint.") TMLE.get_frequency_table(0.1, nothing, [1, 2]) + ## when both positivity constraint and datasets are provided dataset = ( A = [1, 1, 0, 1, 0, 2, 2, 1], B = ["AC", "CC", "AA", "AA", "AA", "AA", "AA", "AA"] ) - # One variable - frequency_table = TMLE.frequency_table(dataset, [:A]) + ### One variable + frequency_table = TMLE.get_frequency_table(0.1, dataset, [:A]) @test frequency_table[(0,)] == 0.25 @test frequency_table[(1,)] == 0.5 @test frequency_table[(2,)] == 0.25 @@ -76,13 +83,13 @@ end treatment_values= (A = (case=1, control=0),), treatment_confounders = (A=[],) ) - @test collect(TMLE.joint_levels(Ψ)) == [(1,), (0,)] + @test collect(TMLE.joint_levels(Ψ)) == [(0,), (1,)] @test TMLE.satisfies_positivity(Ψ, frequency_table, positivity_constraint=0.2) == true @test TMLE.satisfies_positivity(Ψ, frequency_table, positivity_constraint=0.3) == false - # Two variables - ## Treatments are sorted: [:B, :A] -> [:A, :B] - frequency_table = TMLE.frequency_table(dataset, [:B, :A]) + ## Two variables + ### Treatments are sorted: [:B, :A] -> [:A, :B] + frequency_table = TMLE.get_frequency_table(dataset, [:B, :A]) @test frequency_table[(1, "CC")] == 0.125 @test frequency_table[(1, "AA")] == 0.25 @test frequency_table[(0, "AA")] == 0.25 @@ -103,18 +110,18 @@ end treatment_values = (B=(case="AA", control="AC"), A=(case=1, control=1),), treatment_confounders = (B = (), A = (),) ) - @test collect(TMLE.joint_levels(Ψ)) == [(1, "AA"), (1, "AC")] + @test collect(TMLE.joint_levels(Ψ)) == [(1, "AC"), (1, "AA")] @test TMLE.satisfies_positivity(Ψ, frequency_table, positivity_constraint=0.1) == true @test TMLE.satisfies_positivity(Ψ, frequency_table, positivity_constraint=0.2) == false - Ψ = IATE( + Ψ = AIE( outcome = :toto, treatment_values = (B=(case="AC", control="AA"), A=(case=1, control=0),), treatment_confounders = (B=(), A=()), ) @test collect(TMLE.joint_levels(Ψ)) == [ - (1, "AC") (1, "AA") - (0, "AC") (0, "AA")] + (0, "AA") (0, "AC") + (1, "AA") (1, "AC")] @test TMLE.satisfies_positivity(Ψ, frequency_table, positivity_constraint=1.) == false frequency_table = Dict( @@ -128,7 +135,7 @@ end @test TMLE.satisfies_positivity(Ψ, frequency_table, positivity_constraint=0.3) == false @test TMLE.satisfies_positivity(Ψ, frequency_table, positivity_constraint=0.1) == true - Ψ = IATE( + Ψ = AIE( outcome = :toto, treatment_values = (B=(case="AC", control="AA"), A=(case=1, control=0), C=(control=0, case=2)), treatment_confounders = (B=(), A=(), C=())