From 2fcc6bb58459243ff03e0b4d091e187fdf1b1759 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Mon, 5 Sep 2022 16:12:41 +0200
Subject: [PATCH 01/15] create DoubleMLIRMBLP class and basic unit tests

Best linear Predictor to construct CATEs and GATEs
---
 doubleml/__init__.py                |   4 +-
 doubleml/double_ml_blp.py           | 185 ++++++++++++++++++++++++++++
 doubleml/tests/_utils_blp_manual.py | 127 +++++++++++++++++++
 doubleml/tests/test_blp.py          |  92 ++++++++++++++
 4 files changed, 407 insertions(+), 1 deletion(-)
 create mode 100644 doubleml/double_ml_blp.py
 create mode 100644 doubleml/tests/_utils_blp_manual.py
 create mode 100644 doubleml/tests/test_blp.py

diff --git a/doubleml/__init__.py b/doubleml/__init__.py
index d5b7c420..ccd491ab 100644
--- a/doubleml/__init__.py
+++ b/doubleml/__init__.py
@@ -5,12 +5,14 @@
 from .double_ml_irm import DoubleMLIRM
 from .double_ml_iivm import DoubleMLIIVM
 from .double_ml_data import DoubleMLData, DoubleMLClusterData
+from .double_ml_blp import DoubleMLIRMBLP
 
 __all__ = ['DoubleMLPLR',
            'DoubleMLPLIV',
            'DoubleMLIRM',
            'DoubleMLIIVM',
            'DoubleMLData',
-           'DoubleMLClusterData']
+           'DoubleMLClusterData',
+           'DoubleMLIRMBLP']
 
 __version__ = get_distribution('doubleml').version
diff --git a/doubleml/double_ml_blp.py b/doubleml/double_ml_blp.py
new file mode 100644
index 00000000..d3666e76
--- /dev/null
+++ b/doubleml/double_ml_blp.py
@@ -0,0 +1,185 @@
+import statsmodels.api as sm
+import numpy as np
+import pandas as pd
+import doubleml as dml
+
+from scipy.stats import norm
+from scipy.linalg import sqrtm
+
+class DoubleMLIRMBLP:
+    """Best Linear Predictor for DoubleML IRM models
+
+        Parameters
+        ----------
+        obj_dml_irm : :class:`DoubleMLIRM` object
+            The :class:`DoubleMLIRM` object providing the interactive regression model with fitted nuisance functions.
+
+        basis : :class:`pandas.DataFrame`
+        The basis for estimating the best linear predictor. Has to correspond to the observations of the IRM model.
+    """
+
+    def __init__(self,
+                 obj_dml_irm,
+                 basis):
+
+        # check and pick up obj_dml_irm
+        if not isinstance(obj_dml_irm, dml.DoubleMLIRM):
+            raise TypeError('The model must be of DoubleMLIRM type. '
+                            f'{str(obj_dml_irm)} of type {str(type(obj_dml_irm))} was passed.')
+
+        if not isinstance(basis, pd.DataFrame):
+            raise TypeError('The basis must be of DataFrame type. '
+                            f'{str(basis)} of type {str(type(basis))} was passed.')
+
+        self._dml_irm = obj_dml_irm
+        self._basis = basis
+
+        # initialize the orthogonal signal, the score and the covariance
+        self._orth_signal = None
+        self._blp_model = None
+        self._blp_omega = None
+
+    @property
+    def blp_model(self):
+        """
+        Best-Linear-Predictor model.
+        """
+        return self._blp_model
+
+    @property
+    def orth_signal(self):
+        """
+        Orthogonal signal.
+        """
+        return self._orth_signal
+
+    @property
+    def blp_omega(self):
+        """
+        Covariance matrix.
+        """
+        return self._blp_omega
+
+    def fit(self):
+        """
+        Estimate DoubleML models.
+
+        Returns
+        -------
+        self : object
+        """
+        # get the orthogonal signal from the IRM model
+        self._orth_signal = self._dml_irm.psi_b.reshape(-1, 1)
+        # fit the best-linear-predictor of the orthogonal signal with respect to the grid
+        self._blp_model = sm.OLS(self._orth_signal, self._basis).fit()
+        self._blp_omega = self._blp_model.cov_HC0
+
+        return self
+
+    def confint(self, basis, joint=False, level=0.95, n_rep_boot=500):
+        """
+        Confidence intervals for BLP for DoubleML IRM.
+
+        Parameters
+        ----------
+        basis : :class:`pandas.DataFrame`
+            The basis for constructing the confidence interval. Has to have the same form as the basis from
+             the construction.
+
+        joint : bool
+            Indicates whether joint confidence intervals are computed.
+            Default is ``False``
+
+        level : float
+            The confidence level.
+            Default is ``0.95``.
+
+        n_rep_boot : int
+            The number of bootstrap repetitions (only relevant for joint confidence intervals).
+            Default is ``500``.
+
+        Returns
+        -------
+        df_ci : pd.DataFrame
+            A data frame with the confidence interval(s).
+        """
+        alpha = 1 - level
+        # blp of the orthogonal signal
+        g_hat = self._blp_model.predict(basis)
+
+        # calculate se for basis elements
+        # check this again (calculation of HC0 should include scaling with sample size)
+        # se_scaling = np.sqrt(self._dml_irm._dml_data.n_obs)
+        se_scaling = 1
+        blp_se = np.sqrt((basis.to_numpy().dot(self._blp_omega) * basis.to_numpy()).sum(axis=1)) / se_scaling
+
+        if joint:
+            # calculate the maximum t-statistic with bootstrap
+            normal_samples = np.random.normal(size=[basis.shape[1], n_rep_boot])
+            bootstrap_samples = np.multiply(basis.to_numpy().dot(np.dot(sqrtm(self._blp_omega), normal_samples)).T,
+                                            (blp_se * se_scaling))
+
+            max_t_stat = np.quantile(np.max(np.abs(bootstrap_samples), axis=0), q=level)
+
+            # Lower simultaneous CI
+            g_hat_lower = g_hat - max_t_stat * blp_se
+            # Upper simultaneous CI
+            g_hat_upper = g_hat + max_t_stat * blp_se
+
+        else:
+            # Lower point-wise CI
+            g_hat_lower = g_hat + norm.ppf(q=alpha / 2) * blp_se
+            # Upper point-wise CI
+            g_hat_upper = g_hat + norm.ppf(q=1 - alpha / 2) * blp_se
+
+        ci = np.vstack((g_hat_lower, g_hat, g_hat_upper)).T
+        df_ci = pd.DataFrame(ci,
+                         columns=['{:.1f} %'.format(alpha/2 * 100), 'effect', '{:.1f} %'.format((1-alpha/2) * 100)],
+                         index=basis.index)
+        return df_ci
+
+
+from doubleml.tests._utils_blp_manual import create_spline_basis, create_synthetic_data
+
+# DGP constants
+np.random.seed(123)
+n = 2000
+n_w = 10
+support_size = 5
+n_x = 1
+
+# Create data
+data, covariates = create_synthetic_data(n=n, n_w=n_w, support_size=support_size, n_x=n_x, constant=True)
+data_dml_base = dml.DoubleMLData(data,
+                                 y_col='y',
+                                 d_cols='t',
+                                 x_cols=covariates)
+
+# First stage estimation
+# Lasso regression
+from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
+randomForest_reg = RandomForestRegressor(n_estimators=500)
+randomForest_class = RandomForestClassifier(n_estimators=500)
+
+np.random.seed(123)
+
+dml_irm = dml.DoubleMLIRM(data_dml_base,
+                          ml_g=randomForest_reg,
+                          ml_m=randomForest_class,
+                          trimming_threshold=0.01,
+                          n_folds=5
+                          )
+print("Training first stage")
+dml_irm.fit(store_predictions=True)
+
+spline_basis = create_spline_basis(X=data["x"], knots=3, degree=2)
+
+cate = DoubleMLIRMBLP(dml_irm, basis=spline_basis).fit()
+
+print(cate.confint(spline_basis, joint=False))
+
+groups = pd.DataFrame(np.vstack([data["x"] <= 0.2, (data["x"] >= 0.2) & (data["x"] <= 0.7), data["x"] >= 0.7]).T,
+             columns=['Group 1', 'Group 2', 'Group 3'])
+
+gate = DoubleMLIRMBLP(dml_irm, basis=groups).fit()
+print(gate.confint(groups))
diff --git a/doubleml/tests/_utils_blp_manual.py b/doubleml/tests/_utils_blp_manual.py
new file mode 100644
index 00000000..42cb37c9
--- /dev/null
+++ b/doubleml/tests/_utils_blp_manual.py
@@ -0,0 +1,127 @@
+import numpy as np
+import statsmodels.api as sm
+from scipy.linalg import sqrtm
+from scipy.stats import norm
+import pandas as pd
+import patsy
+
+
+def fit_blp(obj_dml_irm, basis):
+    orth_signal = obj_dml_irm.psi_b.reshape(-1, 1)
+    blp_model = sm.OLS(orth_signal, basis).fit()
+
+    return blp_model
+
+
+def blp_confint(obj_dml_irm, blp_model, basis, joint=False, level=0.95, n_rep_boot=500):
+    alpha = 1 - level
+    g_hat = blp_model.predict(basis)
+
+    blp_omega = blp_model.cov_HC0
+
+    # check this again (calculation of HC0 should include scaling with sample size)
+    # se_scaling = np.sqrt(self._dml_irm._dml_data.n_obs)
+    se_scaling = 1
+    blp_se = np.sqrt((basis.dot(blp_omega) * basis).sum(axis=1)) / se_scaling
+
+    if joint:
+        # calculate the maximum t-statistic with bootstrap
+        normal_samples = np.random.normal(size=[basis.shape[1], n_rep_boot])
+        bootstrap_samples = np.multiply(basis.dot(np.dot(sqrtm(blp_omega), normal_samples)).T,
+                                        (blp_se * se_scaling))
+
+        max_t_stat = np.quantile(np.max(np.abs(bootstrap_samples), axis=0), q=level)
+
+        # Lower simultaneous CI
+        g_hat_lower = g_hat - max_t_stat * blp_se
+        # Upper simultaneous CI
+        g_hat_upper = g_hat + max_t_stat * blp_se
+
+    else:
+        # Lower point-wise CI
+        g_hat_lower = g_hat + norm.ppf(q=alpha / 2) * blp_se
+        # Upper point-wise CI
+        g_hat_upper = g_hat + norm.ppf(q=1 - alpha / 2) * blp_se
+
+    ci = np.vstack((g_hat_lower, g_hat, g_hat_upper)).T
+    df_ci = pd.DataFrame(ci,
+                         columns=['{:.1f} %'.format(alpha / 2 * 100), 'effect',
+                                  '{:.1f} %'.format((1 - alpha / 2) * 100)],
+                         index=basis.index)
+    return df_ci
+
+
+def create_spline_basis(X, knots=5, degree=3):
+    if type(knots) == int:
+        X_splines = patsy.bs(X, df=knots + degree, degree=degree)
+    else:
+        X_splines = patsy.bs(X, knots=knots, degree=degree)
+    return X_splines
+
+
+def create_synthetic_data(n=200, n_w=30, support_size=5, n_x=1, constant=True):
+    """
+    Creates a synthetic example based on example 2 of https://github.com/microsoft/EconML/blob/master/notebooks/Double%20Machine%20Learning%20Examples.ipynb
+
+    Parameters
+    ----------
+    n_samples : int
+        Number of samples.
+        Default is ``200``.
+
+    n_w : int
+        Dimension of covariates.
+        Default is ``30``.
+
+    support_size : int
+        Number of relevant covariates.
+        Default is ``5``.
+
+    n_x : int
+        Dimension of treatment variable.
+        Default is ``1``.
+
+    Returns
+    -------
+     data : pd.DataFrame
+            A data frame.
+
+    """
+    # Outcome support
+    # With the next two lines we are effectively choosing the matrix gamma in the example
+    support_Y = np.random.choice(np.arange(n_w), size=support_size, replace=False)
+    coefs_Y = np.random.uniform(0, 1, size=support_size)
+    # Define the function to generate the noise
+    epsilon_sample = lambda n: np.random.uniform(-1, 1, size=n)
+    # Treatment support
+    # Assuming the matrices gamma and beta have the same non-zero components
+    support_T = support_Y
+    coefs_T = np.random.uniform(0, 1, size=support_size)
+    # Define the function to generate the noise
+    eta_sample = lambda n: np.random.uniform(-1, 1, size=n)
+
+    # Generate controls, covariates, treatments and outcomes
+    W = np.random.normal(0, 1, size=(n, n_w))
+    X = np.random.uniform(0, 1, size=(n, n_x))
+    # Heterogeneous treatment effects
+    if constant:
+        TE = np.array([3 for x_i in X])
+    else:
+        TE = np.array([np.exp(4 + 2 * x_i) for x_i in X])
+    # Define treatment
+    log_odds = np.dot(W[:, support_T], coefs_T) + eta_sample(n)
+    T_sigmoid = 1 / (1 + np.exp(-log_odds))
+    T = np.array([np.random.binomial(1, p) for p in T_sigmoid])
+    # Define the outcome
+    Y = TE * T + np.dot(W[:, support_Y], coefs_Y) + epsilon_sample(n)
+
+    # Now we build the dataset
+    y_df = pd.DataFrame({'y': Y})
+    x_df = pd.DataFrame({'x': X.reshape(-1)})
+    t_df = pd.DataFrame({'t': T})
+    w_df = pd.DataFrame(data=W, index=np.arange(W.shape[0]), columns=[f'w_{i}' for i in range(W.shape[1])])
+
+    data = pd.concat([y_df, x_df, t_df, w_df], axis=1)
+
+    covariates = list(w_df.columns.values) + list(x_df.columns.values)
+    return data, covariates
\ No newline at end of file
diff --git a/doubleml/tests/test_blp.py b/doubleml/tests/test_blp.py
new file mode 100644
index 00000000..0f75483b
--- /dev/null
+++ b/doubleml/tests/test_blp.py
@@ -0,0 +1,92 @@
+import numpy as np
+import pytest
+
+from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
+import doubleml as dml
+
+from ._utils_blp_manual import fit_blp, blp_confint, create_spline_basis, create_synthetic_data
+
+
+@pytest.fixture(scope='module',
+                params=[True])
+def constant(request):
+    return request.param
+
+
+@pytest.fixture(scope='module',
+                params=[True, False])
+def ci_joint(request):
+    return request.param
+
+
+@pytest.fixture(scope='module')
+def dml_blp_fixture(constant, ci_joint):
+    np.random.seed(42)
+    n = 200
+    n_w = 10
+    support_size = 5
+    n_x = 1
+
+    # Create data
+    data, covariates = create_synthetic_data(n=n, n_w=n_w, support_size=support_size, n_x=n_x, constant=constant)
+    data_dml_base = dml.DoubleMLData(data,
+                                     y_col='y',
+                                     d_cols='t',
+                                     x_cols=covariates)
+
+    # First stage estimation
+    ml_m = RandomForestRegressor(n_estimators=100)
+    ml_g = RandomForestClassifier(n_estimators=100)
+
+    np.random.seed(42)
+    dml_irm = dml.DoubleMLIRM(data_dml_base,
+                              ml_g=ml_m,
+                              ml_m=ml_g,
+                              trimming_threshold=0.05,
+                              n_folds=5)
+
+    dml_irm.fit(store_predictions=True)
+
+    spline_basis = create_spline_basis(X=data["x"])
+
+    cate = dml.double_ml_blp.DoubleMLIRMBLP(dml_irm, basis=spline_basis).fit()
+    cate_manual = fit_blp(dml_irm, spline_basis)
+
+    np.random.seed(42)
+    ci = cate.confint(spline_basis, joint=ci_joint, level=0.95, n_rep_boot=1000)
+    np.random.seed(42)
+    ci_manual = blp_confint(dml_irm, cate_manual, spline_basis, joint=ci_joint, level=0.95, n_rep_boot=1000)
+
+    res_dict = {'coef': cate.blp_model.params,
+                'coef_manual': cate_manual.params,
+                'values': cate.blp_model.fittedvalues,
+                'values_manual':  cate_manual.fittedvalues,
+                'omega': cate.blp_omega,
+                'omega_manual': cate_manual.cov_HC0,
+                'ci': ci,
+                'ci_manual': ci_manual}
+
+    return res_dict
+
+
+def test_dml_blp_coef(dml_blp_fixture):
+    assert np.allclose(dml_blp_fixture['coef'],
+                       dml_blp_fixture['coef_manual'],
+                       rtol=1e-9, atol=1e-4)
+
+
+def test_dml_blp_values(dml_blp_fixture):
+    assert np.allclose(dml_blp_fixture['values'],
+                       dml_blp_fixture['values_manual'],
+                       rtol=1e-9, atol=1e-4)
+
+
+def test_dml_blp_omega(dml_blp_fixture):
+    assert np.allclose(dml_blp_fixture['omega'],
+                       dml_blp_fixture['omega_manual'],
+                       rtol=1e-9, atol=1e-4)
+
+def test_dml_blp_ci(dml_blp_fixture):
+    assert np.allclose(dml_blp_fixture['ci'],
+                       dml_blp_fixture['ci_manual'],
+                       rtol=1e-9, atol=1e-4)

From a25d3bc1cfed55452cff0d527738a5fbb4e26ac9 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Tue, 6 Sep 2022 11:14:11 +0200
Subject: [PATCH 02/15] create cate and gate method for irm

Use the DoubleMLIRMBLP class to create CATE and GATE estimates for DoubleMLIRM objects.
---
 doubleml/double_ml_blp.py           | 90 +++++++++--------------------
 doubleml/double_ml_irm.py           | 65 ++++++++++++++++++++-
 doubleml/tests/_utils_blp_manual.py | 61 +++++++++++++++++--
 doubleml/tests/test_blp.py          | 11 +++-
 4 files changed, 153 insertions(+), 74 deletions(-)

diff --git a/doubleml/double_ml_blp.py b/doubleml/double_ml_blp.py
index d3666e76..8644252a 100644
--- a/doubleml/double_ml_blp.py
+++ b/doubleml/double_ml_blp.py
@@ -1,7 +1,6 @@
 import statsmodels.api as sm
 import numpy as np
 import pandas as pd
-import doubleml as dml
 
 from scipy.stats import norm
 from scipy.linalg import sqrtm
@@ -11,31 +10,43 @@ class DoubleMLIRMBLP:
 
         Parameters
         ----------
-        obj_dml_irm : :class:`DoubleMLIRM` object
-            The :class:`DoubleMLIRM` object providing the interactive regression model with fitted nuisance functions.
+        orth_signal : :class:`numpy.array`
+            The orthogonal signal to be predicted. Has to be of shape (n,).
 
         basis : :class:`pandas.DataFrame`
-        The basis for estimating the best linear predictor. Has to correspond to the observations of the IRM model.
+            The basis for estimating the best linear predictor. Has to have the shape (n,d),
+            where d is the number of predictors.
     """
 
     def __init__(self,
-                 obj_dml_irm,
+                 orth_signal,
                  basis):
 
-        # check and pick up obj_dml_irm
-        if not isinstance(obj_dml_irm, dml.DoubleMLIRM):
-            raise TypeError('The model must be of DoubleMLIRM type. '
-                            f'{str(obj_dml_irm)} of type {str(type(obj_dml_irm))} was passed.')
+        if not isinstance(orth_signal, np.ndarray):
+            raise TypeError('The signal must be of np.ndarray type. '
+                            f'{str(orth_signal)} of type {str(type(orth_signal))} was passed.')
+
+        if orth_signal.ndim != 1:
+            raise ValueError('The signal must be of one dimensional. '
+                             f'{str(orth_signal)} of dimensions {str(orth_signal.ndim)} was passed.')
 
         if not isinstance(basis, pd.DataFrame):
             raise TypeError('The basis must be of DataFrame type. '
                             f'{str(basis)} of type {str(type(basis))} was passed.')
 
-        self._dml_irm = obj_dml_irm
+        if not basis.columns.is_unique:
+            raise ValueError('Invalid pd.DataFrame: '
+                             'Contains duplicate column names.')
+
+        if orth_signal.shape[0] != basis.shape[0]:
+            raise ValueError('Incompatible input dimensions.'
+                             f'{str(orth_signal)} of shape {str(orth_signal.shape)} and'
+                             f'{str(basis)} of shape {str(basis.shape)} were passed.')
+
+        self._orth_signal = orth_signal
         self._basis = basis
 
-        # initialize the orthogonal signal, the score and the covariance
-        self._orth_signal = None
+        # initialize the score and the covariance
         self._blp_model = None
         self._blp_omega = None
 
@@ -68,8 +79,7 @@ def fit(self):
         -------
         self : object
         """
-        # get the orthogonal signal from the IRM model
-        self._orth_signal = self._dml_irm.psi_b.reshape(-1, 1)
+
         # fit the best-linear-predictor of the orthogonal signal with respect to the grid
         self._blp_model = sm.OLS(self._orth_signal, self._basis).fit()
         self._blp_omega = self._blp_model.cov_HC0
@@ -84,7 +94,7 @@ def confint(self, basis, joint=False, level=0.95, n_rep_boot=500):
         ----------
         basis : :class:`pandas.DataFrame`
             The basis for constructing the confidence interval. Has to have the same form as the basis from
-             the construction.
+            the construction.
 
         joint : bool
             Indicates whether joint confidence intervals are computed.
@@ -108,8 +118,6 @@ def confint(self, basis, joint=False, level=0.95, n_rep_boot=500):
         g_hat = self._blp_model.predict(basis)
 
         # calculate se for basis elements
-        # check this again (calculation of HC0 should include scaling with sample size)
-        # se_scaling = np.sqrt(self._dml_irm._dml_data.n_obs)
         se_scaling = 1
         blp_se = np.sqrt((basis.to_numpy().dot(self._blp_omega) * basis.to_numpy()).sum(axis=1)) / se_scaling
 
@@ -136,50 +144,4 @@ def confint(self, basis, joint=False, level=0.95, n_rep_boot=500):
         df_ci = pd.DataFrame(ci,
                          columns=['{:.1f} %'.format(alpha/2 * 100), 'effect', '{:.1f} %'.format((1-alpha/2) * 100)],
                          index=basis.index)
-        return df_ci
-
-
-from doubleml.tests._utils_blp_manual import create_spline_basis, create_synthetic_data
-
-# DGP constants
-np.random.seed(123)
-n = 2000
-n_w = 10
-support_size = 5
-n_x = 1
-
-# Create data
-data, covariates = create_synthetic_data(n=n, n_w=n_w, support_size=support_size, n_x=n_x, constant=True)
-data_dml_base = dml.DoubleMLData(data,
-                                 y_col='y',
-                                 d_cols='t',
-                                 x_cols=covariates)
-
-# First stage estimation
-# Lasso regression
-from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
-randomForest_reg = RandomForestRegressor(n_estimators=500)
-randomForest_class = RandomForestClassifier(n_estimators=500)
-
-np.random.seed(123)
-
-dml_irm = dml.DoubleMLIRM(data_dml_base,
-                          ml_g=randomForest_reg,
-                          ml_m=randomForest_class,
-                          trimming_threshold=0.01,
-                          n_folds=5
-                          )
-print("Training first stage")
-dml_irm.fit(store_predictions=True)
-
-spline_basis = create_spline_basis(X=data["x"], knots=3, degree=2)
-
-cate = DoubleMLIRMBLP(dml_irm, basis=spline_basis).fit()
-
-print(cate.confint(spline_basis, joint=False))
-
-groups = pd.DataFrame(np.vstack([data["x"] <= 0.2, (data["x"] >= 0.2) & (data["x"] <= 0.7), data["x"] >= 0.7]).T,
-             columns=['Group 1', 'Group 2', 'Group 3'])
-
-gate = DoubleMLIRMBLP(dml_irm, basis=groups).fit()
-print(gate.confint(groups))
+        return df_ci
\ No newline at end of file
diff --git a/doubleml/double_ml_irm.py b/doubleml/double_ml_irm.py
index 4ecd2b78..85d449e7 100644
--- a/doubleml/double_ml_irm.py
+++ b/doubleml/double_ml_irm.py
@@ -3,8 +3,9 @@
 from sklearn.utils.multiclass import type_of_target
 
 from .double_ml import DoubleML
-from ._utils import _dml_cv_predict, _get_cond_smpls, _dml_tune, _check_finite_predictions
+from .double_ml_blp import DoubleMLIRMBLP
 
+from ._utils import _dml_cv_predict, _get_cond_smpls, _dml_tune, _check_finite_predictions
 
 class DoubleMLIRM(DoubleML):
     """Double machine learning for interactive regression models
@@ -309,3 +310,65 @@ def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_
                'tune_res': tune_res}
 
         return res
+
+    def cate(self, basis):
+        """
+        Calculate conditional average treatment effects (CATE) for a given basis.
+
+        Parameters
+        ----------
+        basis : :class:`pandas.DataFrame`
+            The basis for estimating the best linear predictor. Has to have the shape (n,d),
+            where d is the number of predictors.
+
+        Returns
+        -------
+        model : :class:`doubleML.DoubleMLIRMBLP`
+            Best linear Predictor model.
+        """
+        # define the orthogonal signal
+        orth_signal = self.psi_b.reshape(-1)
+
+        model = DoubleMLIRMBLP(orth_signal, basis=basis).fit()
+
+        return model
+
+    def gate(self, groups, joint=False, level=0.95, n_rep_boot=500):
+        """
+        Calculate group average treatment effects (GATE) for a given basis.
+
+        Parameters
+        ----------
+        groups : :class:`pandas.DataFrame`
+            The basis for estimating the best linear predictor. Has to have the shape (n,d),
+            where d is the number of groups.
+
+        joint : bool
+            Indicates whether joint confidence intervals are computed.
+            Default is ``False``
+
+        level : float
+            The confidence level.
+            Default is ``0.95``.
+
+        n_rep_boot : int
+            Number of bootstrap samples for joint confidence interval.
+            Default is ``500``.
+
+        Returns
+        -------
+        df_ci : pd.DataFrame
+            A data frame with the confidence interval(s).
+        """
+
+
+        # define the orthogonal signal
+        orth_signal = self.psi_b.reshape(-1)
+        model = DoubleMLIRMBLP(orth_signal, basis=groups).fit()
+
+        unique_groups = groups.drop_duplicates(keep='last')
+        df_ci = model.confint(unique_groups, joint, level, n_rep_boot)
+        df_ci.index = unique_groups.columns.values
+
+        return df_ci
+
diff --git a/doubleml/tests/_utils_blp_manual.py b/doubleml/tests/_utils_blp_manual.py
index 42cb37c9..2d560b70 100644
--- a/doubleml/tests/_utils_blp_manual.py
+++ b/doubleml/tests/_utils_blp_manual.py
@@ -6,21 +6,18 @@
 import patsy
 
 
-def fit_blp(obj_dml_irm, basis):
-    orth_signal = obj_dml_irm.psi_b.reshape(-1, 1)
+def fit_blp(orth_signal, basis):
     blp_model = sm.OLS(orth_signal, basis).fit()
 
     return blp_model
 
 
-def blp_confint(obj_dml_irm, blp_model, basis, joint=False, level=0.95, n_rep_boot=500):
+def blp_confint(blp_model, basis, joint=False, level=0.95, n_rep_boot=500):
     alpha = 1 - level
     g_hat = blp_model.predict(basis)
 
     blp_omega = blp_model.cov_HC0
 
-    # check this again (calculation of HC0 should include scaling with sample size)
-    # se_scaling = np.sqrt(self._dml_irm._dml_data.n_obs)
     se_scaling = 1
     blp_se = np.sqrt((basis.dot(blp_omega) * basis).sum(axis=1)) / se_scaling
 
@@ -124,4 +121,56 @@ def create_synthetic_data(n=200, n_w=30, support_size=5, n_x=1, constant=True):
     data = pd.concat([y_df, x_df, t_df, w_df], axis=1)
 
     covariates = list(w_df.columns.values) + list(x_df.columns.values)
-    return data, covariates
\ No newline at end of file
+    return data, covariates
+
+
+import doubleml as dml
+from doubleml.tests._utils_blp_manual import create_spline_basis, create_synthetic_data
+
+# DGP constants
+np.random.seed(123)
+n = 2000
+n_w = 10
+support_size = 5
+n_x = 1
+
+# Create data
+data, covariates = create_synthetic_data(n=n, n_w=n_w, support_size=support_size, n_x=n_x, constant=True)
+data_dml_base = dml.DoubleMLData(data,
+                                 y_col='y',
+                                 d_cols='t',
+                                 x_cols=covariates)
+
+# First stage estimation
+# Lasso regression
+from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
+randomForest_reg = RandomForestRegressor(n_estimators=500)
+randomForest_class = RandomForestClassifier(n_estimators=500)
+
+np.random.seed(123)
+
+dml_irm = dml.DoubleMLIRM(data_dml_base,
+                          ml_g=randomForest_reg,
+                          ml_m=randomForest_class,
+                          trimming_threshold=0.01,
+                          n_folds=5
+                          )
+print("Training first stage")
+dml_irm.fit(store_predictions=True)
+
+spline_basis = create_spline_basis(X=data["x"], knots=3, degree=2)
+
+
+
+# get the orthogonal signal from the IRM model
+#orth_signal = dml_irm.psi_b.reshape(-1)
+#cate = DoubleMLIRMBLP(orth_signal, basis=spline_basis).fit()
+cate = dml_irm.cate(spline_basis)
+
+print(cate.confint(spline_basis, joint=False))
+
+groups = pd.DataFrame(np.vstack([data["x"] <= 0.2, (data["x"] >= 0.2) & (data["x"] <= 0.7), data["x"] >= 0.7]).T,
+             columns=['Group 1', 'Group 2', 'Group 3'])
+
+gate = dml_irm.gate(groups)
+print(gate)
\ No newline at end of file
diff --git a/doubleml/tests/test_blp.py b/doubleml/tests/test_blp.py
index 0f75483b..6ad7217a 100644
--- a/doubleml/tests/test_blp.py
+++ b/doubleml/tests/test_blp.py
@@ -49,13 +49,17 @@ def dml_blp_fixture(constant, ci_joint):
 
     spline_basis = create_spline_basis(X=data["x"])
 
-    cate = dml.double_ml_blp.DoubleMLIRMBLP(dml_irm, basis=spline_basis).fit()
-    cate_manual = fit_blp(dml_irm, spline_basis)
+    # cate
+    cate = dml_irm.cate(spline_basis)
+
+    # get the orthogonal signal from the IRM model
+    orth_signal = dml_irm.psi_b.reshape(-1)
+    cate_manual = fit_blp(orth_signal, spline_basis)
 
     np.random.seed(42)
     ci = cate.confint(spline_basis, joint=ci_joint, level=0.95, n_rep_boot=1000)
     np.random.seed(42)
-    ci_manual = blp_confint(dml_irm, cate_manual, spline_basis, joint=ci_joint, level=0.95, n_rep_boot=1000)
+    ci_manual = blp_confint(cate_manual, spline_basis, joint=ci_joint, level=0.95, n_rep_boot=1000)
 
     res_dict = {'coef': cate.blp_model.params,
                 'coef_manual': cate_manual.params,
@@ -90,3 +94,4 @@ def test_dml_blp_ci(dml_blp_fixture):
     assert np.allclose(dml_blp_fixture['ci'],
                        dml_blp_fixture['ci_manual'],
                        rtol=1e-9, atol=1e-4)
+

From fff77acf17f245587a0b0136638ee5d3a1d7e27b Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Thu, 8 Sep 2022 15:49:13 +0200
Subject: [PATCH 03/15] restructure basis unit tests

---
 .gitignore                          |   1 +
 doubleml/double_ml_blp.py           |  33 +++++--
 doubleml/double_ml_irm.py           |  23 +++--
 doubleml/tests/_utils_blp_manual.py | 134 +---------------------------
 doubleml/tests/test_blp.py          |  58 ++++++------
 5 files changed, 74 insertions(+), 175 deletions(-)

diff --git a/.gitignore b/.gitignore
index 14d18a6a..69699106 100644
--- a/.gitignore
+++ b/.gitignore
@@ -26,3 +26,4 @@ share/python-wheels/
 .installed.cfg
 *.egg
 MANIFEST
+*.idea
diff --git a/doubleml/double_ml_blp.py b/doubleml/double_ml_blp.py
index 8644252a..642d1f40 100644
--- a/doubleml/double_ml_blp.py
+++ b/doubleml/double_ml_blp.py
@@ -5,6 +5,7 @@
 from scipy.stats import norm
 from scipy.linalg import sqrtm
 
+
 class DoubleMLIRMBLP:
     """Best Linear Predictor for DoubleML IRM models
 
@@ -24,15 +25,15 @@ def __init__(self,
 
         if not isinstance(orth_signal, np.ndarray):
             raise TypeError('The signal must be of np.ndarray type. '
-                            f'{str(orth_signal)} of type {str(type(orth_signal))} was passed.')
+                            f'Signal of type {str(type(orth_signal))} was passed.')
 
         if orth_signal.ndim != 1:
             raise ValueError('The signal must be of one dimensional. '
-                             f'{str(orth_signal)} of dimensions {str(orth_signal.ndim)} was passed.')
+                             f'Signal of dimensions {str(orth_signal.ndim)} was passed.')
 
         if not isinstance(basis, pd.DataFrame):
             raise TypeError('The basis must be of DataFrame type. '
-                            f'{str(basis)} of type {str(type(basis))} was passed.')
+                            f'Basis of type {str(type(basis))} was passed.')
 
         if not basis.columns.is_unique:
             raise ValueError('Invalid pd.DataFrame: '
@@ -40,8 +41,8 @@ def __init__(self,
 
         if orth_signal.shape[0] != basis.shape[0]:
             raise ValueError('Incompatible input dimensions.'
-                             f'{str(orth_signal)} of shape {str(orth_signal.shape)} and'
-                             f'{str(basis)} of shape {str(basis.shape)} were passed.')
+                             f'Signal of shape {str(orth_signal.shape)} and'
+                             f'basis of shape {str(basis.shape)} were passed.')
 
         self._orth_signal = orth_signal
         self._basis = basis
@@ -64,6 +65,13 @@ def orth_signal(self):
         """
         return self._orth_signal
 
+    @property
+    def basis(self):
+        """
+        Basis.
+        """
+        return self._basis
+
     @property
     def blp_omega(self):
         """
@@ -113,6 +121,17 @@ def confint(self, basis, joint=False, level=0.95, n_rep_boot=500):
         df_ci : pd.DataFrame
             A data frame with the confidence interval(s).
         """
+        if not isinstance(joint, bool):
+            raise TypeError('joint must be True or False. '
+                            f'Got {str(joint)}.')
+
+        if not isinstance(level, float):
+            raise TypeError('The confidence level must be of float type. '
+                            f'{str(level)} of type {str(type(level))} was passed.')
+        if (level <= 0) | (level >= 1):
+            raise ValueError('The confidence level must be in (0,1). '
+                             f'{str(level)} was passed.')
+
         alpha = 1 - level
         # blp of the orthogonal signal
         g_hat = self._blp_model.predict(basis)
@@ -142,6 +161,6 @@ def confint(self, basis, joint=False, level=0.95, n_rep_boot=500):
 
         ci = np.vstack((g_hat_lower, g_hat, g_hat_upper)).T
         df_ci = pd.DataFrame(ci,
-                         columns=['{:.1f} %'.format(alpha/2 * 100), 'effect', '{:.1f} %'.format((1-alpha/2) * 100)],
-                         index=basis.index)
+                             columns=['{:.1f} %'.format(alpha/2 * 100), 'effect', '{:.1f} %'.format((1-alpha/2) * 100)],
+                             index=basis.index)
         return df_ci
\ No newline at end of file
diff --git a/doubleml/double_ml_irm.py b/doubleml/double_ml_irm.py
index 85d449e7..329dacb1 100644
--- a/doubleml/double_ml_irm.py
+++ b/doubleml/double_ml_irm.py
@@ -1,4 +1,5 @@
 import numpy as np
+import pandas as pd
 from sklearn.utils import check_X_y
 from sklearn.utils.multiclass import type_of_target
 
@@ -328,7 +329,7 @@ def cate(self, basis):
         """
         # define the orthogonal signal
         orth_signal = self.psi_b.reshape(-1)
-
+        # fit the best linear predictor
         model = DoubleMLIRMBLP(orth_signal, basis=basis).fit()
 
         return model
@@ -340,8 +341,8 @@ def gate(self, groups, joint=False, level=0.95, n_rep_boot=500):
         Parameters
         ----------
         groups : :class:`pandas.DataFrame`
-            The basis for estimating the best linear predictor. Has to have the shape (n,d),
-            where d is the number of groups.
+            The group indicator for estimating the best linear predictor. Has to have the shape (n,d),
+            where d is the number of groups or (n,1) and contain the corresponding groups.
 
         joint : bool
             Indicates whether joint confidence intervals are computed.
@@ -361,14 +362,26 @@ def gate(self, groups, joint=False, level=0.95, n_rep_boot=500):
             A data frame with the confidence interval(s).
         """
 
+        if not isinstance(groups, pd.DataFrame):
+            raise TypeError('Groups must be of DataFrame type. '
+                            f'Groups of type {str(type(groups))} was passed.')
+
+        if not all(groups.dtypes == bool) or all(groups.dtypes == int):
+            if groups.shape[1] == 1:
+                groups = pd.get_dummies(groups, prefix='Group', prefix_sep='_')
+            else:
+                raise TypeError('Columns must be of of bool or int type or the data frame only should contain'
+                                'one column.')
 
         # define the orthogonal signal
         orth_signal = self.psi_b.reshape(-1)
+        # fit the best linear predictor
         model = DoubleMLIRMBLP(orth_signal, basis=groups).fit()
 
-        unique_groups = groups.drop_duplicates(keep='last')
+        # reduce to unique groups and create confidence interval
+        unique_groups = pd.DataFrame(np.diag(v=np.full((groups.shape[1]), True)))
         df_ci = model.confint(unique_groups, joint, level, n_rep_boot)
-        df_ci.index = unique_groups.columns.values
+        df_ci.index = groups.columns.values
 
         return df_ci
 
diff --git a/doubleml/tests/_utils_blp_manual.py b/doubleml/tests/_utils_blp_manual.py
index 2d560b70..213a2b16 100644
--- a/doubleml/tests/_utils_blp_manual.py
+++ b/doubleml/tests/_utils_blp_manual.py
@@ -18,14 +18,12 @@ def blp_confint(blp_model, basis, joint=False, level=0.95, n_rep_boot=500):
 
     blp_omega = blp_model.cov_HC0
 
-    se_scaling = 1
-    blp_se = np.sqrt((basis.dot(blp_omega) * basis).sum(axis=1)) / se_scaling
+    blp_se = np.sqrt((basis.dot(blp_omega) * basis).sum(axis=1))
 
     if joint:
         # calculate the maximum t-statistic with bootstrap
         normal_samples = np.random.normal(size=[basis.shape[1], n_rep_boot])
-        bootstrap_samples = np.multiply(basis.dot(np.dot(sqrtm(blp_omega), normal_samples)).T,
-                                        (blp_se * se_scaling))
+        bootstrap_samples = np.multiply(basis.dot(np.dot(sqrtm(blp_omega), normal_samples)).T, blp_se)
 
         max_t_stat = np.quantile(np.max(np.abs(bootstrap_samples), axis=0), q=level)
 
@@ -46,131 +44,3 @@ def blp_confint(blp_model, basis, joint=False, level=0.95, n_rep_boot=500):
                                   '{:.1f} %'.format((1 - alpha / 2) * 100)],
                          index=basis.index)
     return df_ci
-
-
-def create_spline_basis(X, knots=5, degree=3):
-    if type(knots) == int:
-        X_splines = patsy.bs(X, df=knots + degree, degree=degree)
-    else:
-        X_splines = patsy.bs(X, knots=knots, degree=degree)
-    return X_splines
-
-
-def create_synthetic_data(n=200, n_w=30, support_size=5, n_x=1, constant=True):
-    """
-    Creates a synthetic example based on example 2 of https://github.com/microsoft/EconML/blob/master/notebooks/Double%20Machine%20Learning%20Examples.ipynb
-
-    Parameters
-    ----------
-    n_samples : int
-        Number of samples.
-        Default is ``200``.
-
-    n_w : int
-        Dimension of covariates.
-        Default is ``30``.
-
-    support_size : int
-        Number of relevant covariates.
-        Default is ``5``.
-
-    n_x : int
-        Dimension of treatment variable.
-        Default is ``1``.
-
-    Returns
-    -------
-     data : pd.DataFrame
-            A data frame.
-
-    """
-    # Outcome support
-    # With the next two lines we are effectively choosing the matrix gamma in the example
-    support_Y = np.random.choice(np.arange(n_w), size=support_size, replace=False)
-    coefs_Y = np.random.uniform(0, 1, size=support_size)
-    # Define the function to generate the noise
-    epsilon_sample = lambda n: np.random.uniform(-1, 1, size=n)
-    # Treatment support
-    # Assuming the matrices gamma and beta have the same non-zero components
-    support_T = support_Y
-    coefs_T = np.random.uniform(0, 1, size=support_size)
-    # Define the function to generate the noise
-    eta_sample = lambda n: np.random.uniform(-1, 1, size=n)
-
-    # Generate controls, covariates, treatments and outcomes
-    W = np.random.normal(0, 1, size=(n, n_w))
-    X = np.random.uniform(0, 1, size=(n, n_x))
-    # Heterogeneous treatment effects
-    if constant:
-        TE = np.array([3 for x_i in X])
-    else:
-        TE = np.array([np.exp(4 + 2 * x_i) for x_i in X])
-    # Define treatment
-    log_odds = np.dot(W[:, support_T], coefs_T) + eta_sample(n)
-    T_sigmoid = 1 / (1 + np.exp(-log_odds))
-    T = np.array([np.random.binomial(1, p) for p in T_sigmoid])
-    # Define the outcome
-    Y = TE * T + np.dot(W[:, support_Y], coefs_Y) + epsilon_sample(n)
-
-    # Now we build the dataset
-    y_df = pd.DataFrame({'y': Y})
-    x_df = pd.DataFrame({'x': X.reshape(-1)})
-    t_df = pd.DataFrame({'t': T})
-    w_df = pd.DataFrame(data=W, index=np.arange(W.shape[0]), columns=[f'w_{i}' for i in range(W.shape[1])])
-
-    data = pd.concat([y_df, x_df, t_df, w_df], axis=1)
-
-    covariates = list(w_df.columns.values) + list(x_df.columns.values)
-    return data, covariates
-
-
-import doubleml as dml
-from doubleml.tests._utils_blp_manual import create_spline_basis, create_synthetic_data
-
-# DGP constants
-np.random.seed(123)
-n = 2000
-n_w = 10
-support_size = 5
-n_x = 1
-
-# Create data
-data, covariates = create_synthetic_data(n=n, n_w=n_w, support_size=support_size, n_x=n_x, constant=True)
-data_dml_base = dml.DoubleMLData(data,
-                                 y_col='y',
-                                 d_cols='t',
-                                 x_cols=covariates)
-
-# First stage estimation
-# Lasso regression
-from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
-randomForest_reg = RandomForestRegressor(n_estimators=500)
-randomForest_class = RandomForestClassifier(n_estimators=500)
-
-np.random.seed(123)
-
-dml_irm = dml.DoubleMLIRM(data_dml_base,
-                          ml_g=randomForest_reg,
-                          ml_m=randomForest_class,
-                          trimming_threshold=0.01,
-                          n_folds=5
-                          )
-print("Training first stage")
-dml_irm.fit(store_predictions=True)
-
-spline_basis = create_spline_basis(X=data["x"], knots=3, degree=2)
-
-
-
-# get the orthogonal signal from the IRM model
-#orth_signal = dml_irm.psi_b.reshape(-1)
-#cate = DoubleMLIRMBLP(orth_signal, basis=spline_basis).fit()
-cate = dml_irm.cate(spline_basis)
-
-print(cate.confint(spline_basis, joint=False))
-
-groups = pd.DataFrame(np.vstack([data["x"] <= 0.2, (data["x"] >= 0.2) & (data["x"] <= 0.7), data["x"] >= 0.7]).T,
-             columns=['Group 1', 'Group 2', 'Group 3'])
-
-gate = dml_irm.gate(groups)
-print(gate)
\ No newline at end of file
diff --git a/doubleml/tests/test_blp.py b/doubleml/tests/test_blp.py
index 6ad7217a..477437c1 100644
--- a/doubleml/tests/test_blp.py
+++ b/doubleml/tests/test_blp.py
@@ -1,65 +1,59 @@
 import numpy as np
+import pandas as pd
 import pytest
 
 from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
 import doubleml as dml
 
-from ._utils_blp_manual import fit_blp, blp_confint, create_spline_basis, create_synthetic_data
+from ._utils_blp_manual import fit_blp, blp_confint
 
 
 @pytest.fixture(scope='module',
-                params=[True])
-def constant(request):
+                params=[True, False])
+def ci_joint(request):
     return request.param
 
-
 @pytest.fixture(scope='module',
-                params=[True, False])
-def ci_joint(request):
+                params=[0.95, 0.9])
+def ci_level(request):
     return request.param
 
 
 @pytest.fixture(scope='module')
-def dml_blp_fixture(constant, ci_joint):
+def dml_blp_fixture(ci_joint, ci_level):
     np.random.seed(42)
     n = 200
-    n_w = 10
-    support_size = 5
-    n_x = 1
 
-    # Create data
-    data, covariates = create_synthetic_data(n=n, n_w=n_w, support_size=support_size, n_x=n_x, constant=constant)
-    data_dml_base = dml.DoubleMLData(data,
-                                     y_col='y',
-                                     d_cols='t',
-                                     x_cols=covariates)
+    # collect data
+    np.random.seed(42)
+    obj_dml_data = dml.datasets.make_irm_data(n_obs=n, dim_x=5)
 
     # First stage estimation
     ml_m = RandomForestRegressor(n_estimators=100)
     ml_g = RandomForestClassifier(n_estimators=100)
 
-    np.random.seed(42)
-    dml_irm = dml.DoubleMLIRM(data_dml_base,
-                              ml_g=ml_m,
-                              ml_m=ml_g,
-                              trimming_threshold=0.05,
-                              n_folds=5)
+    dml_irm_obj = dml.DoubleMLIRM(obj_dml_data,
+                                  ml_g=ml_m,
+                                  ml_m=ml_g,
+                                  trimming_threshold=0.05,
+                                  n_folds=5)
 
-    dml_irm.fit(store_predictions=True)
+    dml_irm_obj.fit()
 
-    spline_basis = create_spline_basis(X=data["x"])
+    # create a random basis
+    random_basis = pd.DataFrame(np.random.normal(0, 1, size=(n, 5)))
 
     # cate
-    cate = dml_irm.cate(spline_basis)
+    cate = dml_irm_obj.cate(random_basis)
 
     # get the orthogonal signal from the IRM model
-    orth_signal = dml_irm.psi_b.reshape(-1)
-    cate_manual = fit_blp(orth_signal, spline_basis)
+    orth_signal = dml_irm_obj.psi_b.reshape(-1)
+    cate_manual = fit_blp(orth_signal, random_basis)
 
     np.random.seed(42)
-    ci = cate.confint(spline_basis, joint=ci_joint, level=0.95, n_rep_boot=1000)
+    ci = cate.confint(random_basis, joint=ci_joint, level=ci_level, n_rep_boot=1000)
     np.random.seed(42)
-    ci_manual = blp_confint(cate_manual, spline_basis, joint=ci_joint, level=0.95, n_rep_boot=1000)
+    ci_manual = blp_confint(cate_manual, random_basis, joint=ci_joint, level=ci_level, n_rep_boot=1000)
 
     res_dict = {'coef': cate.blp_model.params,
                 'coef_manual': cate_manual.params,
@@ -73,23 +67,25 @@ def dml_blp_fixture(constant, ci_joint):
     return res_dict
 
 
+@pytest.mark.ci
 def test_dml_blp_coef(dml_blp_fixture):
     assert np.allclose(dml_blp_fixture['coef'],
                        dml_blp_fixture['coef_manual'],
                        rtol=1e-9, atol=1e-4)
 
-
+@pytest.mark.ci
 def test_dml_blp_values(dml_blp_fixture):
     assert np.allclose(dml_blp_fixture['values'],
                        dml_blp_fixture['values_manual'],
                        rtol=1e-9, atol=1e-4)
 
-
+@pytest.mark.ci
 def test_dml_blp_omega(dml_blp_fixture):
     assert np.allclose(dml_blp_fixture['omega'],
                        dml_blp_fixture['omega_manual'],
                        rtol=1e-9, atol=1e-4)
 
+@pytest.mark.ci
 def test_dml_blp_ci(dml_blp_fixture):
     assert np.allclose(dml_blp_fixture['ci'],
                        dml_blp_fixture['ci_manual'],

From fe3448c83b5e35487d1c7cf6b5ddb159152da54b Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Thu, 8 Sep 2022 16:31:50 +0200
Subject: [PATCH 04/15] Update double_ml_blp.py

---
 doubleml/double_ml_blp.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/doubleml/double_ml_blp.py b/doubleml/double_ml_blp.py
index 642d1f40..07c15f5c 100644
--- a/doubleml/double_ml_blp.py
+++ b/doubleml/double_ml_blp.py
@@ -132,6 +132,13 @@ def confint(self, basis, joint=False, level=0.95, n_rep_boot=500):
             raise ValueError('The confidence level must be in (0,1). '
                              f'{str(level)} was passed.')
 
+        if not isinstance(n_rep_boot, int):
+            raise TypeError('The number of bootstrap replications must be of int type. '
+                            f'{str(n_rep_boot)} of type {str(type(n_rep_boot))} was passed.')
+        if n_rep_boot < 1:
+            raise ValueError('The number of bootstrap replications must be positive. '
+                             f'{str(n_rep_boot)} was passed.')
+
         alpha = 1 - level
         # blp of the orthogonal signal
         g_hat = self._blp_model.predict(basis)

From bc52a5136e92ed6c556f8232cdaaaf811f341a7b Mon Sep 17 00:00:00 2001
From: SvenKlaassen <47529404+SvenKlaassen@users.noreply.github.com>
Date: Fri, 9 Sep 2022 12:18:25 +0200
Subject: [PATCH 05/15] add unit tests

add unit tests
restructure unit tests for blp, cate and gate
---
 doubleml/double_ml_blp.py                  | 13 ++---
 doubleml/double_ml_irm.py                  |  2 +-
 doubleml/tests/_utils_blp_manual.py        |  1 -
 doubleml/tests/test_blp.py                 | 50 ++++++-------------
 doubleml/tests/test_doubleml_exceptions.py | 58 +++++++++++++++++++++-
 doubleml/tests/test_irm.py                 | 35 +++++++++++++
 6 files changed, 112 insertions(+), 47 deletions(-)

diff --git a/doubleml/double_ml_blp.py b/doubleml/double_ml_blp.py
index 07c15f5c..3465c5a8 100644
--- a/doubleml/double_ml_blp.py
+++ b/doubleml/double_ml_blp.py
@@ -39,11 +39,6 @@ def __init__(self,
             raise ValueError('Invalid pd.DataFrame: '
                              'Contains duplicate column names.')
 
-        if orth_signal.shape[0] != basis.shape[0]:
-            raise ValueError('Incompatible input dimensions.'
-                             f'Signal of shape {str(orth_signal.shape)} and'
-                             f'basis of shape {str(basis.shape)} were passed.')
-
         self._orth_signal = orth_signal
         self._basis = basis
 
@@ -139,19 +134,21 @@ def confint(self, basis, joint=False, level=0.95, n_rep_boot=500):
             raise ValueError('The number of bootstrap replications must be positive. '
                              f'{str(n_rep_boot)} was passed.')
 
+        if self._blp_model is None:
+            raise ValueError('Apply fit() before confint().')
+
         alpha = 1 - level
         # blp of the orthogonal signal
         g_hat = self._blp_model.predict(basis)
 
         # calculate se for basis elements
-        se_scaling = 1
-        blp_se = np.sqrt((basis.to_numpy().dot(self._blp_omega) * basis.to_numpy()).sum(axis=1)) / se_scaling
+        blp_se = np.sqrt((basis.to_numpy().dot(self._blp_omega) * basis.to_numpy()).sum(axis=1))
 
         if joint:
             # calculate the maximum t-statistic with bootstrap
             normal_samples = np.random.normal(size=[basis.shape[1], n_rep_boot])
             bootstrap_samples = np.multiply(basis.to_numpy().dot(np.dot(sqrtm(self._blp_omega), normal_samples)).T,
-                                            (blp_se * se_scaling))
+                                            blp_se)
 
             max_t_stat = np.quantile(np.max(np.abs(bootstrap_samples), axis=0), q=level)
 
diff --git a/doubleml/double_ml_irm.py b/doubleml/double_ml_irm.py
index 329dacb1..459de258 100644
--- a/doubleml/double_ml_irm.py
+++ b/doubleml/double_ml_irm.py
@@ -370,7 +370,7 @@ def gate(self, groups, joint=False, level=0.95, n_rep_boot=500):
             if groups.shape[1] == 1:
                 groups = pd.get_dummies(groups, prefix='Group', prefix_sep='_')
             else:
-                raise TypeError('Columns must be of of bool or int type or the data frame only should contain'
+                raise TypeError('Columns must be of of bool or int type or the data frame only should contain '
                                 'one column.')
 
         # define the orthogonal signal
diff --git a/doubleml/tests/_utils_blp_manual.py b/doubleml/tests/_utils_blp_manual.py
index 213a2b16..aea79d66 100644
--- a/doubleml/tests/_utils_blp_manual.py
+++ b/doubleml/tests/_utils_blp_manual.py
@@ -3,7 +3,6 @@
 from scipy.linalg import sqrtm
 from scipy.stats import norm
 import pandas as pd
-import patsy
 
 
 def fit_blp(orth_signal, basis):
diff --git a/doubleml/tests/test_blp.py b/doubleml/tests/test_blp.py
index 477437c1..d932965a 100644
--- a/doubleml/tests/test_blp.py
+++ b/doubleml/tests/test_blp.py
@@ -2,7 +2,6 @@
 import pandas as pd
 import pytest
 
-from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
 import doubleml as dml
 
 from ._utils_blp_manual import fit_blp, blp_confint
@@ -21,46 +20,25 @@ def ci_level(request):
 
 @pytest.fixture(scope='module')
 def dml_blp_fixture(ci_joint, ci_level):
+    n = 50
     np.random.seed(42)
-    n = 200
+    random_basis = pd.DataFrame(np.random.normal(0, 1, size=(n, 3)))
+    random_signal = np.random.normal(0, 1, size=(n, ))
 
-    # collect data
-    np.random.seed(42)
-    obj_dml_data = dml.datasets.make_irm_data(n_obs=n, dim_x=5)
-
-    # First stage estimation
-    ml_m = RandomForestRegressor(n_estimators=100)
-    ml_g = RandomForestClassifier(n_estimators=100)
-
-    dml_irm_obj = dml.DoubleMLIRM(obj_dml_data,
-                                  ml_g=ml_m,
-                                  ml_m=ml_g,
-                                  trimming_threshold=0.05,
-                                  n_folds=5)
-
-    dml_irm_obj.fit()
-
-    # create a random basis
-    random_basis = pd.DataFrame(np.random.normal(0, 1, size=(n, 5)))
-
-    # cate
-    cate = dml_irm_obj.cate(random_basis)
-
-    # get the orthogonal signal from the IRM model
-    orth_signal = dml_irm_obj.psi_b.reshape(-1)
-    cate_manual = fit_blp(orth_signal, random_basis)
+    blp = dml.DoubleMLIRMBLP(random_signal, random_basis).fit()
+    blp_manual = fit_blp(random_signal, random_basis)
 
     np.random.seed(42)
-    ci = cate.confint(random_basis, joint=ci_joint, level=ci_level, n_rep_boot=1000)
+    ci = blp.confint(random_basis, joint=ci_joint, level=ci_level, n_rep_boot=1000)
     np.random.seed(42)
-    ci_manual = blp_confint(cate_manual, random_basis, joint=ci_joint, level=ci_level, n_rep_boot=1000)
-
-    res_dict = {'coef': cate.blp_model.params,
-                'coef_manual': cate_manual.params,
-                'values': cate.blp_model.fittedvalues,
-                'values_manual':  cate_manual.fittedvalues,
-                'omega': cate.blp_omega,
-                'omega_manual': cate_manual.cov_HC0,
+    ci_manual = blp_confint(blp_manual, random_basis, joint=ci_joint, level=ci_level, n_rep_boot=1000)
+
+    res_dict = {'coef': blp.blp_model.params,
+                'coef_manual': blp_manual.params,
+                'values': blp.blp_model.fittedvalues,
+                'values_manual':  blp_manual.fittedvalues,
+                'omega': blp.blp_omega,
+                'omega_manual': blp_manual.cov_HC0,
                 'ci': ci,
                 'ci_manual': ci_manual}
 
diff --git a/doubleml/tests/test_doubleml_exceptions.py b/doubleml/tests/test_doubleml_exceptions.py
index 875902d1..be2794cb 100644
--- a/doubleml/tests/test_doubleml_exceptions.py
+++ b/doubleml/tests/test_doubleml_exceptions.py
@@ -2,7 +2,7 @@
 import pandas as pd
 import numpy as np
 
-from doubleml import DoubleMLPLR, DoubleMLIRM, DoubleMLIIVM, DoubleMLPLIV, DoubleMLData, DoubleMLClusterData
+from doubleml import DoubleMLPLR, DoubleMLIRM, DoubleMLIIVM, DoubleMLPLIV, DoubleMLData, DoubleMLClusterData, DoubleMLIRMBLP
 from doubleml.datasets import make_plr_CCDDHNR2018, make_irm_data, make_pliv_CHS2015, make_iivm_data, \
     make_pliv_multiway_cluster_CKMS2021
 
@@ -652,3 +652,59 @@ def test_doubleml_nan_prediction():
     msg = r'Predictions from learner LassoWithInfPred\(\) for ml_m are not finite.'
     with pytest.raises(ValueError, match=msg):
         _ = DoubleMLPLR(dml_data, ml_l, LassoWithInfPred()).fit()
+
+@pytest.mark.ci
+def test_doubleml_exception_blp():
+    random_basis = pd.DataFrame(np.random.normal(0, 1, size=(2, 3)))
+    signal = np.array([1, 2])
+
+    msg = "The signal must be of np.ndarray type. Signal of type <class 'int'> was passed."
+    with pytest.raises(TypeError, match=msg):
+        DoubleMLIRMBLP(orth_signal=1, basis=random_basis)
+    msg = 'The signal must be of one dimensional. Signal of dimensions 2 was passed.'
+    with pytest.raises(ValueError, match=msg):
+        DoubleMLIRMBLP(orth_signal=np.array([[1], [2]]), basis=random_basis)
+    msg = "The basis must be of DataFrame type. Basis of type <class 'int'> was passed."
+    with pytest.raises(TypeError, match=msg):
+        DoubleMLIRMBLP(orth_signal=signal, basis=1)
+    msg = 'Invalid pd.DataFrame: Contains duplicate column names.'
+    with pytest.raises(ValueError, match=msg):
+        DoubleMLIRMBLP(orth_signal=signal, basis=pd.DataFrame(np.array([[1, 2], [4, 5]]),
+                   columns=['x_1', 'x_1']))
+
+
+    dml_blp_confint = DoubleMLIRMBLP(orth_signal=signal, basis=random_basis)
+    msg = 'joint must be True or False. Got 1.'
+    with pytest.raises(TypeError, match=msg):
+        dml_blp_confint.confint(random_basis, joint=1)
+    msg = "The confidence level must be of float type. 5% of type <class 'str'> was passed."
+    with pytest.raises(TypeError, match=msg):
+        dml_blp_confint.confint(random_basis, level='5%')
+    msg = r'The confidence level must be in \(0,1\). 0.0 was passed.'
+    with pytest.raises(ValueError, match=msg):
+        dml_blp_confint.confint(random_basis, level=0.)
+    msg = "The number of bootstrap replications must be of int type. 500 of type <class 'str'> was passed."
+    with pytest.raises(TypeError, match=msg):
+        dml_blp_confint.confint(random_basis, n_rep_boot='500')
+    msg = 'The number of bootstrap replications must be positive. 0 was passed.'
+    with pytest.raises(ValueError, match=msg):
+        dml_blp_confint.confint(random_basis, n_rep_boot=0)
+    msg = r'Apply fit\(\) before confint\(\).'
+    with pytest.raises(ValueError, match=msg):
+        dml_blp_confint.confint(random_basis)
+
+@pytest.mark.ci
+def test_doubleml_exception_gate():
+    dml_irm_obj = DoubleMLIRM(dml_data_irm,
+                              ml_g=Lasso(),
+                              ml_m=LogisticRegression(),
+                              trimming_threshold=0.05,
+                              n_folds=5)
+    dml_irm_obj.fit()
+
+    msg = "Groups must be of DataFrame type. Groups of type <class 'int'> was passed."
+    with pytest.raises(TypeError, match=msg):
+        dml_irm_obj.gate(groups=2)
+    msg = 'Columns must be of of bool or int type or the data frame only should contain one column.'
+    with pytest.raises(TypeError, match=msg):
+        dml_irm_obj.gate(groups=pd.DataFrame(np.random.normal(0, 1, size=(50, 3))))
diff --git a/doubleml/tests/test_irm.py b/doubleml/tests/test_irm.py
index 12370a4b..c4df18ec 100644
--- a/doubleml/tests/test_irm.py
+++ b/doubleml/tests/test_irm.py
@@ -1,4 +1,5 @@
 import numpy as np
+import pandas as pd
 import pytest
 import math
 
@@ -118,3 +119,37 @@ def test_dml_irm_boot(dml_irm_fixture):
         assert np.allclose(dml_irm_fixture['boot_t_stat' + bootstrap],
                            dml_irm_fixture['boot_t_stat' + bootstrap + '_manual'],
                            rtol=1e-9, atol=1e-4)
+
+
+@pytest.mark.ci
+def test_dml_irm_cate_gate():
+    n = 50
+    # collect data
+    np.random.seed(42)
+    obj_dml_data = dml.datasets.make_irm_data(n_obs=n, dim_x=2)
+
+    # First stage estimation
+    ml_g = RandomForestRegressor(n_estimators=10)
+    ml_m = RandomForestClassifier(n_estimators=10)
+
+    dml_irm_obj = dml.DoubleMLIRM(obj_dml_data,
+                                  ml_m=ml_m,
+                                  ml_g=ml_g,
+                                  trimming_threshold=0.05,
+                                  n_folds=5)
+
+    dml_irm_obj.fit()
+    # create a random basis
+    random_basis = pd.DataFrame(np.random.normal(0, 1, size=(n, 5)))
+    cate = dml_irm_obj.cate(random_basis)
+    assert isinstance(cate, dml.double_ml_blp.DoubleMLIRMBLP)
+
+    groups_1 = pd.DataFrame(np.column_stack([obj_dml_data.data['X1'] <= 0,
+                                             obj_dml_data.data['X1'] > 0.2]),
+                            columns=['Group 1', 'Group 2'])
+    assert isinstance(dml_irm_obj.gate(groups_1), pd.DataFrame)
+
+    groups_2 = pd.DataFrame(np.random.choice(["1", "2"], n))
+    assert isinstance(dml_irm_obj.gate(groups_2), pd.DataFrame)
+
+

From fb0809c76326eef0d77f5af8dfc74772e86b9b63 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <47529404+SvenKlaassen@users.noreply.github.com>
Date: Fri, 9 Sep 2022 13:30:50 +0200
Subject: [PATCH 06/15] change to fit PEP8 standards

---
 doubleml/double_ml_blp.py                  | 2 +-
 doubleml/tests/test_blp.py                 | 5 ++++-
 doubleml/tests/test_doubleml_exceptions.py | 5 +++--
 doubleml/tests/test_irm.py                 | 5 ++---
 4 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/doubleml/double_ml_blp.py b/doubleml/double_ml_blp.py
index 3465c5a8..1e133d4c 100644
--- a/doubleml/double_ml_blp.py
+++ b/doubleml/double_ml_blp.py
@@ -167,4 +167,4 @@ def confint(self, basis, joint=False, level=0.95, n_rep_boot=500):
         df_ci = pd.DataFrame(ci,
                              columns=['{:.1f} %'.format(alpha/2 * 100), 'effect', '{:.1f} %'.format((1-alpha/2) * 100)],
                              index=basis.index)
-        return df_ci
\ No newline at end of file
+        return df_ci
diff --git a/doubleml/tests/test_blp.py b/doubleml/tests/test_blp.py
index d932965a..5d60cf2f 100644
--- a/doubleml/tests/test_blp.py
+++ b/doubleml/tests/test_blp.py
@@ -12,6 +12,7 @@
 def ci_joint(request):
     return request.param
 
+
 @pytest.fixture(scope='module',
                 params=[0.95, 0.9])
 def ci_level(request):
@@ -51,21 +52,23 @@ def test_dml_blp_coef(dml_blp_fixture):
                        dml_blp_fixture['coef_manual'],
                        rtol=1e-9, atol=1e-4)
 
+
 @pytest.mark.ci
 def test_dml_blp_values(dml_blp_fixture):
     assert np.allclose(dml_blp_fixture['values'],
                        dml_blp_fixture['values_manual'],
                        rtol=1e-9, atol=1e-4)
 
+
 @pytest.mark.ci
 def test_dml_blp_omega(dml_blp_fixture):
     assert np.allclose(dml_blp_fixture['omega'],
                        dml_blp_fixture['omega_manual'],
                        rtol=1e-9, atol=1e-4)
 
+
 @pytest.mark.ci
 def test_dml_blp_ci(dml_blp_fixture):
     assert np.allclose(dml_blp_fixture['ci'],
                        dml_blp_fixture['ci_manual'],
                        rtol=1e-9, atol=1e-4)
-
diff --git a/doubleml/tests/test_doubleml_exceptions.py b/doubleml/tests/test_doubleml_exceptions.py
index be2794cb..a8dfe4b5 100644
--- a/doubleml/tests/test_doubleml_exceptions.py
+++ b/doubleml/tests/test_doubleml_exceptions.py
@@ -653,6 +653,7 @@ def test_doubleml_nan_prediction():
     with pytest.raises(ValueError, match=msg):
         _ = DoubleMLPLR(dml_data, ml_l, LassoWithInfPred()).fit()
 
+
 @pytest.mark.ci
 def test_doubleml_exception_blp():
     random_basis = pd.DataFrame(np.random.normal(0, 1, size=(2, 3)))
@@ -670,8 +671,7 @@ def test_doubleml_exception_blp():
     msg = 'Invalid pd.DataFrame: Contains duplicate column names.'
     with pytest.raises(ValueError, match=msg):
         DoubleMLIRMBLP(orth_signal=signal, basis=pd.DataFrame(np.array([[1, 2], [4, 5]]),
-                   columns=['x_1', 'x_1']))
-
+                                                              columns=['x_1', 'x_1']))
 
     dml_blp_confint = DoubleMLIRMBLP(orth_signal=signal, basis=random_basis)
     msg = 'joint must be True or False. Got 1.'
@@ -693,6 +693,7 @@ def test_doubleml_exception_blp():
     with pytest.raises(ValueError, match=msg):
         dml_blp_confint.confint(random_basis)
 
+
 @pytest.mark.ci
 def test_doubleml_exception_gate():
     dml_irm_obj = DoubleMLIRM(dml_data_irm,
diff --git a/doubleml/tests/test_irm.py b/doubleml/tests/test_irm.py
index c4df18ec..52c9aeae 100644
--- a/doubleml/tests/test_irm.py
+++ b/doubleml/tests/test_irm.py
@@ -9,6 +9,7 @@
 from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
 
 import doubleml as dml
+from doubleml.datasets import make_irm_data
 
 from ._utils import draw_smpls
 from ._utils_irm_manual import fit_irm, boot_irm
@@ -126,7 +127,7 @@ def test_dml_irm_cate_gate():
     n = 50
     # collect data
     np.random.seed(42)
-    obj_dml_data = dml.datasets.make_irm_data(n_obs=n, dim_x=2)
+    obj_dml_data = make_irm_data(n_obs=n, dim_x=2)
 
     # First stage estimation
     ml_g = RandomForestRegressor(n_estimators=10)
@@ -151,5 +152,3 @@ def test_dml_irm_cate_gate():
 
     groups_2 = pd.DataFrame(np.random.choice(["1", "2"], n))
     assert isinstance(dml_irm_obj.gate(groups_2), pd.DataFrame)
-
-

From 756330f73dd26428ccb13cbdf8ed35f62caf0c9e Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Mon, 12 Sep 2022 13:34:18 +0200
Subject: [PATCH 07/15] correct variance scaling for blp

---
 doubleml/double_ml_blp.py           | 2 +-
 doubleml/double_ml_irm.py           | 2 +-
 doubleml/tests/_utils_blp_manual.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/doubleml/double_ml_blp.py b/doubleml/double_ml_blp.py
index 1e133d4c..a52e0939 100644
--- a/doubleml/double_ml_blp.py
+++ b/doubleml/double_ml_blp.py
@@ -148,7 +148,7 @@ def confint(self, basis, joint=False, level=0.95, n_rep_boot=500):
             # calculate the maximum t-statistic with bootstrap
             normal_samples = np.random.normal(size=[basis.shape[1], n_rep_boot])
             bootstrap_samples = np.multiply(basis.to_numpy().dot(np.dot(sqrtm(self._blp_omega), normal_samples)).T,
-                                            blp_se)
+                                            (1.0 / blp_se))
 
             max_t_stat = np.quantile(np.max(np.abs(bootstrap_samples), axis=0), q=level)
 
diff --git a/doubleml/double_ml_irm.py b/doubleml/double_ml_irm.py
index 459de258..dbb192dd 100644
--- a/doubleml/double_ml_irm.py
+++ b/doubleml/double_ml_irm.py
@@ -8,6 +8,7 @@
 
 from ._utils import _dml_cv_predict, _get_cond_smpls, _dml_tune, _check_finite_predictions
 
+
 class DoubleMLIRM(DoubleML):
     """Double machine learning for interactive regression models
 
@@ -384,4 +385,3 @@ def gate(self, groups, joint=False, level=0.95, n_rep_boot=500):
         df_ci.index = groups.columns.values
 
         return df_ci
-
diff --git a/doubleml/tests/_utils_blp_manual.py b/doubleml/tests/_utils_blp_manual.py
index aea79d66..3d9b5721 100644
--- a/doubleml/tests/_utils_blp_manual.py
+++ b/doubleml/tests/_utils_blp_manual.py
@@ -22,7 +22,7 @@ def blp_confint(blp_model, basis, joint=False, level=0.95, n_rep_boot=500):
     if joint:
         # calculate the maximum t-statistic with bootstrap
         normal_samples = np.random.normal(size=[basis.shape[1], n_rep_boot])
-        bootstrap_samples = np.multiply(basis.dot(np.dot(sqrtm(blp_omega), normal_samples)).T, blp_se)
+        bootstrap_samples = np.multiply(basis.dot(np.dot(sqrtm(blp_omega), normal_samples)).T, (1.0 / blp_se))
 
         max_t_stat = np.quantile(np.max(np.abs(bootstrap_samples), axis=0), q=level)
 

From a83fe0f10a4e682ffcb731bf1368314e2dc36443 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Mon, 12 Sep 2022 14:14:34 +0200
Subject: [PATCH 08/15] correct typos

---
 doubleml/double_ml_blp.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doubleml/double_ml_blp.py b/doubleml/double_ml_blp.py
index a52e0939..7277eaf5 100644
--- a/doubleml/double_ml_blp.py
+++ b/doubleml/double_ml_blp.py
@@ -7,7 +7,7 @@
 
 
 class DoubleMLIRMBLP:
-    """Best Linear Predictor for DoubleML IRM models
+    """Best linear predictor for DoubleML IRM models
 
         Parameters
         ----------

From 6385162b6c12888c613f4680c11b2cb2117a1657 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Fri, 4 Nov 2022 10:22:42 +0100
Subject: [PATCH 09/15] refactor BLP, CATE and GATE

---
 .coverage                                  | Bin 0 -> 69632 bytes
 doubleml/__init__.py                       |   4 +-
 doubleml/double_ml_blp.py                  |  37 ++++++++++---
 doubleml/double_ml_irm.py                  |  57 +++++++++-----------
 doubleml/tests/test_blp.py                 |  20 +++++--
 doubleml/tests/test_doubleml_exceptions.py |  58 +++++++++++++++++----
 doubleml/tests/test_irm.py                 |  20 +++++--
 7 files changed, 137 insertions(+), 59 deletions(-)
 create mode 100644 .coverage

diff --git a/.coverage b/.coverage
new file mode 100644
index 0000000000000000000000000000000000000000..c54090593472f4773cf5f4f99e12a7949991082a
GIT binary patch
literal 69632
zcmeHQ3sf7|nU)x#w`Pnn#(**M6R>$>-WdE&>^Se#-LA0}0RvVH64t|jZ7eJ4Imx!0
zwr6`{cC&TdY?4j0-Qb+0HF0cE&bCQULlT;_v6~Q}-K|YGF;)t-6%f+wf3HSD!ge|%
zc2BZBS2p*KX6FCyf4~2~|IB@i=Kgzkne42-$zp9a+V#bf1rn)LvQe*>NF*xwOMt)7
zM*$nM&=<(15!<)gR7vu_zEj8SmT2WCB+NeD-!Qq_=XCj+kF<wW-`6~^cu3U(2grvo
zKo}ql5C#YXm;u+lT19e7iuB<wyRp87wOfr1tTmYGw%oUE(*xV|4{X}JYnwi}PoJgM
z!zVXaze#VkwCmegtG>z9!s<=tMpJ{)ZZbFP?fco#*$x}qh^~0B53YIW#5^pIwhX35
zkg~JQ0MTYOwHmFR`U7ld4nBb|&CYh%gF66(H8q>j8Tu?<JX>#Nn^-GrZeVReEwW6F
z+3M^^oN7gKVxsg>7T3vWZGiupsM^4e*{bYl0-y$qwGnAwZ#9}5_OrGey)j(F1`Fuf
zVGq{UYH7{4Tl6g^Gp?rDX0n?sW<A@%HaP5T<4r4=gGdW~fV{}s0M>)3LJtNd`LZI|
znH9c?B!4<D+yu&*Eg@QG1yRu;jCD3UTI+%Gha9bK$T%PoEZA<jmE^5t?H0bWf^BmX
zTh7)u%A6s}kwt@24zuZ?gAE%$M?Wi>gR5b-<mgc)zynrm<jEC@QVDn^+hIG{0yka;
zqr+~&-wj}VLm?#EZzd=e$r%~aM-O60LG&2vO?F#&LlxyT;oY}z9d!KGeuG*`w3DD#
z)Y782G~p(Lr1e4LfHz0BnfwqLMk{J)dJBIdX!@mk8&Vc@(AT$E>Z5%PdyI{R1_H4l
zbXt?c+<=;j4b`E+a)`AWo7p`6XfP~%*^UIRQ>c<BZ_Buq>x5mvP#E4+yHeze<g_&D
z!z%1XytVjD5#>T5OWq`mh7Pr|Mw`PL3M5|?BtsIjQ8Ls<_#;rAT!lP&Q`)Vn!=Gm;
z<TsQqg-ns0lq7WpTLg+r=r2C15@^qC6PR-$5Uzfk(Y{}QqwZ)oD4rQFPp(S3mEuSR
zLm^7#kK7lBRen@~*>8oQkN&7AmC>GAmG59Xa}OC?AeyupO;%{K5QBqP06jXf(Fi9S
z>n#pD_FwcByC=H#%<3BinEf8&9?r_fr$h6Gf^3zcFZ#^=MjHw~woHA!#nQqW&B03o
zoVddVMVV}RT=b0>quN;vTPMP3K9q@730+fAbbkQaTBupUMr*6l+{j<1(A77H3Lzi0
zNmNcSc7UKh6WXE;TIL+g5Ga2X1*#B!UK>z>P+N_S!B&oJ5rNpgpS=MtcoEuaw6k_o
zE2~GRU}dmBwHs~v1}h5(;S!piNqv#^k&z6AC`G+wCAyJuWk?mt>()tK4!#Wr!;8TN
z*YZ|lNNxUWT$FFjIXLSkcSD?oo194ZK-@*pfxZSyKrt<7!TKNzp|ki~9Cfx1w3ISF
z3H&D?!T@1_FhCd}3=jqg1B3y>0AYYIKo}ql5C;DG7@(waQU&V&%b35BFyCh$W}0Aw
zd<X-C0m1-bfG|K9APf)&2m^!x!T@1_FhCd(%z#!#r8A+G6$Pmjl~jyY78Df}73CHd
z<`%5i7Zy|&7FQM*GjUW}GCwb0npcuna6l?ccij<MQL!g;OJ!Z%*KDvBqOR`#L#(;3
zZc|$uELSkr)$Ou07+dP<?y*{$t;SY3eov=;zXi?+=Ea2tAkttpwb|iVTc@41*)wzW
znYp>JxBzYBW$sl&jbzOG66Ss8&&(g0QNa}?ClUq-1B3y>0AYYIKo}ql5C#YXgaN_;
zVSq63B{HC;(xst+2bq#elA>`3N<*bd`9X#_4VA764KKulrfB^?d=eNRlSO;!bM(XX
zPI{s4L){N`&ARnkpY~O4yS6|xq4|mCA<a_tuhidCm#St|Z>U_VZAwnrtF$VM6jO>b
zii3)^@+<OZ<$LAn@gK(jAih36RrZ1GQP~E{OZ|x2OD&0eEADvQ#yExa=TfJ%LNXzF
z8WsG7pS!DN64%PDm36n7?6*7W>l!T%cp|j5B^Mq>%Wdlnf3HKM8#bI8+FGmzD+}M@
z`AvAn4n0i>&se}?TsC+@WLp&gGU5Tv7DI#8VzU{VOm=|TS_v>Z@i4897Q4w{N6(Z1
z%9aX1(TGQBXffJsrX~|>1&~eU0HP2KVsO|^EjCONJdEQowg5y~86|NDMnF^}c@V5A
zg|ek$B~6D6c88e-`P)lCJ}CzIHlx*UGPW2Fhmdxy2c&etNMRO0mszYZAzO+8O|)Sm
zv7mB0iU1^8uu43w5g;Am+TB?QP#J=uZdN-03s-Mz0l<m&-YCtG@$vy>KE~?^8!r!#
z=4ZT)+1hQ%1+@7XuLCN#JqJJ%1T#Ksyg7lQd>tie7Y_p(RIQ~XT_Poo4F}Nkt8JZ|
zvq628VChJQ342LR7GMZ(+&o4@i^FEefoWSN$cqfMI6k%;I}Nii+t-M<gvr`!XgAsS
z8(K{eD9z0%@>j2>Bu>GMV1ptUK2mL4CB8C9e2zP8SqU&AeG|b%NBk84B0AzDn6Pcj
zmWz(~AP+3o=%H-kP7$QhbSTW@mW&t(K=L+6eY4f#XhWvlz6?M_Hck@)nPZKu>3}0L
z4q}`rb5t&+Bu4~07T-+ZVOw+uusIDh5Zz9(4q;oCq*9WDBFl%O%Tq*0CR8p+-nRsl
z&K6uE_JPoio59A~4K`zI8$2s*Yl9y_KtIm`m|K&@n_?C-ba%INF`%TxsF{_uJFI4d
zy|WG9Deg)FEa4ZW=vuZ~8rc>@Bim$z8$RT+w=V)Tkr&Hw{n(BMwhjMG0U%W;ijHHA
zt)L5fx*DVxikAjSuo}L5PJmhn59z$Gqv9LjJ_~+8z|?@ezIFk?P~yxK1+a+$0MQY!
zzNIZ90@9)*AjpIAg*r-N5}-RGoTsf!E2b>?-Ge6NO;A>uMvS1rU^1KR1_KgQ3lj{w
zm;nsLx9w6DC9w&q9qx!B!5}N8O0jam>q(hHtfCM!4nYko<-(#a?}O0$LqR7PZ1UqN
z2`iwIL0LqnOstlCb1R}K$pHZ+h3x=^mBxvsGT0d_lnQgqpqoIqdqH&^q*CS#>Vh-O
z8C?lePbV_jx{v5P=tr0nx~EldYR{^+%a>^jRSzqA<3Ex;M!gU>Abmr(Upu4i&`fH6
zsyU!ZSHGwJwz^E^S4otwDs9SQ#k9hzSSKHsKPTT8e=@!yK23H(RzrPG?W0oSTydM?
zl+xqUO39?;Ur@nc{JBTwS}F2&;|u%$)!St*CI-MccMetCWG-C{Fwy=0%B?b&CI*zK
z+mDLPGM7R)2;UwMN#Q$)@^Z={906bHf-YB9D|1OjOF~DIzpF0YA#)|gB0t+_=aEY8
zl)2J{BZXOr=+s7Zkk`j#m`E&W$HhBku4LgV@w7&O%=Z6_?vlZei3o@z2*g%*v)V=W
z{|jUC-YCtWeg*S2Ubz3CKY!yz^#AkbYdqBd&%I6NN)XQYtnuaqiku3nJq8%iU|luU
zB~~)r|6jXB=28ilj&zuCRLB<BxOt4(j(b)Nq4p;I|I8RHf%>|0`~Pb;P)^~DV1ptU
zKEAAup)yE(jytT1!8Z|1RK#ByQ^ZFwVcV_{AMrsRymGlXrwGy*(f`-SLcrYqe?}}W
zag+Z4vKZpvjs5@hD(Z-E$Ksm_-~V44Q#+0B|EHBw2gQ~TmP{2NnfN|J%Kb7|w(t_M
z4}|-&x9I;biNO@Jn4$iE@*Of)O01gA>HjYlcTtM2<qiG+q!=!i;rd1O{}+jmV-fxT
z#2BP!JF*MKMM2)zZ|MIgY?Qeuk!FhO|1XFs0!H=!8SxP?*wdwpD3cJ~Bl`b3QDsH+
z|Fxn7Bl`aufr8<FXQ=<LE~9LMYB#6<uM#a6yq+jUE6Vr(6#}C!?}JhOe|Z7L3aMmJ
z7Gh<*Xf65vzpR)#Af%+Q9iTAkFHs3SX8M08T$11a`{L0Da+EMY7$6J~1_%R$0m1-b
zfG|K9APf)&2m}Am8K7XbfDS$XFJ)#V@Sl7L1B3y>0AYYIKo}ql5C#YXgaN_;VSq3|
z82Bq>fTDCNH2=R*p^`9O=4XtSeuh3yAE1@GU+G@bwd*XpYVDBrm)i5%f73ppJ)$kr
zyshceJgw=_+@&egWU6P>pQ%4o|A+cV>c3OBs<*0h)yXQK>i4P_RoyD9>LJzrs%llH
zO067M{#N;#@}%-HrA4`0S*BzZ!-`)hdK8Bh&5C;z6^b>W0QnFG2m^!x!T@35%U~dF
zqfDwQ-`I0`a;kdJHr75dUE%i!hP{64+4J!UwQz7%ZO;T8x_s?)U|(KxQVs0NsexUm
zu1_C0%}w`rcXwN)NgL4a4d!>oI-eRmAGkI&=DTF=)Ff0RfNCCKl<TX@E2@HBO;tUY
zgC`#G9dp;^<qv|;r_Z~G$0w28z-0U7Yj;myb&qzr4SC(Y$(0~eP<iGu==jPjW7jTv
zy0~L!>qg=hRltF@73Mw1o*J8+zBUw8yPzEQ)|Z?6x%L@fAkWM74!FJhM*5%VwXpu)
z6I`doeYCTC&-A&`_iLv|@9Q1fy`~H#?kd}N0C0-Eo{N@I@4%&r6Q|C)M-9z>zrWAh
z1zK99>7{U9bt$k_Jvdq29(ebt|7^hTJL2_Yb&^Zqa6w7Wlfc5*$zR{?_Z{>2j(y|A
zp5*oD!1^bz_w`%42c8%_H*x9G`&{me>cnC=kX4K%YMwVZp_rV3+_WOtSzgpr0%Z)^
znmvE0x#n?m-u}`20jp+dAsnnMEFJd@d&ghq`hOG1<0ke^O<4b%JFz=Hu>j!}V7#-Z
z-9RqF%SU+mLA>HQcxid)U|tZ<divONJp=ikC-U>V0~XWQ5_1t;ZZup{4(!ay!4^E~
z{}l8<WGzlwhjy>)DXATt{3qa$x1D?~?55XFtjvb|_UzK}Q7>@t<I%T1;9eZyF8Qu^
z8%8dj2n2G+4-KgoWx+{nvq~qsJVO(;95)4j98;7Ddz&)Nd-fW@J{!EgYkTqrFOK$e
zqi+AusMmV-2dCkSf864^Wc@~ylY>mUWDUp^t;stO0H2xsW#H4RKIGERU8zc44M$2>
z2iZ93X>|5@{k_-=Ez(7+Q2DERo*bM!`m%HQoX68Wa49!wCG5>vS$80Sp&Y!mGgrXw
zdsdVVyFGoq15V$e;pco0O+l5qoKuE=Lw~;C-}{rH@vivP<*3Bv`-Wz~ExzyPRk_q;
z@VK4ndN@|C4{9@Z_LcU_fgUGRVLIsfOETbaQ3g_|y7tsrr@xPzLiM<$NLmI*a+YCL
zF9)W<1#y#2N=LiXdrF*7jZOU_@ETt+^-|cqc<Dsnj7O852HOv$l@2>cy(9Yq<3oK%
zx<{@Z-W%Wy{mxNCpqm?T|JghGexK*-bSj*>EcM-5=fLQ`KyF!53PMi_qQC9G);@{}
z%31=u?^|*va5Qjod}@QwcV+5|`|V!0+xhOyB(x9z$SZFAsm)A=Gw(@0)6Y%ydnTuz
zsxp|TMy&(f2`-o06WrVr$Q|`|r7ngOOBb83`(~gKz3v@3vaG*#=uGR7b7WYT1jm*p
zo#`K&oSGcVXj~L8Rk?0oWWH|Ujz0O1S4Up!ogSI-`Hux=Mt!wz?v%6U6lde6fVn}x
zm;0D|*WwBIoUizXhlZz`|NigFJiRH;4-fbKdi_<c_vnVvlfIc&ZhC5J$~zfwdpT~>
zGwzvbzbz4!LMNK1F4X09JKdvhZp?Yzd2Dcy8yxGY7#r+*XY!QqNn4=GTQYd++-1(|
z898~HYd_LH*){6%*`6928TJSK=a?l6LHnYG=83VX$tiBe8K~ug|D1^la3m|?qvAuJ
z^gDWbPkFOnj@!9l0qk6{VE1J8#n*g?U$hM5YZ%zGgsB<$+8y`SE}+q2x~F>3;^wSp
z>++HfI;3cg?!w_X$??7C7D^;nCTUO4+vx4=5=r|JZYSP&sf*u`4DI2!Uhlo&wtgCT
z0w0%jdHQ^(gF<LWU)ihe-d9?CId!2Hifq)LVVf@BHFeii4VOQ9VP>HHt&g*9rz}%X
z6?s1#ow#%AUyt5@%riXB9yv2}*FL!hL}*Q1nHusowOJl4BA6ckjPtm!e89QAM{3{f
zt?jHGVFrhWxaUXwN3Q+)9M|t2_U0~Afnbda1fM<k&#%1qV1fEU_zAby{l|u`@7VjT
z!<teh95*Zbiz9&l#Ch|5eWSjb>4{@A$2zzdJm*KfpE$U7r*oa;vj?Qd+q+*dDiB$l
z701$QpOS`UU;m8rIej{aJ)<)LZuB$QxH@v=w(t0MyFZ=$^OSSsV_&VKd33n5Yh-8|
zVtt?gS?4dl(>LOsKII8~opU?gAB=zP$8N8evA1gy<)B)Y{KW4kucmbuf6wb54RG$|
z@vw7i{Pe`c%%#b*?Mz37cW>wL#k)uQ+}GDlQI~2bq|Gv<+Fi1%DOj(KGZ(pOjz-%v
z=lf27@Zx(LpK-mJ`uLlrsgG~xde5GIk2~~u!2KTgV%hY?Kf15wzJAK@_kA$*kG+M#
zdVLd0<~(@2|L`;1_~Y9zzPNj&X;A7~PeDl!@27e`*>#-THR1Dr{BC=9^UEI2&80oQ
z{h9anuTTB^?axSl_sHW1{!0rx(uRNgTiB<2+{awKdNmIqM}}Jmuk5%_EqTdz@ois{
zMB?>%_eqWqk8ddd<dckG`LFwbcg*X(dST+|k?B4-#nb=sLrv%X{qC^~Lp_Jv+ohj-
zVK4f;e%0gmpBtVU4vd`t^t!|G#P4c$On?5ZcY3dK-T=oXotvJ%=*}yO0}7ksM&A5x
zb5IP@<A36Y`d&1Aa?CSu@YI<9r6bOtw&(k+4qZ9yu}r(&y$Y#JrCK;0FGKVH8|lpw
z`0c+xGvmx<W|a9Y^B(gSj0T)%e#-PP&oe({{)zbq=G)9SnJ(rKV`26)M&@ss`<Y$L
zHl~ItV+xpTW(AYREMjzwoaX4y=|9n*(Emk$On*SXOaFp?ojyyyO#hgEmOe?JpubB$
zM!V?4w2f||S$ZG+b^0E92Pi;3gaN_;VSq3|7$6J~1_%R$0m1-bfH3eCX8`r;r5iWm
zthN?sH8nWfumNY))i|rF!dYb{&MGQ!R$h*?vND{Nmg20W1ZV5l<E*$CXGKLgD=frW
zK>^P4^Kq7!hqK&VoaN-;Y~4DXtzC<=>};H6W#KF{6K8AI;B573oUK}gvz04xwqga&
zmM_PdUXQbk44f@nhO_i^oGo37v$QmvrKaL6B?V_omf$Qo8E1<Z<18r&XNwl$ED?{3
zNEa@|-x3mVwqOCy7zSrFjWeANXId@JG#Z?#)i_hBaHdq^OrgM;T#mE&c$~>(IHM?>
z#l^{B2msIjOBQ^^U5uzl7$6J~1_%R$0m1-bfG|K9APf)&2m^!x!oa*3K=c2k|35DZ
zi7;V+FhCd}3=jqg1B3y>0AYYIKo}ql5C*>T44~)#H__+N`v2?9pO`Dm81tXZFgyeB
zYvx_%m(0%@H}e`i2k;W}W9DC(r<orx-S8~H<IH2sBg`?Tld;3|01q)N^B}W_+0EPw
z&jj4YY-Tnv<xDY?2hRnpVltRiCW%?VXc##orDtd_JwcDte}v})M(7Xe-_V2fTl5?7
zjKI0CJXJ(N!T@1_FhCd}3=jqg1B3y>0AYYIKp6OP7@#)6@7GfsQBsSN8kB56Ni|BU
zP*RDK3Y3(iqzomcC@De7dXyBSqzEO2C@DZmK1%XXl8cfYl&nL^T9jm?Bnu^(C|QG&
z)hJnol9eb~fs*AY(W4{-CCgBfj*_J)Nkd60N>Wg=1SQERS&Wh-lq^C?B1#sbBmpH0
zP{N>uMu`q3T9jx|qDF}dB}$YiP$EZ3JW6CJp->Ws68!uBaY~6^`i6vg7oOWcuPb5d
z=|m=5_Yr*u{RneH_q6Iw?OD}!`7&*x>S0B1{716Ks2Ab}q;Kf<YiG0_nn}%1H3u~5
z>i5*&R+p*#Dv9z{rA=9^m{wR7>*V9|=j8k1PsTUIr^znJYN*eteN;-ED{fPqQhHok
zDVdb~%U^aKICrIx7bv3FuiEPHOIXpX?X4_)H?<n=tf9$jX*JkFZ-}iH_Xg82;GFk3
zSH<xDMWh#gzqP?`WsP=t(_>`}FGEHsQE!8;i0NIZSQ(?)VQc|C%Bd8|&ji0+KEw$K
zmc{TkX!QQh&|9@jV|XhrmTxm!?IvT3;cy74MBLkY`L}}3!3w_~dp-5J<a?ss4(b)w
z3&<(nDRU*y2c#ohyP~^ft_<N&H>(|hg{xO6yD-1%bsUEJ70lOo9U<f8&);|*v$e~U
z?~}@elf}Dpgz-9{a=9_QVjO$ZoIsHy{w>=G1~gb#O?8Q_g|Xp)v6%(owK2U{99JOh
zCE4N{H}7-}Ee@NVwSo>=ifQRLMAiVu$97{Ugt!n)W(<}<@49a{+4mb-O*Wg!+-w7N
z)`)-eHG;7_%!ts{F<dv0_#Agw6+^^FFi{bIWlRwt!GvwQLgkn45*P779(Y(T&M9!?
z!mq36dDO>3fawri!5#I@R*RzznKC05mp~wMtg$SHt2nMqlsVD`zKI%fVXNbtNxj8_
z?f{m?)K0MuVOyq^QU`@Mvtap9bSm6@{YIn<0EZx%a=*-#ExbhR1L1es+gQ87W^8S1
zVGTBjH+DNTPrzIfgDGY)Lw9${cgS2Rv1(>z?GCHiVDD^Wk&`VJcTtM2Wviu;Z80>m
zO-4tH9ib(~aIp;6kL_q++w3Nb86hnaAIBP7K^LRl2-1l$NP{F;4VAr6TomMe9TndI
z_eNH>(bRyvK4GKGMTs<16u^R*B4B+>+iV161isQe*lMwLK^~N+izt&2-4WqDZ8}k9
zHCW6|s5~f3D@xE{Fqutug8>9J0tLhGoj0JE0)pzl^zwN}eGA*#Qa39YWJM)fE_gjr
zidK}rJyHmay1WlsSsZkN!A4#{u|iB3j+ODEwd9+dte83=q@*yNU;-GYu!)i}KxYo*
z;tDB~N<cu5a1*rtU#~bSVg8-5GCA}g=u@<TUZeZH?nzypZjtsK?f101wY27!8mFd0
zeO3K}+Mv!*T~<A<+M`;e{F&0JEKvMj@eBZv4`F~XKo}ql5C#YXf*4ps-TMVsdAM_Q
zpu5dY;_uhd&@SKG-Y9O|5Z~n@aokCRKAO$IcEDhc+1S$33Ch&Y0}lECW{1gUG&itl
zloa(QYQ%MZ!`fNdHbaBeVnY}k?vc631OcA-h?xF}8lR{Tc=L+*oy7odze1z?F^n4F
z!OZ`w@%R`<E28xI-yT4pFF$_`L*C-XCU7(%!_9nmKBDd4INU5gevq+Z>|Qq+D;7T;
z#1G8;pA5R9_5an9rzOlg%oFgt{#md-{~Y}=y^~(3`%w2oU9)bz)~9_{+paCpOlW?h
zc}TNV{VVmi)TOE!)f+08YMYW%_A0H)BE^*AjN+hTt^A7oS@~Xhdi;m+KZvi7PnCTj
zdsMc8@=`yd_EJmY-ikXOw=qs3{khaBt&mJW!GGy<cePC7S}9;qBf^2v4a_a)|F=~E
zAR``d&dkBqN`TRchl!s5-%<f68u2Jmm-kKO0HP2K!jFd`lEO0xWw0{FAs7L>Hprtp
zxKb$l3+`e=BiPaRC$sbax0ir?lGu_XrfS18<h#}bQo3NIFbh#wVM4YP1Da^VL}CFs
zJBk1#S+Ghxtq~xz^Z$1i0#t@zsGHR;a{hm70l<m&-YCtWez)fX%6yC$p8wyK2T1cX
zUc~(WmRvxakMYpW(DobvNf6BVtnuaqigH*u)-E0fG^m2rW?dpB!}I@}vq60!gTcYL
z8R;<LMo^Ol7{VJjk1;!OzbzBwMTXj&+>mWwBi<6|9%k<Re>E)9bBf~=!SFrlZL7pr
z28qvchb=1sMx<{dnCOVV0zgDZd;}A=Z5gai6dCbB9=x(f4`mB?3fwO6Z4l4nmW&t(
zm^=T!eHnm=Y@Aq$Io8;k4mcv?;Enfmm9U`mh+xO!n+ZSvzc~#w5Zz9(4q;oCz%tT<
zBFhI$mcznRk&y|N3zGLO0j0AAmxz5JbSDrTTDo!me`~UMQ_Nz9=Kpst29%T-HJda4
zzbgr_gkO}RYk9-`|LuzaP2|NgT)(LK|7utgEHaKo%>P%zT4J%%vlH3d5}+2sLpty4
zH_ZRnE&v!xoSCBL|2HuJAUXm@&HtCd0&0;FFgVj)2#c;w0(6g<|1X0j+QQ3<nEx+>
zW!=IBBPMpsU@5o|!Jvy7U~rai+oiC4+$N-U7Bq5z1cR)U!isZoa>46K87xv4rzk)F
zUkMA_g+yK62czcy^I<(aE1;4=S%`RLuy$UYmi+vG5v;dAAfTkM9iXsMSe`GA%3x=#
z5LWaHam)xeLH+-D`m6-r_SecRqkjjR<U<%B3=jqg1B3y>0AYYIKo}ql5C#YXgn_RP
z1FJVtl6!C6uMVxAtA^hP5INd~CPw+O>N{&;lquuZrG&=kqGva6xq9x-n%LoPvU+aw
S27nVjbd1L*QRO06&;1W$loSI1

literal 0
HcmV?d00001

diff --git a/doubleml/__init__.py b/doubleml/__init__.py
index ccd491ab..c4aa5971 100644
--- a/doubleml/__init__.py
+++ b/doubleml/__init__.py
@@ -5,7 +5,7 @@
 from .double_ml_irm import DoubleMLIRM
 from .double_ml_iivm import DoubleMLIIVM
 from .double_ml_data import DoubleMLData, DoubleMLClusterData
-from .double_ml_blp import DoubleMLIRMBLP
+from .double_ml_blp import DoubleMLBLP
 
 __all__ = ['DoubleMLPLR',
            'DoubleMLPLIV',
@@ -13,6 +13,6 @@
            'DoubleMLIIVM',
            'DoubleMLData',
            'DoubleMLClusterData',
-           'DoubleMLIRMBLP']
+           'DoubleMLBLP']
 
 __version__ = get_distribution('doubleml').version
diff --git a/doubleml/double_ml_blp.py b/doubleml/double_ml_blp.py
index 7277eaf5..c49730e5 100644
--- a/doubleml/double_ml_blp.py
+++ b/doubleml/double_ml_blp.py
@@ -6,8 +6,8 @@
 from scipy.linalg import sqrtm
 
 
-class DoubleMLIRMBLP:
-    """Best linear predictor for DoubleML IRM models
+class DoubleMLBLP:
+    """Best linear predictor (BLP) for DoubleML with orthogonal signals.
 
         Parameters
         ----------
@@ -17,11 +17,16 @@ class DoubleMLIRMBLP:
         basis : :class:`pandas.DataFrame`
             The basis for estimating the best linear predictor. Has to have the shape (n,d),
             where d is the number of predictors.
+
+        is_gate : bool
+            Indicates whether the basis is constructed for GATEs (dummy-basis).
+            Default is ``False``.
     """
 
     def __init__(self,
                  orth_signal,
-                 basis):
+                 basis,
+                 is_gate=False):
 
         if not isinstance(orth_signal, np.ndarray):
             raise TypeError('The signal must be of np.ndarray type. '
@@ -41,6 +46,7 @@ def __init__(self,
 
         self._orth_signal = orth_signal
         self._basis = basis
+        self._is_gate = is_gate
 
         # initialize the score and the covariance
         self._blp_model = None
@@ -89,15 +95,16 @@ def fit(self):
 
         return self
 
-    def confint(self, basis, joint=False, level=0.95, n_rep_boot=500):
+    def confint(self, basis=None, joint=False, level=0.95, n_rep_boot=500):
         """
-        Confidence intervals for BLP for DoubleML IRM.
+        Confidence intervals for the BLP model.
 
         Parameters
         ----------
         basis : :class:`pandas.DataFrame`
             The basis for constructing the confidence interval. Has to have the same form as the basis from
-            the construction.
+            the construction. If ``None`` the basis for the construction of the model is used.
+            Default is ``None``
 
         joint : bool
             Indicates whether joint confidence intervals are computed.
@@ -138,6 +145,20 @@ def confint(self, basis, joint=False, level=0.95, n_rep_boot=500):
             raise ValueError('Apply fit() before confint().')
 
         alpha = 1 - level
+        gate_names = None
+        # define basis if none is supplied
+        if basis is None:
+            if self._is_gate:
+                # reduce to unique groups
+                basis = pd.DataFrame(np.diag(v=np.full((self._basis.shape[1]), True)))
+                gate_names = list(self._basis.columns.values)
+            else:
+                basis = self._basis
+        elif not (basis.shape[1] == self._basis.shape[1]):
+            raise ValueError('Invalid basis: DataFrame has to have the exact same number and ordering of columns.')
+        elif not list(basis.columns.values) == list(self._basis.columns.values):
+            raise ValueError('Invalid basis: DataFrame has to have the exact same number and ordering of columns.')
+
         # blp of the orthogonal signal
         g_hat = self._blp_model.predict(basis)
 
@@ -167,4 +188,8 @@ def confint(self, basis, joint=False, level=0.95, n_rep_boot=500):
         df_ci = pd.DataFrame(ci,
                              columns=['{:.1f} %'.format(alpha/2 * 100), 'effect', '{:.1f} %'.format((1-alpha/2) * 100)],
                              index=basis.index)
+
+        if self._is_gate and gate_names is not None:
+            df_ci.index = gate_names
+
         return df_ci
diff --git a/doubleml/double_ml_irm.py b/doubleml/double_ml_irm.py
index 69d41fcd..aaf08b26 100644
--- a/doubleml/double_ml_irm.py
+++ b/doubleml/double_ml_irm.py
@@ -1,11 +1,11 @@
 import numpy as np
 import pandas as pd
+import warnings
 from sklearn.utils import check_X_y
 from sklearn.utils.multiclass import type_of_target
 
 from .double_ml import DoubleML
-from .double_ml_blp import DoubleMLIRMBLP
-
+from .double_ml_blp import DoubleMLBLP
 from ._utils import _dml_cv_predict, _get_cond_smpls, _dml_tune, _check_finite_predictions
 
 
@@ -334,43 +334,40 @@ def cate(self, basis):
 
         Returns
         -------
-        model : :class:`doubleML.DoubleMLIRMBLP`
+        model : :class:`doubleML.DoubleMLBLP`
             Best linear Predictor model.
         """
+        valid_score = ['ATE']
+        if self.score not in valid_score:
+            raise ValueError('Invalid score ' + self.score + '. ' +
+                             'Valid score ' + ' or '.join(valid_score) + '.')
+
         # define the orthogonal signal
         orth_signal = self.psi_b.reshape(-1)
         # fit the best linear predictor
-        model = DoubleMLIRMBLP(orth_signal, basis=basis).fit()
+        model = DoubleMLBLP(orth_signal, basis=basis).fit()
 
         return model
 
-    def gate(self, groups, joint=False, level=0.95, n_rep_boot=500):
+    def gate(self, groups):
         """
-        Calculate group average treatment effects (GATE) for a given basis.
+        Calculate group average treatment effects (GATE) for mutually exclusive groups.
 
         Parameters
         ----------
         groups : :class:`pandas.DataFrame`
-            The group indicator for estimating the best linear predictor. Has to have the shape (n,d),
+            The group indicator for estimating the best linear predictor. Has to be dummy coded with shape (n,d),
             where d is the number of groups or (n,1) and contain the corresponding groups.
 
-        joint : bool
-            Indicates whether joint confidence intervals are computed.
-            Default is ``False``
-
-        level : float
-            The confidence level.
-            Default is ``0.95``.
-
-        n_rep_boot : int
-            Number of bootstrap samples for joint confidence interval.
-            Default is ``500``.
-
         Returns
         -------
-        df_ci : pd.DataFrame
-            A data frame with the confidence interval(s).
+        model : :class:`doubleML.DoubleMLBLPGATE`
+            Best linear Predictor model for Group Effects.
         """
+        valid_score = ['ATE']
+        if self.score not in valid_score:
+            raise ValueError('Invalid score ' + self.score + '. ' +
+                             'Valid score ' + ' or '.join(valid_score) + '.')
 
         if not isinstance(groups, pd.DataFrame):
             raise TypeError('Groups must be of DataFrame type. '
@@ -380,17 +377,15 @@ def gate(self, groups, joint=False, level=0.95, n_rep_boot=500):
             if groups.shape[1] == 1:
                 groups = pd.get_dummies(groups, prefix='Group', prefix_sep='_')
             else:
-                raise TypeError('Columns must be of of bool or int type or the data frame only should contain '
-                                'one column.')
+                raise TypeError('Columns of groups must be of bool type or int type (dummy coded). '
+                                'Alternatively, groups should only contain one column.')
+
+        if any(groups.sum(0) <= 5):
+            warnings.warn('At least one group effect is estimated with less than 6 observations.')
 
         # define the orthogonal signal
         orth_signal = self.psi_b.reshape(-1)
-        # fit the best linear predictor
-        model = DoubleMLIRMBLP(orth_signal, basis=groups).fit()
+        # fit the best linear predictor for GATE (different confint() method)
+        model = DoubleMLBLP(orth_signal, basis=groups, is_gate=True).fit()
 
-        # reduce to unique groups and create confidence interval
-        unique_groups = pd.DataFrame(np.diag(v=np.full((groups.shape[1]), True)))
-        df_ci = model.confint(unique_groups, joint, level, n_rep_boot)
-        df_ci.index = groups.columns.values
-
-        return df_ci
+        return model
diff --git a/doubleml/tests/test_blp.py b/doubleml/tests/test_blp.py
index 5d60cf2f..2f125d50 100644
--- a/doubleml/tests/test_blp.py
+++ b/doubleml/tests/test_blp.py
@@ -26,11 +26,13 @@ def dml_blp_fixture(ci_joint, ci_level):
     random_basis = pd.DataFrame(np.random.normal(0, 1, size=(n, 3)))
     random_signal = np.random.normal(0, 1, size=(n, ))
 
-    blp = dml.DoubleMLIRMBLP(random_signal, random_basis).fit()
+    blp = dml.DoubleMLBLP(random_signal, random_basis).fit()
     blp_manual = fit_blp(random_signal, random_basis)
 
     np.random.seed(42)
-    ci = blp.confint(random_basis, joint=ci_joint, level=ci_level, n_rep_boot=1000)
+    ci_1 = blp.confint(random_basis, joint=ci_joint, level=ci_level, n_rep_boot=1000)
+    np.random.seed(42)
+    ci_2 = blp.confint(joint=ci_joint, level=ci_level, n_rep_boot=1000)
     np.random.seed(42)
     ci_manual = blp_confint(blp_manual, random_basis, joint=ci_joint, level=ci_level, n_rep_boot=1000)
 
@@ -40,7 +42,8 @@ def dml_blp_fixture(ci_joint, ci_level):
                 'values_manual':  blp_manual.fittedvalues,
                 'omega': blp.blp_omega,
                 'omega_manual': blp_manual.cov_HC0,
-                'ci': ci,
+                'ci_1': ci_1,
+                'ci_2': ci_2,
                 'ci_manual': ci_manual}
 
     return res_dict
@@ -68,7 +71,14 @@ def test_dml_blp_omega(dml_blp_fixture):
 
 
 @pytest.mark.ci
-def test_dml_blp_ci(dml_blp_fixture):
-    assert np.allclose(dml_blp_fixture['ci'],
+def test_dml_blp_ci_1(dml_blp_fixture):
+    assert np.allclose(dml_blp_fixture['ci_1'],
+                       dml_blp_fixture['ci_2'],
+                       rtol=1e-9, atol=1e-4)
+
+
+@pytest.mark.ci
+def test_dml_blp_ci_2(dml_blp_fixture):
+    assert np.allclose(dml_blp_fixture['ci_1'],
                        dml_blp_fixture['ci_manual'],
                        rtol=1e-9, atol=1e-4)
diff --git a/doubleml/tests/test_doubleml_exceptions.py b/doubleml/tests/test_doubleml_exceptions.py
index 47aed3d3..5c2b3b09 100644
--- a/doubleml/tests/test_doubleml_exceptions.py
+++ b/doubleml/tests/test_doubleml_exceptions.py
@@ -2,7 +2,7 @@
 import pandas as pd
 import numpy as np
 
-from doubleml import DoubleMLPLR, DoubleMLIRM, DoubleMLIIVM, DoubleMLPLIV, DoubleMLData, DoubleMLClusterData, DoubleMLIRMBLP
+from doubleml import DoubleMLPLR, DoubleMLIRM, DoubleMLIIVM, DoubleMLPLIV, DoubleMLData, DoubleMLClusterData, DoubleMLBLP
 from doubleml.datasets import make_plr_CCDDHNR2018, make_irm_data, make_pliv_CHS2015, make_iivm_data, \
     make_pliv_multiway_cluster_CKMS2021
 
@@ -664,19 +664,24 @@ def test_doubleml_exception_blp():
 
     msg = "The signal must be of np.ndarray type. Signal of type <class 'int'> was passed."
     with pytest.raises(TypeError, match=msg):
-        DoubleMLIRMBLP(orth_signal=1, basis=random_basis)
+        DoubleMLBLP(orth_signal=1, basis=random_basis)
     msg = 'The signal must be of one dimensional. Signal of dimensions 2 was passed.'
     with pytest.raises(ValueError, match=msg):
-        DoubleMLIRMBLP(orth_signal=np.array([[1], [2]]), basis=random_basis)
+        DoubleMLBLP(orth_signal=np.array([[1], [2]]), basis=random_basis)
     msg = "The basis must be of DataFrame type. Basis of type <class 'int'> was passed."
     with pytest.raises(TypeError, match=msg):
-        DoubleMLIRMBLP(orth_signal=signal, basis=1)
+        DoubleMLBLP(orth_signal=signal, basis=1)
     msg = 'Invalid pd.DataFrame: Contains duplicate column names.'
     with pytest.raises(ValueError, match=msg):
-        DoubleMLIRMBLP(orth_signal=signal, basis=pd.DataFrame(np.array([[1, 2], [4, 5]]),
-                                                              columns=['x_1', 'x_1']))
+        DoubleMLBLP(orth_signal=signal, basis=pd.DataFrame(np.array([[1, 2], [4, 5]]),
+                                                           columns=['a_1', 'a_1']))
 
-    dml_blp_confint = DoubleMLIRMBLP(orth_signal=signal, basis=random_basis)
+    dml_blp_confint = DoubleMLBLP(orth_signal=signal, basis=random_basis)
+    msg = r'Apply fit\(\) before confint\(\).'
+    with pytest.raises(ValueError, match=msg):
+        dml_blp_confint.confint(random_basis)
+
+    dml_blp_confint.fit()
     msg = 'joint must be True or False. Got 1.'
     with pytest.raises(TypeError, match=msg):
         dml_blp_confint.confint(random_basis, joint=1)
@@ -692,9 +697,12 @@ def test_doubleml_exception_blp():
     msg = 'The number of bootstrap replications must be positive. 0 was passed.'
     with pytest.raises(ValueError, match=msg):
         dml_blp_confint.confint(random_basis, n_rep_boot=0)
-    msg = r'Apply fit\(\) before confint\(\).'
+    msg = 'Invalid basis: DataFrame has to have the exact same number and ordering of columns.'
     with pytest.raises(ValueError, match=msg):
-        dml_blp_confint.confint(random_basis)
+        dml_blp_confint.confint(basis=pd.DataFrame(np.array([[1], [4]]), columns=['a_1']))
+    msg = 'Invalid basis: DataFrame has to have the exact same number and ordering of columns.'
+    with pytest.raises(ValueError, match=msg):
+        dml_blp_confint.confint(basis=pd.DataFrame(np.array([[1, 2, 3], [4, 5, 6]]), columns=['x_1', 'x_2', 'x_3']))
 
 
 @pytest.mark.ci
@@ -709,6 +717,34 @@ def test_doubleml_exception_gate():
     msg = "Groups must be of DataFrame type. Groups of type <class 'int'> was passed."
     with pytest.raises(TypeError, match=msg):
         dml_irm_obj.gate(groups=2)
-    msg = 'Columns must be of of bool or int type or the data frame only should contain one column.'
+    msg = (r'Columns of groups must be of bool type or int type \(dummy coded\). '
+           'Alternatively, groups should only contain one column.')
     with pytest.raises(TypeError, match=msg):
-        dml_irm_obj.gate(groups=pd.DataFrame(np.random.normal(0, 1, size=(50, 3))))
+        dml_irm_obj.gate(groups=pd.DataFrame(np.random.normal(0, 1, size=(dml_data_irm.n_obs, 3))))
+
+    dml_irm_obj = DoubleMLIRM(dml_data_irm,
+                              ml_g=Lasso(),
+                              ml_m=LogisticRegression(),
+                              trimming_threshold=0.05,
+                              n_folds=5,
+                              score='ATTE')
+    dml_irm_obj.fit()
+
+    msg = 'Invalid score ATTE. Valid score ATE.'
+    with pytest.raises(ValueError, match=msg):
+        dml_irm_obj.gate(groups=2)
+
+
+@pytest.mark.ci
+def test_doubleml_exception_cate():
+    dml_irm_obj = DoubleMLIRM(dml_data_irm,
+                              ml_g=Lasso(),
+                              ml_m=LogisticRegression(),
+                              trimming_threshold=0.05,
+                              n_folds=5,
+                              score='ATTE')
+    dml_irm_obj.fit()
+
+    msg = 'Invalid score ATTE. Valid score ATE.'
+    with pytest.raises(ValueError, match=msg):
+        dml_irm_obj.cate(basis=2)
diff --git a/doubleml/tests/test_irm.py b/doubleml/tests/test_irm.py
index 52c9aeae..8609c55a 100644
--- a/doubleml/tests/test_irm.py
+++ b/doubleml/tests/test_irm.py
@@ -124,7 +124,7 @@ def test_dml_irm_boot(dml_irm_fixture):
 
 @pytest.mark.ci
 def test_dml_irm_cate_gate():
-    n = 50
+    n = 9
     # collect data
     np.random.seed(42)
     obj_dml_data = make_irm_data(n_obs=n, dim_x=2)
@@ -143,12 +143,24 @@ def test_dml_irm_cate_gate():
     # create a random basis
     random_basis = pd.DataFrame(np.random.normal(0, 1, size=(n, 5)))
     cate = dml_irm_obj.cate(random_basis)
-    assert isinstance(cate, dml.double_ml_blp.DoubleMLIRMBLP)
+    assert isinstance(cate, dml.double_ml_blp.DoubleMLBLP)
+    assert isinstance(cate.confint(), pd.DataFrame)
 
     groups_1 = pd.DataFrame(np.column_stack([obj_dml_data.data['X1'] <= 0,
                                              obj_dml_data.data['X1'] > 0.2]),
                             columns=['Group 1', 'Group 2'])
-    assert isinstance(dml_irm_obj.gate(groups_1), pd.DataFrame)
+    msg = ('At least one group effect is estimated with less than 6 observations.')
+    with pytest.warns(UserWarning, match=msg):
+        gate_1 = dml_irm_obj.gate(groups_1)
+    assert isinstance(gate_1, dml.double_ml_blp.DoubleMLBLP)
+    assert isinstance(gate_1.confint(), pd.DataFrame)
+    assert all(gate_1.confint().index == groups_1.columns)
 
+    np.random.seed(42)
     groups_2 = pd.DataFrame(np.random.choice(["1", "2"], n))
-    assert isinstance(dml_irm_obj.gate(groups_2), pd.DataFrame)
+    msg = ('At least one group effect is estimated with less than 6 observations.')
+    with pytest.warns(UserWarning, match=msg):
+        gate_2 = dml_irm_obj.gate(groups_2)
+    assert isinstance(gate_2, dml.double_ml_blp.DoubleMLBLP)
+    assert isinstance(gate_2.confint(), pd.DataFrame)
+    assert all(gate_2.confint().index == ["Group_1", "Group_2"])

From e3270816305c0e1ef4e6d208621c2a378e1735c1 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Fri, 4 Nov 2022 11:53:31 +0100
Subject: [PATCH 10/15] extend unit tests

---
 .coverage                  | Bin 69632 -> 69632 bytes
 doubleml/tests/test_blp.py |   2 ++
 2 files changed, 2 insertions(+)

diff --git a/.coverage b/.coverage
index c54090593472f4773cf5f4f99e12a7949991082a..2424fd2d3e3d1decbb1d5b054b6bb1c95bb585e8 100644
GIT binary patch
delta 38
ucmZozz|ydQWkan#tFf__spaM-{g7DJ(@%?b#!SB5^MonGWb?sZ`z8P)01!t2

delta 38
ucmZozz|ydQWkan#tD&)#vGL|6{g7DJqMb2%;*)RpJYh;O*nF_pz6k&es1C*e

diff --git a/doubleml/tests/test_blp.py b/doubleml/tests/test_blp.py
index 2f125d50..a0a2aaf6 100644
--- a/doubleml/tests/test_blp.py
+++ b/doubleml/tests/test_blp.py
@@ -42,6 +42,8 @@ def dml_blp_fixture(ci_joint, ci_level):
                 'values_manual':  blp_manual.fittedvalues,
                 'omega': blp.blp_omega,
                 'omega_manual': blp_manual.cov_HC0,
+                'basis': blp.basis,
+                'signal': blp.orth_signal,
                 'ci_1': ci_1,
                 'ci_2': ci_2,
                 'ci_manual': ci_manual}

From 4a1fa2c56e0a276e8a6c97097e7c95d428c24cd1 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Mon, 28 Nov 2022 16:34:48 +0100
Subject: [PATCH 11/15] adjust to PEP8

---
 doubleml/double_ml_data.py | 7 +++----
 doubleml/double_ml_iivm.py | 4 ++--
 doubleml/double_ml_irm.py  | 2 +-
 3 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/doubleml/double_ml_data.py b/doubleml/double_ml_data.py
index e2d6a36d..c37fa37b 100644
--- a/doubleml/double_ml_data.py
+++ b/doubleml/double_ml_data.py
@@ -725,10 +725,9 @@ def from_arrays(cls, x, y, d, cluster_vars, z=None, use_other_treat_as_covariate
 
         data = pd.concat((pd.DataFrame(cluster_vars, columns=cluster_cols), dml_data.data), axis=1)
 
-        return(cls(data, dml_data.y_col, dml_data.d_cols,
-                   cluster_cols,
-                   dml_data.x_cols, dml_data.z_cols,
-                   dml_data.use_other_treat_as_covariate, dml_data.force_all_x_finite))
+        return (cls(data, dml_data.y_col, dml_data.d_cols, cluster_cols,
+                    dml_data.x_cols, dml_data.z_cols,
+                    dml_data.use_other_treat_as_covariate, dml_data.force_all_x_finite))
 
     @property
     def cluster_cols(self):
diff --git a/doubleml/double_ml_iivm.py b/doubleml/double_ml_iivm.py
index 75d498af..cab8d342 100644
--- a/doubleml/double_ml_iivm.py
+++ b/doubleml/double_ml_iivm.py
@@ -206,7 +206,7 @@ def _check_data(self, obj_dml_data):
         one_treat = (obj_dml_data.n_treat == 1)
         binary_treat = (type_of_target(obj_dml_data.d) == 'binary')
         zero_one_treat = np.all((np.power(obj_dml_data.d, 2) - obj_dml_data.d) == 0)
-        if not(one_treat & binary_treat & zero_one_treat):
+        if not (one_treat & binary_treat & zero_one_treat):
             raise ValueError('Incompatible data. '
                              'To fit an IIVM model with DML '
                              'exactly one binary variable with values 0 and 1 '
@@ -219,7 +219,7 @@ def _check_data(self, obj_dml_data):
         if one_instr:
             binary_instr = (type_of_target(obj_dml_data.z) == 'binary')
             zero_one_instr = np.all((np.power(obj_dml_data.z, 2) - obj_dml_data.z) == 0)
-            if not(one_instr & binary_instr & zero_one_instr):
+            if not (one_instr & binary_instr & zero_one_instr):
                 raise ValueError(err_msg)
         else:
             raise ValueError(err_msg)
diff --git a/doubleml/double_ml_irm.py b/doubleml/double_ml_irm.py
index 17dee093..66673bc3 100644
--- a/doubleml/double_ml_irm.py
+++ b/doubleml/double_ml_irm.py
@@ -176,7 +176,7 @@ def _check_data(self, obj_dml_data):
         one_treat = (obj_dml_data.n_treat == 1)
         binary_treat = (type_of_target(obj_dml_data.d) == 'binary')
         zero_one_treat = np.all((np.power(obj_dml_data.d, 2) - obj_dml_data.d) == 0)
-        if not(one_treat & binary_treat & zero_one_treat):
+        if not (one_treat & binary_treat & zero_one_treat):
             raise ValueError('Incompatible data. '
                              'To fit an IRM model with DML '
                              'exactly one binary variable with values 0 and 1 '

From c9fc8b6f6fc5dd7eb9ad46c98d57709c23f7d043 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Mon, 28 Nov 2022 16:47:17 +0100
Subject: [PATCH 12/15] refactor for nonlinear score update

---
 doubleml/double_ml_irm.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doubleml/double_ml_irm.py b/doubleml/double_ml_irm.py
index 66673bc3..93438931 100644
--- a/doubleml/double_ml_irm.py
+++ b/doubleml/double_ml_irm.py
@@ -352,7 +352,7 @@ def cate(self, basis):
                              'Valid score ' + ' or '.join(valid_score) + '.')
 
         # define the orthogonal signal
-        orth_signal = self.psi_b.reshape(-1)
+        orth_signal = self.psi_elements['psi_b'].reshape(-1)
         # fit the best linear predictor
         model = DoubleMLBLP(orth_signal, basis=basis).fit()
 
@@ -393,7 +393,7 @@ def gate(self, groups):
             warnings.warn('At least one group effect is estimated with less than 6 observations.')
 
         # define the orthogonal signal
-        orth_signal = self.psi_b.reshape(-1)
+        orth_signal = self.psi_elements['psi_b'].reshape(-1)
         # fit the best linear predictor for GATE (different confint() method)
         model = DoubleMLBLP(orth_signal, basis=groups, is_gate=True).fit()
 

From 369be3e5731204d31cfa42cb7e567e9ed761d6a0 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Mon, 28 Nov 2022 17:06:16 +0100
Subject: [PATCH 13/15] add warnings for multiple treatments

---
 doubleml/double_ml_irm.py                  |  8 +++++++
 doubleml/tests/test_doubleml_exceptions.py | 25 ++++++++++++++++++++++
 2 files changed, 33 insertions(+)

diff --git a/doubleml/double_ml_irm.py b/doubleml/double_ml_irm.py
index 93438931..9b117f98 100644
--- a/doubleml/double_ml_irm.py
+++ b/doubleml/double_ml_irm.py
@@ -351,6 +351,10 @@ def cate(self, basis):
             raise ValueError('Invalid score ' + self.score + '. ' +
                              'Valid score ' + ' or '.join(valid_score) + '.')
 
+        if self.n_rep != 1:
+            raise NotImplementedError('Only implemented for one repetition. ' +
+                                      f'Number of repetitions is {str(self.n_rep)}.')
+
         # define the orthogonal signal
         orth_signal = self.psi_elements['psi_b'].reshape(-1)
         # fit the best linear predictor
@@ -378,6 +382,10 @@ def gate(self, groups):
             raise ValueError('Invalid score ' + self.score + '. ' +
                              'Valid score ' + ' or '.join(valid_score) + '.')
 
+        if self.n_rep != 1:
+            raise NotImplementedError('Only implemented for one repetition. ' +
+                                      f'Number of repetitions is {str(self.n_rep)}.')
+
         if not isinstance(groups, pd.DataFrame):
             raise TypeError('Groups must be of DataFrame type. '
                             f'Groups of type {str(type(groups))} was passed.')
diff --git a/doubleml/tests/test_doubleml_exceptions.py b/doubleml/tests/test_doubleml_exceptions.py
index 41cd359c..f40c8ba5 100644
--- a/doubleml/tests/test_doubleml_exceptions.py
+++ b/doubleml/tests/test_doubleml_exceptions.py
@@ -752,6 +752,19 @@ def test_doubleml_exception_gate():
     with pytest.raises(ValueError, match=msg):
         dml_irm_obj.gate(groups=2)
 
+    dml_irm_obj = DoubleMLIRM(dml_data_irm,
+                              ml_g=Lasso(),
+                              ml_m=LogisticRegression(),
+                              trimming_threshold=0.05,
+                              n_folds=5,
+                              score='ATE',
+                              n_rep=2)
+    dml_irm_obj.fit()
+
+    msg = 'Only implemented for one repetition. Number of repetitions is 2.'
+    with pytest.raises(NotImplementedError, match=msg):
+        dml_irm_obj.gate(groups=2)
+
 
 @pytest.mark.ci
 def test_doubleml_exception_cate():
@@ -766,3 +779,15 @@ def test_doubleml_exception_cate():
     msg = 'Invalid score ATTE. Valid score ATE.'
     with pytest.raises(ValueError, match=msg):
         dml_irm_obj.cate(basis=2)
+
+    dml_irm_obj = DoubleMLIRM(dml_data_irm,
+                              ml_g=Lasso(),
+                              ml_m=LogisticRegression(),
+                              trimming_threshold=0.05,
+                              n_folds=5,
+                              score='ATE',
+                              n_rep=2)
+    dml_irm_obj.fit()
+    msg = 'Only implemented for one repetition. Number of repetitions is 2.'
+    with pytest.raises(NotImplementedError, match=msg):
+        dml_irm_obj.cate(basis=2)

From 9a18d0681a29cf7a9ddf5c4941f16ed3ece4b884 Mon Sep 17 00:00:00 2001
From: Philipp Bach <bach.philipp@outlook.com>
Date: Mon, 5 Dec 2022 12:19:15 +0100
Subject: [PATCH 14/15] drop .coverage file

---
 .coverage | Bin 69632 -> 0 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 .coverage

diff --git a/.coverage b/.coverage
deleted file mode 100644
index 2424fd2d3e3d1decbb1d5b054b6bb1c95bb585e8..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 69632
zcmeHw3sf7|xvs<ry)|QuF$Rp0pMcF9^E3v(6Fbg3bx*IcRRIH53=-DEfNd-*=~~HY
zPtNJOT`}k6)Nyl?oHVC5I4fyQ92=B%+oY=@2~FDAO$gB5Q=8tzSSi$2KuB}{y)}Aa
zJDm~dtmJlW+3Y=<ng84W{{DaeGy5@`2k+l)va|YTi?z*Y*Oy8bNTgE9CcR!Fk*MG=
z3I0MK1#HMdUm%x8ZQpKFB`NskE*-N+qLrVNF#C0X$K-3D*A;3$)*e><K=Xp)VO1*}
zARod2VSq3|7$6K_23+@R6{%@y(nq@O#)ekbZZ$Tt)?ljJ`oQ+h4{g^!w0X<!?fT$8
zeU4fWpZt9NX1&$Yp>Joc`esurt2ddOOpQjn$=ss1A7DdgJ8Wzdy5hk;xaOe~3$Q%e
zGMJh`%Fea`M7!10X0&$c53*f(_yoQ*JKJdw?f?+h)M7?w=yQ1ST)mZTX05Ebk+lW2
z$T2nLs&gN8suiip$<oJITqmQo5&mmpY6CkWRoU4LK#dk_6VkrHYBV<<U~PGNW4MNm
z7SOZP9;~g^(pG4<=vz%@TurmhWH(vNdbX2oblBOZTUIa+krw&@dC|22tZ$<VeLE=0
zmlefMMEDkx{OP=KGbm@aglLTjqM|_<>uh$kH2~!gJKEZjaX=ziu-$Sy$=k`=EqrAK
z+vXOwjMO*AoFU4QMT1ffv+0n74I4jCACb(%)i7J~^r#Zx0jo9g)T(5u1iX^%v>j@N
z8!v;=VYlG#1~9&%7!vJwl9Y<ntSsqchp?j{dJGLFyDhw-igB9o?%TKyI(~b<L9Ha#
zNl+_lZPi<vag#yP`k-;Zo1@!IVTcT)6*V-yg+CEA{Yt$JDGNI28(J+5vA%{q#>PSe
zfmjeat=VC2L`}tp>d<I8%vy~tYyp2X7#6;2M*`O=R>@PhXWh<q!Y*Ja4sWVmX>vts
zMuzke6?P-uT70I6aiNeUZxKdAhuT=9&0!4%k}nIAA&E$o47Cyd2vjFuAy3_$al7j9
z=NXFm4W&yVQ>3P(NL|4ef#MSSON^-m+7oF4b1nqJ)o(Z259n{!9qk6ivlHd18&Ynk
zI8wn-j8gfd4<ulfA5&oV+aTzpKPpCLv?rqSU2IqWVPh*qlXj!Y3QZPbaPSJCM<+HJ
z;bdcj#bL+(i@sv_MAx2(zCnP<_Yn7RRxUmrnl}_=s}6n9XCE-yQ1G#3>l-YVR@P_^
zUJ~HMoi-@SWYgoKZ@w7SMKEk#2&3h2HdZBcO+nH9A!uu%W(6B<ZANnwf0;s8-ykZ4
zeAFgUIl<Tgg8FP|i#BMPb1*}o{81FBLih#kKm|f=Gd2ZVIkH6*V#fjYCb-~5Xq(Z_
z+D&b&9-V@f!T!`?wCNkIEF6SOXl^$3W!6VVG8Cf}^^%q7M#hyTRiv(4Cv`daHW&;q
z1{++<+l(Q#`L78vzA@)u#7*vnI14v9(e8n`i=qR44VHjnTF`<GK^8)1@wYhYTpegB
zWqcC&Pd<bJ!T@1_FhCd}3=jqg1B3y>0AYYIKo}ql{OvJ7NfV?B)c=<;e<xvnz&ye<
z!v^^f1_%R$0m1-bfG|K9APf)&2m^!x!T@1_Fd&!#t&GZKLMtnZ(kUvX6s;^MDJm(+
zFD}k6TCXoIsxB?9E~{b^sEky8UcS7btf1(iRF>(wE3~3wZ}!&e`ucC!U@b&_{ey>D
zbAA2h_I6mVV63m-ZD};N*4N)>wX|4`ZE*a)F8cusoDs}Ri;F;{(Q0b9!?E@*J8QFN
z=jpTa^I>rT+9=50r-mBIm=7e(2h3lXKQUv1D@aZx3=jqg1B3y>0AYYIKo}ql5C#YX
zgaN_;Vc;udKucvxLjw;oC6yvY;|`RD%8>Gd3<(-4QxzIsNCZvM`hWN&F+L`T_R{C+
zN9bMjLfuEYAL&|j>$N`ZYuXNNk!Di!Q_aJgrRra+zpE}+&8ps1xm4SgoU%`8RhB5G
z6=xNP6l>*I<<H6Y$uko_O8jABLt?t@L)l}pjg*)AF}06clJIuIiG)oF3h6JTPHB~7
zQt}Kc_)9<c*2pBTmD{T8?=ac#bTrgASsd_0Xj^MOJdBp#-WC2{k486aI5o7lS`Ahf
zzQgmI@QfXLnh>6`fXBFO@Px?r4FJf92Q*s@jaG}zW@t9q0cKk@!05!ov^iSsCW9S4
zQvxVks{lnK9;LC>XtSA`O{^6_Hdg|OLNJKIVK=qfFiG$*j>Fgr5ET`a#32{~QH|t5
zu(lk^mWq`$9X8k<W)|e{ECczJION-nR=dgAYB&-?+Pxl-G6f@rSpZ!kSYbl8mI9h+
z!$f02<#v_;NUC6!cv_=CI>WWQyBMId1Vi1bb^sQx-nJru6Yae*njzyA0?K@h*BLfm
z0U*uKc%6~jZOsR?`53PgDz_sKK#~MA9x>jWKvB7ll5~iN0Sz{+r6k=VC5?>-(etbA
zU0ZTNeU)J8Xom@VNo@{b2yfgxMq{hPX2*ePdp5|647E5ub{M-15ttoo#9P8-Z8LP3
z><0{OCJ2<~78LnwR#OtEU`DV(5ey%xwyzRj86-Z(9k#9n7?Hk-U}7Ww3IGuu@exee
zwiU}oM|_Y67HjoTws5Bi(r7vy=5cFQ90VYFo1>w{YH_q9Q|?#>AR-&534zS9#<onr
z5g7+DPK-IKmr|0Wf*p%*Ch)K=x&zpf0UC&Ir&x!uEz8m=$sv*DL(!FKq9YS37bG88
z0!rr!E)n}c=*G=pW9<f;v8^4RmA19Rk079*=K#!Ysp3r$!3^Eq?OF^dX>n?1W$g~D
z*<kN#$9IamQvgf&MJcwHZI&js)zHK?8{vizx$K>b08Qk@GF(5lvypAbKT`lmHOZpm
zSW_G5f}XAh>4oB@K@zNn@1B#O7Q#b1@9U`eM!3&{9}qA#BCoGo05Fs|GsOUGW&l8R
z1Z-$+kBWe_=m-e%pnS28l9&YOjtJ*ztI&!m3x4;Y8F>?wRiP0hXfT+}CcD9a1l7U>
zgDz$O1MzLUTt!K2LTZOQVn{H^O1V<3T=05Qp%AMm1dYQ`!)m#(sLT5x^!`xL2?m?O
zL`uR6sANzU(J2$FCEwgiC`xirKuKXcKw;$xVyO&v#)_rF95d)9(CuDOlK`odIg7gB
z40Bdj#x&5$Os?)@`Y!rW=A`Z!)mz$gsvYuW+G5osioV2;Wsg%YCJaj7)E&^yYCAPk
znxAP7YBJUDtG}nNQ2A97<!eftvQ#mnuqxKcC*;q|_a~l8Y)s6MU6j>QUr_t0v;<ee
z<^-kmgtS^RCHXg0@Rxt?let!kyxsWH{(sF5nTv@7aL%2>hV3$!E)JO3{(tp0nM)G~
zO3dv?)fSmcAsmEn4~V4j9Ykd%<q(d5uXI6|tEiK?q@pFEBgx-Ym+zFhQsR-1^x1i&
zvb$xjOyNjj7NR<}Q61#<aTz8W3)*q%E}1J;xJo>&Q6Q22f5|;E_%RUyaRh<b>TgxM
z=>C6kT;3a_8Pu<6zQzmp{|o1Dyr}+v!F-K}`v3WN$XrRn8IKrmPN2xEqB`P$0S(sG
zP~BoB!~OrYTV*blaOr4=2}gxoagCeDh;-a@;s~|3=>KQOVF}dNo!kFkvypNNX9OD*
z!SL~AbsUvJ;&a?#RUE#FU}7Tv%D5svf(hGph4_dM^5B)r#W_Wg#;E?kJ{|(*_W!fu
zafw^>|ChxP2XF5GXKtX53U@5NnehGprE#^>*#3V;Idw>E`C!R(@sWw|BcwejbL9#z
z5&J;6FMFH*|B^UN5y1@g|5NXhxzgg*Y)=1wvABy;Y%Oo<|EI)pu?*KQrvJZ4d>o7F
z|0l;G9qGs}6c+_~U%#pUpR`HlqC}c0rvJYnt_T>@|7XNUz+g|8E}=|9bdT!)>qM0m
z)&JLu5{&BqYXl00`<<cwzq*35398+k{=Z7JT=06L6s;)V|5pf%y1Wm@^#A2W6f2~X
zL0O2EiK4aS`~R|1>Y$L4!ghedsJ})f^qA@Yns7;e|L@C3AIMR{0AYYIKo}ql5C#YX
zgaN_;VSq3|7$6M%f6f2}s|9rE`F|-hD}n#yLl__o5C#YXgaN_;VSq3|7$6J~1_%R$
z0m8uFAOjSoQ=$3)O$wES@iISWwDhy|3Hl(d)csobvaUmC(bZ^&wZGC{(Ef+^N$pW>
ziRK+mzvdZDr{*3_g(h1)tNvX5k@|nDf2{s{b(?ydI$xct@~QrydP&uzvZ@|dJ*cWt
zWvkT63FYsUuPaX}A6HtGdz2MQMlquJrJ`4HMA4$SPf?{<0}7B2VSq3|7$6J~2EGag
zGB(Mis>)5hSEi<GhHT><lQUI*e_+Jxx1PI@m{bP`SJm}S!l5hI&jj`tq^8uuuDn{<
zb^6B4!86><Ku=GPMVhh^?cQj9cf9NAp$mcQ!{ffo)-Fv_4Fag)0miug`ht=Tu&a4P
z@0H+*M}5cL^#z4PAoSS_?vaTpBsVzKapn5GGuPZ>-EKocPhV;^$P`tdy#hME`s(=g
zOP+4-___Mgghf?wU~QFo@A0R{r)I7X2h}dBguM-w<^ir_))y%7a(#nt@BYz&C;Kd{
zzwad1WpN+t>e)MUe(Zy~nXw1@hWD(g0Ev4l_8$bCQm^NdWz0KxdGh4xbM7%ii{J0>
z_jZGp7HMWVoL5s0Y}E`+)pP{jJLW$Z@cWK>{aBsUGB{jR*83E&Fn;Pc_xgRuJ-*}L
zI=MG>Jvy-dsT=(RmY%^Uht5x4zWf1~|B^bn6b|H+B8l1;3{EH}Z!kZj1a?-I^p-&x
zL$(&rA8W6B+?;n{tZ>k(Sy~JStBcDgJR{zTSGa-S1`4>z{nL}y|I3}+lbBqD@QN_r
zxifAc7vU8myuu(}=^VU_0(7t-h-W=>{Q2I&LeG<hg*^d_=^M%U2rfSsE+r3k=H+1v
z9`k<&dLXhEr>sM}*Y%dw4Nd(EaLC(Ez7}@VYbRIcLVjm%`NWtPIQYrf+aGc-4RV)#
zH+l@Cmrn))`4fkS)r)fAq_sKaQ{A57$vTdkhChxe$%ehn+2+0b3}By)Uf=b-1w)s{
z2DmY|e|XGmJ@>;i@Wnr2@m#iktJ%pxCS9@yWJ=Z)91MWZO#LeG*)<<>>F2L*NM8*{
z%2x;3IOS<__Imw&*b6PvMXONxt9qXrnmYE1bL70o(=&KEKV>EC&0AT2Fo2;PytT7e
z!0!82l#jSQ{e6Q@-{FzxeGgAVmAakNh5^Gsq2J&4)8UEk#PsE;#O3>kXTdFg;OAAj
z+-&f;otb(#R-+GUGk)&Xjw^v)Csbi3==n>s;BZM6QmCfx^f{-$pPNSYxU5K721oLi
zVO6gLX21n;lTFD)yEA*soKKHW|1t17UorJk*u8k^WdE#3lbZqC4`q~(ILExB`vVih
z{YQI7uOHbL;0y!KF+-q-8+8B0JN7}p=bLmooVqOiy*lUM*#1C%MM@e%PYa^I<G<c9
zh6&190=pkrayD=*aB5<DqtADB`l|b#KDXQX-s}{#5C7<^Zv3gsPK7h?OFcWlO%Hgc
zrk~zmFi($J2f346KDRfxxi^qM=Iu^j3@4T^HsA2gLL+*^J9>24K-=)ywqfV!h%N<=
zEloK)Fg`UsHJ;V9C{e0%-MPqo!@?bV>YuNTzTP)8I_vWv56q7F>fGFEXYFau#!Ulr
zLw+y!3HP4G6Yx1-^^FXVOt<{Q-&c70(q0%D>Hp38Yg+HIjbo>Lvu)hW^z^iMD&Y2V
z+>~d+Guv@TGAe~mHcwxyFX(Z)$K2ew^M>>I&=5B?-di<3)cx+%Y2Q<}zy@#G(CPD6
zIIm~))ETbhXvb9dn8#;(dU$liAMl@NmMjGAOBR|Z$ET;JxmjnRjtl;CCMUs>oTQIS
z4|_82>g_x2&3z?d*MbGGbH#!^Q#F@f_Z@l3GFYf#V9ye!cJLc_-CwtWMu+L%njwpu
z8@y~iS8vcEMQe2zk0eM=>^r|uBDp$6dwSnNZ)cZCI*xL?@W#vC{Dx$BFTeFh-$l3e
zv%r)1xTM?D?>iF|LOc2^UhD9_+SbRZi?vW>llCmzeC3|$d!}o-!m*39gB@>wl50C{
znSQ#&`_b6s-P8Ym?7`!nkqP$b+1Y#c%QYZEYZ5BdkhiPN@?a6c^u*_!$9?rf&h0%~
z_f}tBSKTNxG(60`FzP>g{Ws^i0r!YEf0+sdYgHil-1&cf_5B0u)!&AnaC^gleE7!B
zecwHzDObXAvvQy`3i!{Qx6s!==Bu5VJU)B8lY7x~Va)rfgX?fQ*GWEqNP429=S8Ce
zk+nr}JfrSuX;}7+&pDsdr-RrtHXGo^K8KBKqet)fN8cXzXH$Qfc8-4Ht8=uBjdXR7
z4$nZW@Ap6F{N+FPk9ucLdjj9&+)nq06W{oW+v{cQ9hzi0sFou?`G=`%89k-n_xi^I
zoO^j9?A(?(GdVeXdFosT(^=)+*EMqK-m!l7jdj!1<+@2}iwvoDkL+3+)@#%3C2oeJ
z(e~_x{xcuG^!}!2U2mm7@m6{I6Fa!Rb7$V?4nGlazt6o?F>~op?(6w)oc8;D9}fR>
zUvaQr-+_`j58fU)@+>#;#EwfZ?HO$zlDgJYP|_m@sNPR^pWt>+`uv}~*U{7RiidM^
z8BgqZ7T8YzhaJyKe*fqb2mezGJ2FOo_dD39d&0+DyLPPrAV){qhOX{>KrMOMcj+Bp
zibUe|dG||Bj7)5-{PfeTVEJ$Oe}CNTy>@Z(*wLAOIK?yY$-~VT`~&Xsi^IJ~Iy$6Z
zcwsO4ym8Ir_n#k`9tn(I`0R$m@#ODocg}qA-FN%0aozyOrJSFcx#TV=NdO9)6Gq?q
zr<R}?q$mE&4fnrf`1H7E@X+aT|I0_6KkF#;Z#aDQh{rPHcK0cyGL>rKOri|U|8JtV
zNZ_~s{=!T!SC}#8cg*|D+b|k%f%zHJ%e=t+i1`=he=^@=zQc4ghZzfVfH5-vgL#nI
z&1`3CnF^+e$z@hB8O$O^$H-}p{(}B9{VDyQ^e6O(^n3I#={M+e^egmF=;!ED^hx@k
z=*MXneT25rtu#yTr@u+xNACm$$cHdM7$6J~1_%R$0m1-bfG|K9APf)&zUB;|UcGeF
zCY;sP;jFe6XB#)-tfmHM8#dsqx*BIyRXD4x#92iJ&dSSiR#t|y_3Lp~T8guh5}Xwm
z<E*F%XN840D=5HOem>6f@^H3p9nRLS#aV7H&T?{amYt2WHEVFTdNs~gt-{&Ll{j0m
z0%yyY<4mu|SymR#mMz0sW+u*-F2z|!2F}vcah8^bvn5M#mYRyQ#fx#4l7h2Ei*S~V
z$3>(I7vgV8NjO`u0A~z?Gn&SkPKPtC7H1j_&eUq0sZ=;qDsiSz;7l&ZSz;p2WHOvl
z6wVS7WH1DP=l>-OzUD4Q)FTWK1_%R$0m1-bfG|K9APf)&2m^!x!T@1lUJRi5f71V-
z7llNaFhCd}3=jqg1B3y>0AYYIKo}ql5C#YXUwa17^Z%Ra^JxA54d&0xRc4&|Kg<X`
z1MnN>J?2-;FBmuTIy?vPGV>GW-<fBaA2L1gEWi`Y<IJPXai)v0!}9<SGc5CMW-qgc
zxgVYhxP#fkY-B2#Ql<c&3s}WuG3iVSvw+bsaz;we(q4L!o}m8(&j*asAJM<1hv>KI
zH{lt9^Iv<ah=POx!T@1_FhCd}3=jqg1B3y>0AYYI@YOItZHC{kr#7La4kfiH*@%)F
zlx#prHA<>bQi+lZl$4{S3?=JPQi_riloX?+2qlFmDL_d+O7c*$4kc?*l8cfYlw_l1
z4N6v{WEDzQqGSb1mZL<Ek}Q-gLrEq|mZBsBCFv+hL&*}9q@rXoN>Wg=2qnoVS%{J(
zlq^6AgAy7gI+SQpqCtroB`TCCQKCSJ93_b;k)eb_Ndijn@Bb$#C3@+b66QU4ZvTR=
zjA@{gnOxn+^j-9$%t_rds<*V~R6FF$w8g4N6n%*w%O0m*Oc<2DsXL&Z)ply8G(XcE
z)MTpPSAS1kq4KLF%GZ=OWvOCDVO6Y?PspE_?@v6H*qE3hyC|!rzM%F~X$h``%?V2B
z32C)tO7d@i-F4vHl|o*ih+e;HtH&>4MX$EEvhdy1X0)@0W~-&mU<<t=wnp3=Ov8Y4
z-s8L>j`uGjz3}_34R$MQw8NVotK)bXGD3-Y8+27%??T1O7|jl2E9g;4rAdA+`0er`
zPC&3Cj<-Rh_jiWgs$Cw(TXC^`yU}Vl8CwlULP%xe-qy>%6?6_(`1RQ9sV^ko7wvXX
zudrS~PU$Y0D|J2~o#EP*+#_>k35U8>?EoxXy<*wL`Bks;2-L4=zQ*ee8Lx2u#_NpK
zu0XzDDicl?@6J)i>x9bX$MK4B>`ik5MV|P#Y$F)ZU|kK>Ew&cM#)HNd7KGQv^<HsY
zfv}h4ifi1w(>1m_Y<AWPI^-y3q~8)*0~{Yaj9n1oLNM8JSOUH4zQbfcU}!VhY$kJy
z4b)j9{>|42#_lj9LRZIe-9X}V++kH55g)<CMEsR;MSKJkw(Sa)U%Fdd#0PocVYxV`
zz>N#PuAb*n9}fYh!*B(6G_+VPj&@|qtaw}kfy}YSvN*2dxH2*3$Q1Y{YQ%-Do^K`%
z77MxqSQ=M5#X5v-nNdz165h;$<wMcwaP#$BkuCrnf@Io*GFPtf60r}2-(_!O?FO5%
zt-X~s*dX56?a({{b4eVgh+u~9?o#iPxzgg*%*xswR<pt0)y^U(TP*IP6kE$SOB35_
zXkwd<j#fKDONrxR8Ll7O*~qrrO%^jkS|mP>HMM~*M!ONDljD#ENw69!d!e`}$oo1f
zz7g(?tZb915qW*mCYg&8X{H!}1#v~dhSv5-1Y`uh(mmK}v2{Tnl&4E5lMvkz;XG|R
zQDrq+%+07gC`&6!&|ol`O?HC;1T_K$!|$CppqK)J>c95#c}GJl+tyki5e%}T5-k_J
zo+w2t%HJL-1V&xn2dyj)I>BHgFQQl>CJe{QMA2IE%}rKH9TZYhm`*SOj8oV|$rzwB
z2XYC;lu0EZphvk0TK}(C9Fs8ri?K3!^dITdw1Hlu`-AQ&UA=CR_Fe7wwR^O*=2sf0
zrb>NH{i52S&Qe`bJ)_#ITBQ8B(y1&`{6X<70FV!1fG|K9APf)&2m^u`SVP_aC0BX4
zb9A7)&CTNP*U``}-`n0KZrl*x<sxz1NrOI`&A@iTV2;_?+S&!m)Xf79`T%B!$!0V+
zvS^eP^(Jb?b$-LzS=n|&qt#+V7#r`CxyS?op7@BG{)ic$s1kVdiujFS0JmSG(fv3^
zjqqURf7f_?9HSL6`utZ9pwE||zl9-habpuWnvmgUzB?b)_HQ0;79T&zSTT05TZ|Qp
zpAO;&X8v0SUD5jg8p$&f=3VAV_+9@TSf78MeuUmdFVuac`;o3iw_fYhzNYQa7HK9m
zKh->}S*rfE`n&3K)vW4Gl}oi<$tnAkR%MA|T5(o!NU>IaRsNiOpFA`1qr@L3HYBFY
zK9oHs+emq-A5;6NB?)gQoJiP|ppgDT>XcSVCZXWJ_PMu4CULD4FsKpbz~~0%w)6kn
zHvk|b9&pag!M19E(TRtNo&Voj1t=QvC^47!&6NP65DdbPhar-}GYA#1GR7eo0lPNH
zqdT~ADEmw9VnZX?vG*sD`Tsl1Kt4rm$x&0a;TiJX>j5cKFjAO>7_2ZMTT1~=v|*yL
zfSjEr0Fo+LC7#wOkjVW1-NgWvB^c^fwTqtr-&O>0qP;gpGpOI4g@7_2<Avw{cNYNC
z{EQbh|GzaK(B@-2bThOg4?vOxGafPCoIp_t3&%Rd!+-`GV6|DdNXhX0|CU@(U&vr^
zFm6UWOt=x$<^YE9#?50yChoUqgS^O4dy5;g9c#o}0^P&Ro&T?aMS4zgd?FaWC%t`@
z_{t#hIqtA^CBTUEO#~Ag@mBzd=!lPC!nUn|)rle_KFEVt*6N{b;ZA|u1-=d9dEA;6
z2LW^E|930{5Rr`&D>26!+cE)1WE{Nto~{}ebRHG#SbQ_#=l{24fCi%5Db^uu%Q9F-
zdPrpXV982Ycq%e7p>jd;fhC}HuHX`}4}|Unf<sF;&;M^r6>o|NW@!F@*J40Pi&L{X
z^Z&b30898qDYlk3&HvxI2+%}cEW`DSng6eWCBY)&Sk(M~4Xh;=D;=50-kt=t5FXNb
zU%zSozit7*P~yxKGylJt0RYhvFlPS00v1q<jDW$J?qXPUZ4#h+)ck)1EYTKTR@D4|
z1uW|pE*LejTLDYKg$M>+%m9P4eA_OE<>NLXwX>j+10)z^r5skAi<1jpPby%Mx;RDo
z`TuHI*e)dM@;(?d|6d5};aLHd49Y^ptAMrh;<V)F|4U%K{XqdGh3x=^mBaFUaa0C7
zW5uwdUx;Hyxe4n3C(`F6@V37;W*Pl^*d!mq0AYYIKo}ql5C#YXgaN_;VSq3|7$6LM
zeHd81nUdUp`+jw3^;`}7K7h#4E;KR9k5%7Y2ct|`w=X3$J{LQ?dE3=<ch|-bcZ=0?
UTQ&lm@S$ToK8Y$9y?XBd0Z8N(ga7~l


From 5fe966ce4bbd4c518a3b296e519f3c7827118297 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Thu, 8 Dec 2022 10:39:33 +0100
Subject: [PATCH 15/15] add summary and fix docu

---
 doubleml/double_ml_blp.py  | 39 +++++++++++++++++++++++++++++++++-----
 doubleml/double_ml_irm.py  |  9 +++++----
 doubleml/tests/test_blp.py |  8 +++++++-
 3 files changed, 46 insertions(+), 10 deletions(-)

diff --git a/doubleml/double_ml_blp.py b/doubleml/double_ml_blp.py
index c49730e5..25aa8f25 100644
--- a/doubleml/double_ml_blp.py
+++ b/doubleml/double_ml_blp.py
@@ -12,11 +12,12 @@ class DoubleMLBLP:
         Parameters
         ----------
         orth_signal : :class:`numpy.array`
-            The orthogonal signal to be predicted. Has to be of shape (n,).
+            The orthogonal signal to be predicted. Has to be of shape ``(n_obs,)``,
+            where ``n_obs`` is the number of observations.
 
         basis : :class:`pandas.DataFrame`
-            The basis for estimating the best linear predictor. Has to have the shape (n,d),
-            where d is the number of predictors.
+            The basis for estimating the best linear predictor. Has to have the shape ``(n_obs, d)``,
+            where ``n_obs`` is the number of observations and ``d`` is the number of predictors.
 
         is_gate : bool
             Indicates whether the basis is constructed for GATEs (dummy-basis).
@@ -52,6 +53,14 @@ def __init__(self,
         self._blp_model = None
         self._blp_omega = None
 
+    def __str__(self):
+        class_name = self.__class__.__name__
+        header = f'================== {class_name} Object ==================\n'
+        fit_summary = str(self.summary)
+        res = header + \
+            '\n------------------ Fit summary ------------------\n' + fit_summary
+        return res
+
     @property
     def blp_model(self):
         """
@@ -80,6 +89,25 @@ def blp_omega(self):
         """
         return self._blp_omega
 
+    @property
+    def summary(self):
+        """
+        A summary for the best linear predictor effect after calling :meth:`fit`.
+        """
+        col_names = ['coef', 'std err', 't', 'P>|t|', '[0.025', '0.975]']
+        if self.blp_model is None:
+            df_summary = pd.DataFrame(columns=col_names)
+        else:
+            summary_stats = {'coef': self.blp_model.params,
+                             'std err': self.blp_model.bse,
+                             't': self.blp_model.tvalues,
+                             'P>|t|': self.blp_model.pvalues,
+                             '[0.025': self.blp_model.conf_int()[0],
+                             '0.975]': self.blp_model.conf_int()[1]}
+            df_summary = pd.DataFrame(summary_stats,
+                                      columns=col_names)
+        return df_summary
+
     def fit(self):
         """
         Estimate DoubleML models.
@@ -162,13 +190,14 @@ def confint(self, basis=None, joint=False, level=0.95, n_rep_boot=500):
         # blp of the orthogonal signal
         g_hat = self._blp_model.predict(basis)
 
+        np_basis = basis.to_numpy()
         # calculate se for basis elements
-        blp_se = np.sqrt((basis.to_numpy().dot(self._blp_omega) * basis.to_numpy()).sum(axis=1))
+        blp_se = np.sqrt((np.dot(np_basis, self._blp_omega) * np_basis).sum(axis=1))
 
         if joint:
             # calculate the maximum t-statistic with bootstrap
             normal_samples = np.random.normal(size=[basis.shape[1], n_rep_boot])
-            bootstrap_samples = np.multiply(basis.to_numpy().dot(np.dot(sqrtm(self._blp_omega), normal_samples)).T,
+            bootstrap_samples = np.multiply(np.dot(np_basis, np.dot(sqrtm(self._blp_omega), normal_samples)).T,
                                             (1.0 / blp_se))
 
             max_t_stat = np.quantile(np.max(np.abs(bootstrap_samples), axis=0), q=level)
diff --git a/doubleml/double_ml_irm.py b/doubleml/double_ml_irm.py
index 9b117f98..f39f57b3 100644
--- a/doubleml/double_ml_irm.py
+++ b/doubleml/double_ml_irm.py
@@ -338,8 +338,8 @@ def cate(self, basis):
         Parameters
         ----------
         basis : :class:`pandas.DataFrame`
-            The basis for estimating the best linear predictor. Has to have the shape (n,d),
-            where d is the number of predictors.
+            The basis for estimating the best linear predictor. Has to have the shape ``(n_obs, d)``,
+            where ``n_obs`` is the number of observations and ``d`` is the number of predictors.
 
         Returns
         -------
@@ -369,8 +369,9 @@ def gate(self, groups):
         Parameters
         ----------
         groups : :class:`pandas.DataFrame`
-            The group indicator for estimating the best linear predictor. Has to be dummy coded with shape (n,d),
-            where d is the number of groups or (n,1) and contain the corresponding groups.
+            The group indicator for estimating the best linear predictor.
+            Has to be dummy coded with shape ``(n_obs, d)``, where ``n_obs`` is the number of observations
+            and ``d`` is the number of groups or ``(n_obs, 1)`` and contain the corresponding groups.
 
         Returns
         -------
diff --git a/doubleml/tests/test_blp.py b/doubleml/tests/test_blp.py
index a0a2aaf6..cdbc0081 100644
--- a/doubleml/tests/test_blp.py
+++ b/doubleml/tests/test_blp.py
@@ -46,7 +46,8 @@ def dml_blp_fixture(ci_joint, ci_level):
                 'signal': blp.orth_signal,
                 'ci_1': ci_1,
                 'ci_2': ci_2,
-                'ci_manual': ci_manual}
+                'ci_manual': ci_manual,
+                'blp_model': blp}
 
     return res_dict
 
@@ -84,3 +85,8 @@ def test_dml_blp_ci_2(dml_blp_fixture):
     assert np.allclose(dml_blp_fixture['ci_1'],
                        dml_blp_fixture['ci_manual'],
                        rtol=1e-9, atol=1e-4)
+
+
+def test_dml_blp_return_types(dml_blp_fixture):
+    assert isinstance(dml_blp_fixture['blp_model'].__str__(), str)
+    assert isinstance(dml_blp_fixture['blp_model'].summary, pd.DataFrame)
\ No newline at end of file