Commit
Fix: use uncalibrated probabilities for realized performance calculation in CBPE (#98)
nnansters committed Sep 19, 2022
1 parent cc1c866 commit 946a384
Showing 3 changed files with 60 additions and 16 deletions.
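
For context: CBPE calibrates the predicted probabilities internally before estimating performance, but the realized performance on reference data (which also feeds the alert thresholds) should be computed from the model's original scores. The change below therefore keeps an `uncalibrated_<column>` copy of every probability column before calibration and points the realized-performance calculations at that copy. A minimal, self-contained sketch of the pattern, using scikit-learn's isotonic regression as a stand-in for NannyML's internal calibrator and toy column names:

import numpy as np
import pandas as pd
from sklearn.isotonic import IsotonicRegression
from sklearn.metrics import roc_auc_score

# Toy reference chunk: raw model scores plus known targets (illustrative only).
rng = np.random.default_rng(0)
reference = pd.DataFrame({'y_pred_proba': rng.uniform(size=1_000)})
reference['y_true'] = (rng.uniform(size=1_000) < reference['y_pred_proba']).astype(int)

# Keep a copy of the raw scores before calibrating in place,
# mirroring the `uncalibrated_{column}` columns added in this commit.
reference['uncalibrated_y_pred_proba'] = reference['y_pred_proba']
calibrator = IsotonicRegression(out_of_bounds='clip')
reference['y_pred_proba'] = calibrator.fit_transform(
    reference['uncalibrated_y_pred_proba'], reference['y_true']
)

# Realized performance reflects what the model actually produced;
# isotonic calibration can introduce ties that would otherwise shift ROC AUC.
realized_auc = roc_auc_score(reference['y_true'], reference['uncalibrated_y_pred_proba'])
print(f'realized ROC AUC on raw scores: {realized_auc:.4f}')
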
@@ -67,6 +67,11 @@ def _fit(self, reference_data: pd.DataFrame, *args, **kwargs) -> CBPE:

_list_missing([self.y_true, self.y_pred_proba, self.y_pred], list(reference_data.columns))

+# We need uncalibrated data to calculate the realized performance on.
+# We need realized performance in threshold calculations.
+# https://github.com/NannyML/nannyml/issues/98
+reference_data[f'uncalibrated_{self.y_pred_proba}'] = reference_data[self.y_pred_proba]

for metric in self.metrics:
metric.fit(reference_data)

@@ -95,6 +100,10 @@ def _estimate(self, data: pd.DataFrame, *args, **kwargs) -> Result:

_list_missing([self.y_pred_proba, self.y_pred], list(data.columns))

+# We need uncalibrated data to calculate the realized performance on.
+# https://github.com/NannyML/nannyml/issues/98
+data[f'uncalibrated_{self.y_pred_proba}'] = data[self.y_pred_proba]

if self.needs_calibration:
data[self.y_pred_proba] = self.calibrator.calibrate(data[self.y_pred_proba])

@@ -70,6 +70,12 @@ def _fit(self, reference_data: pd.DataFrame, *args, **kwargs) -> CBPE:

_list_missing([self.y_true, self.y_pred] + model_output_column_names(self.y_pred_proba), reference_data)

+# We need uncalibrated data to calculate the realized performance on.
+# We need realized performance in threshold calculations.
+# https://github.com/NannyML/nannyml/issues/98
+for class_proba in model_output_column_names(self.y_pred_proba):
+    reference_data[f'uncalibrated_{class_proba}'] = reference_data[class_proba]

for metric in self.metrics:
metric.fit(reference_data)

@@ -84,6 +90,11 @@ def _estimate(self, data: pd.DataFrame, *args, **kwargs) -> Result:

_list_missing([self.y_pred] + model_output_column_names(self.y_pred_proba), data)

+# We need uncalibrated data to calculate the realized performance on.
+# https://github.com/NannyML/nannyml/issues/98
+for class_proba in model_output_column_names(self.y_pred_proba):
+    data[f'uncalibrated_{class_proba}'] = data[class_proba]

data = _calibrate_predicted_probabilities(data, self.y_true, self.y_pred_proba, self._calibrators)

chunks = self.chunker.split(data)
56 changes: 40 additions & 16 deletions nannyml/performance_estimation/confidence_based/metrics.py
@@ -57,6 +57,8 @@ def __init__(
self.lower_threshold: Optional[float] = None
self.confidence_deviation: Optional[float] = None

+self.uncalibrated_y_pred_proba = f'uncalibrated_{self.estimator.y_pred_proba}'

def __str__(self):
return self.column_name

@@ -153,10 +155,20 @@ def __eq__(self, other):
"""Establishes equality by comparing all properties."""
return self.display_name == other.display_name and self.column_name == other.column_name

-def _common_cleaning(self, data: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
+def _common_cleaning(
+    self, data: pd.DataFrame, y_pred_proba_column_name: str = None
+) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
+    if y_pred_proba_column_name is None:
+        if not isinstance(self.estimator.y_pred_proba, str):
+            raise InvalidArgumentsException(
+                f"'y_pred_proba' is of type '{type(self.estimator.y_pred_proba)}'. "
+                f"Binary use cases require 'y_pred_proba' to be a string."
+            )
+        y_pred_proba_column_name = self.estimator.y_pred_proba

clean_targets = self.estimator.y_true in data.columns and not data[self.estimator.y_true].isna().all()

-y_pred_proba = data[self.estimator.y_pred_proba]
+y_pred_proba = data[y_pred_proba_column_name]
y_pred = data[self.estimator.y_pred]

y_pred_proba.dropna(inplace=True)
@@ -243,7 +255,7 @@ def _estimate(self, data: pd.DataFrame):
return estimate_roc_auc(y_pred_proba)

def realized_performance(self, data: pd.DataFrame) -> float:
-y_pred_proba, _, y_true = self._common_cleaning(data)
+y_pred_proba, _, y_true = self._common_cleaning(data, y_pred_proba_column_name=self.uncalibrated_y_pred_proba)

if y_true is None:
return np.NaN
@@ -303,7 +315,7 @@ def _sampling_error(self, data: pd.DataFrame) -> float:
return bse.f1_sampling_error(self._sampling_error_components, data)

def realized_performance(self, data: pd.DataFrame) -> float:
-_, y_pred, y_true = self._common_cleaning(data)
+_, y_pred, y_true = self._common_cleaning(data, y_pred_proba_column_name=self.uncalibrated_y_pred_proba)

if y_true is None:
return np.NaN
@@ -345,7 +357,7 @@ def _sampling_error(self, data: pd.DataFrame) -> float:
return bse.precision_sampling_error(self._sampling_error_components, data)

def realized_performance(self, data: pd.DataFrame) -> float:
-_, y_pred, y_true = self._common_cleaning(data)
+_, y_pred, y_true = self._common_cleaning(data, y_pred_proba_column_name=self.uncalibrated_y_pred_proba)

if y_true is None:
return np.NaN
@@ -385,7 +397,7 @@ def _sampling_error(self, data: pd.DataFrame) -> float:
return bse.recall_sampling_error(self._sampling_error_components, data)

def realized_performance(self, data: pd.DataFrame) -> float:
-_, y_pred, y_true = self._common_cleaning(data)
+_, y_pred, y_true = self._common_cleaning(data, y_pred_proba_column_name=self.uncalibrated_y_pred_proba)

if y_true is None:
return np.NaN
@@ -425,7 +437,7 @@ def _sampling_error(self, data: pd.DataFrame) -> float:
return bse.specificity_sampling_error(self._sampling_error_components, data)

def realized_performance(self, data: pd.DataFrame) -> float:
-_, y_pred, y_true = self._common_cleaning(data)
+_, y_pred, y_true = self._common_cleaning(data, y_pred_proba_column_name=self.uncalibrated_y_pred_proba)

if y_true is None:
return np.NaN
@@ -470,7 +482,7 @@ def _sampling_error(self, data: pd.DataFrame) -> float:
return bse.accuracy_sampling_error(self._sampling_error_components, data)

def realized_performance(self, data: pd.DataFrame) -> float:
-_, y_pred, y_true = self._common_cleaning(data)
+_, y_pred, y_true = self._common_cleaning(data, y_pred_proba_column_name=self.uncalibrated_y_pred_proba)

if y_true is None:
return np.NaN
@@ -492,7 +504,7 @@ def _get_binarized_multiclass_predictions(data: pd.DataFrame, y_pred: str, y_pre
return y_preds, y_pred_probas, classes


-def _get_multiclass_predictions(data: pd.DataFrame, y_pred: str, y_pred_proba: ModelOutputsType):
+def _get_multiclass_uncalibrated_predictions(data: pd.DataFrame, y_pred: str, y_pred_proba: ModelOutputsType):
if not isinstance(y_pred_proba, dict):
raise CalculatorException(
"multiclass model outputs should be of type Dict[str, str].\n"
@@ -502,7 +514,7 @@ def _get_multiclass_predictions(data: pd.DataFrame, y_pred: str, y_pred_proba: M
labels, class_probability_columns = [], []
for label in sorted(y_pred_proba.keys()):
labels.append(label)
-class_probability_columns.append(y_pred_proba[label])
+class_probability_columns.append(f'uncalibrated_{y_pred_proba[label]}')
return data[y_pred], data[class_probability_columns], labels


@@ -541,7 +553,9 @@ def realized_performance(self, data: pd.DataFrame) -> float:
return np.NaN

y_true = data[self.estimator.y_true]
-_, y_pred_probas, labels = _get_multiclass_predictions(data, self.estimator.y_pred, self.estimator.y_pred_proba)
+_, y_pred_probas, labels = _get_multiclass_uncalibrated_predictions(
+    data, self.estimator.y_pred, self.estimator.y_pred_proba
+)

return roc_auc_score(y_true, y_pred_probas, multi_class='ovr', average='macro', labels=labels)

@@ -582,7 +596,9 @@ def realized_performance(self, data: pd.DataFrame) -> float:
return np.NaN

y_true = data[self.estimator.y_true]
-y_pred, _, labels = _get_multiclass_predictions(data, self.estimator.y_pred, self.estimator.y_pred_proba)
+y_pred, _, labels = _get_multiclass_uncalibrated_predictions(
+    data, self.estimator.y_pred, self.estimator.y_pred_proba
+)

return f1_score(y_true=y_true, y_pred=y_pred, average='macro', labels=labels)

@@ -623,7 +639,9 @@ def realized_performance(self, data: pd.DataFrame) -> float:
return np.NaN

y_true = data[self.estimator.y_true]
-y_pred, _, labels = _get_multiclass_predictions(data, self.estimator.y_pred, self.estimator.y_pred_proba)
+y_pred, _, labels = _get_multiclass_uncalibrated_predictions(
+    data, self.estimator.y_pred, self.estimator.y_pred_proba
+)

return precision_score(y_true=y_true, y_pred=y_pred, average='macro', labels=labels)

@@ -664,7 +682,9 @@ def realized_performance(self, data: pd.DataFrame) -> float:
return np.NaN

y_true = data[self.estimator.y_true]
-y_pred, _, labels = _get_multiclass_predictions(data, self.estimator.y_pred, self.estimator.y_pred_proba)
+y_pred, _, labels = _get_multiclass_uncalibrated_predictions(
+    data, self.estimator.y_pred, self.estimator.y_pred_proba
+)

return recall_score(y_true=y_true, y_pred=y_pred, average='macro', labels=labels)

@@ -705,7 +725,9 @@ def realized_performance(self, data: pd.DataFrame) -> float:
return np.NaN

y_true = data[self.estimator.y_true]
-y_pred, _, labels = _get_multiclass_predictions(data, self.estimator.y_pred, self.estimator.y_pred_proba)
+y_pred, _, labels = _get_multiclass_uncalibrated_predictions(
+    data, self.estimator.y_pred, self.estimator.y_pred_proba
+)

mcm = multilabel_confusion_matrix(y_true, y_pred, labels=labels)
tn_sum = mcm[:, 0, 0]
@@ -748,6 +770,8 @@ def realized_performance(self, data: pd.DataFrame) -> float:
return np.NaN

y_true = data[self.estimator.y_true]
-y_pred, _, labels = _get_multiclass_predictions(data, self.estimator.y_pred, self.estimator.y_pred_proba)
+y_pred, _, labels = _get_multiclass_uncalibrated_predictions(
+    data, self.estimator.y_pred, self.estimator.y_pred_proba
+)

return accuracy_score(y_true, y_pred)
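
For reference, a hedged usage sketch of where the change surfaces for users: the realized metric values reported for the reference period (and for analysis chunks that have targets) are now computed from the raw scores, while the estimation itself still uses the calibrated copies. Column names are illustrative, and the exact CBPE constructor arguments vary across NannyML versions (some releases also expect, for example, timestamp_column_name or problem_type):

import nannyml as nml
import pandas as pd

# Illustrative dataframes; substitute your own reference/analysis data.
reference_df = pd.read_parquet('reference.pq')   # contains 'y_true'
analysis_df = pd.read_parquet('analysis.pq')     # targets may be missing

estimator = nml.CBPE(
    y_true='y_true',
    y_pred='y_pred',
    y_pred_proba='y_pred_proba',
    metrics=['roc_auc', 'f1'],
    chunk_size=5000,
)

estimator.fit(reference_df)          # realized performance is computed on raw scores here
results = estimator.estimate(analysis_df)
print(results.data.head())           # result layout depends on the NannyML version
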
