-
Notifications
You must be signed in to change notification settings - Fork 110
/
fairlearn_classifier.py
206 lines (178 loc) · 7.42 KB
/
fairlearn_classifier.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
from copy import deepcopy
from typing import Any, Callable, Optional, Union
import numpy as np
import pandas as pd
from fairlearn.reductions import ExponentiatedGradient, GridSearch
from fairlearn.reductions._grid_search._grid_generator import _GridGenerator
from ...utils import create_logger, import_object
from .inprocessing import InProcessing
DEFAULT_GRID_SIZE = 10
class FairlearnClassifier(InProcessing):
    """In-processing fairness method wrapping a Fairlearn reductions algorithm.

    Designed for ``fairlearn.reductions.ExponentiatedGradient`` and
    ``fairlearn.reductions.GridSearch``: it instantiates the base estimator,
    the fairness constraint (a Fairlearn ``Moment``), and the reduction
    itself, then exposes the usual ``fit`` / ``predict_proba`` interface.
    """

    def __init__(
        self,
        reduction: Union[str, Union[object, Callable]],
        estimator: Union[str, Union[object, Callable]],
        constraint: Union[str, Union[object, Callable]],
        **kwargs,
    ):
        """Creates a model from the Fairlearn package.
        Especially designed for the ExponentiatedGradient and GridSearch methods.

        Parameters
        ----------
        reduction : Union[str, callable]
            Reductions method. Either Exponentiated Gradient or Grid Search method. If
            string, it is imported during runtime.
        estimator : Union[str, callable]
            Base estimator for the reductions method. If string, it is imported during
            runtime.
        constraint: Union[str, Moment]
            Constraint for the reductions method. Must be a Moment from the Fairlearn
            package.
        **kwargs : dict, optional
            A dictionary containing the hyperparameters for the reduction method, base
            estimator and constraint. Parameters for the base estimator should be
            included with the prefix `model__`, and for the constraint with the prefix
            `constraint__`. Every other parameter will be passed down to the reductions
            method.
        """
        # Fix: logger name previously said "ExponentiatedGradient" even though this
        # class wraps any reduction (including GridSearch); name it after the class.
        self.logger = create_logger("methods.inprocessing.FairlearnClassifier")

        # Any of the three components may be given as a dotted-path string;
        # resolve those to objects at runtime.
        if isinstance(reduction, str):
            self.logger.debug(f"Importing reduction: '{reduction}'.")
            reduction = import_object(reduction)
        if isinstance(estimator, str):
            self.logger.debug(f"Importing estimator: '{estimator}'.")
            estimator = import_object(estimator)
        if isinstance(constraint, str):
            self.logger.debug(f"Importing constraint: '{constraint}'.")
            constraint = import_object(constraint)

        # Split kwargs by prefix into (estimator, constraint, reduction) parameters.
        self.model_kwargs, self.constraint_kwargs, self.kwargs = self.parse_kwargs(
            kwargs
        )

        # Instantiate the three components for the method.
        self.logger.info(
            f"Instantiating estimator '{estimator}' with parameters:"
            f" {self.model_kwargs}."
        )
        self.estimator = estimator(**self.model_kwargs)
        self.logger.info(
            f"Instantiating constraint '{constraint}' with "
            f"parameters: {self.constraint_kwargs}."
        )
        self.constraint = constraint(**self.constraint_kwargs)
        self.logger.info(
            f"Instantiating reduction '{reduction}' with parameters:" f" {self.kwargs}."
        )
        self.reduction = reduction(
            estimator=self.estimator,
            constraints=self.constraint,
            **self.kwargs,
        )

        # ExponentiatedGradient exposes scores only through the private
        # `_pmf_predict` (probability-mass-function) method; other reductions
        # (e.g. GridSearch) expose the standard `predict_proba`.
        if isinstance(self.reduction, ExponentiatedGradient):
            self.predict_proba_method = lambda clf: clf._pmf_predict
        else:
            self.predict_proba_method = lambda clf: clf.predict_proba

    def fit(self, X: pd.DataFrame, y: pd.Series, s: pd.Series):
        """Fits the fairlearn classifier to the data.

        For GridSearch, a reduced Lagrange-multiplier grid is generated first
        (see ``_generate_grid``).

        Parameters
        ----------
        X : pd.DataFrame
            The input data.
        y : pd.Series
            The target values.
        s : pd.Series
            The protected attribute.
        """
        if isinstance(self.reduction, GridSearch):
            self._generate_grid(X, y, s)
        return self.reduction.fit(X, y, sensitive_features=s)

    def predict_proba(
        self,
        X: pd.DataFrame,
        s: Optional[pd.Series] = None,
    ) -> pd.Series:
        """Use the machine learning model to make predictions on new data.

        Parameters
        ----------
        X : pd.DataFrame
            The input data.
        s : Optional[pd.Series], optional
            The protected attribute. Unused here; kept for interface
            compatibility with other in-processing methods.

        Returns
        -------
        pd.Series
            Predicted probability of the positive class, indexed like ``X``.
        """
        # Column 1 is the positive-class probability; this assumes a binary
        # task with a two-column probability output.
        # Note: This is based on assumption of using LightGBM as base estimator
        return pd.Series(
            data=self.predict_proba_method(self.reduction)(X)[:, 1],
            name="predictions",
            index=X.index,
        )

    def _generate_grid(self, X: pd.DataFrame, y: pd.Series, s: pd.Series) -> None:
        """Generates a grid to pass to GridSearch method.

        Builds the full Lagrange-multiplier grid via Fairlearn's private
        ``_GridGenerator`` and randomly keeps 2 of its columns, assigning the
        result to ``self.reduction.grid``.

        NOTE(review): this uses ``self.kwargs.pop(...)``, so the grid-related
        keys are removed from ``self.kwargs`` on the first ``fit`` call;
        a second ``fit`` would fall back to the defaults. Also, the default
        ``grid_size`` here is 50 while the module-level ``DEFAULT_GRID_SIZE``
        is 10 and unused — confirm which is intended.

        Parameters
        ----------
        X : pd.DataFrame
            The input data.
        y : pd.Series
            The target values.
        s : pd.Series
            The protected attribute.
        """
        # The constraint must have data loaded before its basis attributes are
        # available; use a copy so the real constraint is untouched.
        dummy_constraint = deepcopy(self.constraint)
        dummy_constraint.load_data(X, y, sensitive_features=s)

        # Randomly select a set of Lagrangian multipliers from the generated grid
        grid = _GridGenerator(
            grid_size=self.kwargs.pop("grid_size", 50),
            grid_limit=self.kwargs.pop("grid_limit", 3.0),
            pos_basis=self.kwargs.pop("pos_basis", dummy_constraint.pos_basis),
            neg_basis=self.kwargs.pop("neg_basis", dummy_constraint.neg_basis),
            neg_allowed=self.kwargs.pop(
                "neg_allowed", dummy_constraint.neg_basis_present
            ),
            force_L1_norm=self.kwargs.pop(
                "force_L1_norm",
                dummy_constraint.default_objective_lambda_vec is not None,
            ),
            grid_offset=None,
        ).grid
        self.rng = np.random.RandomState(self.kwargs.pop("random_state", 42))
        # Keep 2 randomly chosen multiplier vectors (columns) from the grid.
        rng_indices = self.rng.choice(grid.shape[1], 2, replace=False)
        grid = grid.iloc[:, rng_indices]
        self.reduction.grid = grid

    @staticmethod
    def parse_kwargs(kwargs: dict[str, Any]) -> tuple[dict, dict, dict]:
        """Parses the keyword arguments for the FairlearnClassifier.

        Parameters
        ----------
        kwargs : dict[str, Any]
            A dictionary containing the hyperparameters for the reduction method, base
            estimator and constraint. Parameters for the base estimator should be
            included with the prefix `model__`, and for the constraint with the prefix
            `constraint__`. Every other parameter will be passed down to the reductions
            method.

        Returns
        -------
        tuple[dict, dict, dict]
            The parameters for the base estimator, constraint, and reductions method, in
            this order.
        """
        MODEL_PREFIX = "model__"
        CONSTRAINT_PREFIX = "constraint__"
        # kwargs for base estimator (or model): strip the prefix from the key.
        model_kwargs = {
            k[len(MODEL_PREFIX) :]: v
            for k, v in kwargs.items()
            if k.startswith(MODEL_PREFIX)
        }
        # -> for the constraint
        constraint_kwargs = {
            k[len(CONSTRAINT_PREFIX) :]: v
            for k, v in kwargs.items()
            if k.startswith(CONSTRAINT_PREFIX)
        }
        # -> finally, everything left is a kwarg to the reductions method.
        # str.startswith accepts a tuple of prefixes (replaces any([...])).
        kwargs = {
            k: v
            for k, v in kwargs.items()
            if not k.startswith((MODEL_PREFIX, CONSTRAINT_PREFIX))
        }
        return model_kwargs, constraint_kwargs, kwargs