Skip to content

Commit

Permalink
Fix #266 #271. Raise exception if incompatible metrics in binning_tra…
Browse files Browse the repository at this point in the history
…nsform_params
  • Loading branch information
guillermo-navas-palencia committed Dec 6, 2023
1 parent 2477d5c commit 1ca485e
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 5 deletions.
60 changes: 55 additions & 5 deletions optbinning/binning/binning_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -1379,10 +1379,35 @@ def _transform(self, X, metric, metric_special, metric_missing,
indices_selected_variables = self.get_support(indices=True)
n_selected_variables = len(indices_selected_variables)

if metric == "indices":
# Check if specific binning transform metrics were supplied, and
# whether these are compatible. Default base metric is the binning
# process transform metric.
base_metric = metric

if self.binning_transform_params is not None:
metrics = set()

if metric is not None:
metrics.add(metric)

for idx in indices_selected_variables:
name = self.variable_names[idx]
params = self.binning_transform_params.get(name, {})
metrics.add(params.get("metric", metric))

if len(metrics) > 1:
# The indices and default transform metrics are numeric. If the
# "bins" metric is also present, the dtypes are incompatible.
if "bins" in metrics:
raise ValueError(
"metric 'bins' cannot be mixed with numeric metrics.")
else:
base_metric = metrics.pop()

if base_metric == "indices":
X_transform = np.full(
(n_samples, n_selected_variables), -1, dtype=int)
elif metric == "bins":
elif base_metric == "bins":
X_transform = np.full(
(n_samples, n_selected_variables), "", dtype=object)
else:
Expand Down Expand Up @@ -1423,7 +1448,8 @@ def _transform(self, X, metric, metric_special, metric_missing,
X_transform[:, i] = optb.transform(**tparams)

if isinstance(X, pd.DataFrame):
return pd.DataFrame(X_transform, columns=selected_variables, index=X.index)
return pd.DataFrame(
X_transform, columns=selected_variables, index=X.index)

return X_transform

Expand Down Expand Up @@ -1452,16 +1478,40 @@ def _transform_disk(self, input_path, output_path, chunksize, metric,
selected_variables = self.get_support(names=True)
n_selected_variables = len(selected_variables)

# Check if specific binning transform metrics were supplied, and
# whether these are compatible. Default base metric is the binning
# process transform metric.
base_metric = metric

if self.binning_transform_params is not None:
metrics = set()

if metric is not None:
metrics.add(metric)

for name in selected_variables:
params = self.binning_transform_params.get(name, {})
metrics.add(params.get("metric", metric))

if len(metrics) > 1:
# The indices and default transform metrics are numeric. If the
# "bins" metric is also present, the dtypes are incompatible.
if "bins" in metrics:
raise ValueError(
"metric 'bins' cannot be mixed with numeric metrics.")
else:
base_metric = metrics.pop()

chunks = pd.read_csv(input_path, engine='c', chunksize=chunksize,
usecols=selected_variables, **kwargs)

for k, chunk in enumerate(chunks):
n_samples, n_variables = chunk.shape

if metric == "indices":
if base_metric == "indices":
X_transform = np.full(
(n_samples, n_selected_variables), -1, dtype=int)
elif metric == "bins":
elif base_metric == "bins":
X_transform = np.full(
(n_samples, n_selected_variables), "", dtype=object)
else:
Expand Down
11 changes: 11 additions & 0 deletions tests/test_binning_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -484,6 +484,17 @@ def test_default_fit_transform_disk():
target="target", chunksize=100)


def test_binning_transform_params():
    """Mixing 'bins' with a numeric metric must raise ValueError."""
    # One variable requests the "bins" metric while another requests the
    # "woe" metric through the per-variable transform parameters.
    btp = {variable_names[0]: {"metric": "bins"},
           variable_names[1]: {"metric": "woe"}}

    process = BinningProcess(variable_names[:3],
                             binning_transform_params=btp)

    # Only the raised exception matters here, so the return value of
    # fit_transform is intentionally not bound to a name.
    with raises(ValueError):
        process.fit_transform(X[:, :3], y)


def test_update_binned_variable():
process = BinningProcess(variable_names)
process.fit(X, y, check_input=True)
Expand Down

0 comments on commit 1ca485e

Please sign in to comment.