113 lines
3.7 KiB
Python
113 lines
3.7 KiB
Python
|
# Sebastian Raschka 2014-2020
|
||
|
# mlxtend Machine Learning Library Extensions
|
||
|
#
|
||
|
# Author: Sebastian Raschka <sebastianraschka.com>
|
||
|
#
|
||
|
# License: BSD 3 clause
|
||
|
|
||
|
from scipy.optimize import minimize
|
||
|
import warnings
|
||
|
|
||
|
import numpy as np
|
||
|
|
||
|
|
||
|
def create_counterfactual(x_reference, y_desired, model, X_dataset,
                          y_desired_proba=None, lammbda=0.1, random_seed=None):
    """
    Implementation of the counterfactual method by Wachter et al. 2017

    Starting from a randomly drawn row of `X_dataset`, the Nelder-Mead
    simplex method is used to minimize

        lammbda * (model(x_counterfact) - y_target)^2
            + sum_k(|x_reference_k - x_counterfact_k| / MAD_k)

    where `y_target` is `y_desired` (class label) or `y_desired_proba`
    (class probability), and `MAD_k` is the median absolute deviation of
    feature `k` over `X_dataset`.

    References:

    - Wachter, S., Mittelstadt, B., & Russell, C. (2017).
    Counterfactual explanations without opening the black box:
     Automated decisions and the GDPR. Harv. JL & Tech., 31, 841.,
     https://arxiv.org/abs/1711.00399

    Parameters
    ----------

    x_reference : array-like, shape=[m_features]
        The data instance (training example) to be explained.

    y_desired : int
        The desired class label for `x_reference`. When
        `y_desired_proba` is given, `y_desired` is also used as the
        column index into the output of `model.predict_proba()`.

    model : estimator
        A (scikit-learn) estimator implementing `.predict()` and/or
        `predict_proba()`.
        - If `y_desired_proba` is not None, `predict_proba()` is used
        for the first loss term,
        `lambda * (model.predict_proba(x_counterfact) - y_desired_proba)^2`
        - Otherwise, `predict()` is used,
        `lambda * (model.predict(x_counterfact) - y_desired)^2`

    X_dataset : array-like, shape=[n_examples, m_features]
        A (training) dataset for picking the initial counterfactual
        as initial value for starting the optimization procedure.
        It is also used to compute the per-feature median absolute
        deviation that scales the distance term.

    y_desired_proba : float (default: None)
        A float within the range [0, 1] designating the desired
        class probability for `y_desired`.
        - If `y_desired_proba=None` (default), the first loss term
        is `lambda * (model(x_counterfact) - y_desired)^2` where
        `y_desired` is a class label
        - If `y_desired_proba` is not None, the first loss term
        is `lambda * (model(x_counterfact) - y_desired_proba)^2`

    lammbda : Weighting parameter for the first loss term,
        `lambda * (model(x_counterfact) - y_desired[_proba])^2`

    random_seed : int (default=None)
        If int, random_seed is the seed used by
        the random number generator for selecting the initial
        counterfactual from `X_dataset`.

    Returns
    ----------

    x_counterfact : array, shape=[m_features]
        The counterfactual found by the optimizer. A warning is issued
        (and the last iterate is still returned) if the optimizer
        reports that it did not succeed.

    Raises
    ----------

    AttributeError
        If `y_desired_proba` is given but `model` has no
        `predict_proba` attribute.

    """
    use_proba = y_desired_proba is not None
    if use_proba and not hasattr(model, "predict_proba"):
        raise AttributeError("Your `model` does not support "
                             "`predict_proba`. Set `y_desired_proba` "
                             " to `None` to use `predict`instead.")

    # Target of the prediction term: the class probability when one was
    # requested, the class label otherwise.
    y_target = y_desired_proba if use_proba else y_desired

    # Accept any array-like input (lists, tuples, ...), as documented.
    x_reference = np.asarray(x_reference)
    X_dataset = np.asarray(X_dataset)

    # start with random counterfactual
    rng = np.random.RandomState(random_seed)
    x_start = X_dataset[rng.randint(X_dataset.shape[0])]

    # Per-feature median absolute deviation over the dataset:
    # MAD_k = median_j(|X_jk - median_l(X_lk)|).
    feature_medians = np.median(X_dataset, axis=0)
    mad = np.median(np.abs(X_dataset - feature_medians), axis=0)
    # A feature whose MAD is zero (e.g. more than half of its values are
    # identical) would make the distance infinite or NaN; leave such
    # features unscaled instead.
    mad = np.where(mad == 0, 1.0, mad)

    def loss(x_candidate, weight):
        """Weighted squared prediction error plus MAD-scaled L1 distance."""
        row = x_candidate.reshape(1, -1)

        if use_proba:
            y_predict = model.predict_proba(row).flatten()[y_desired]
        else:
            # `predict` returns an array with one entry per row; take the
            # single entry so that the objective is a true scalar.
            y_predict = np.ravel(model.predict(row))[0]

        prediction_term = weight * (y_predict - y_target) ** 2
        distance_term = np.sum(np.abs(x_reference - x_candidate) / mad)
        return prediction_term + distance_term

    # `args` must be a tuple; `(lammbda)` without the comma is just a scalar.
    res = minimize(loss, x_start, args=(lammbda,), method='Nelder-Mead')

    if not res['success']:
        warnings.warn(res['message'])

    return res['x']
|