projektAI/venv/Lib/site-packages/mlxtend/evaluate/counterfactual.py
2021-06-06 22:13:05 +02:00

113 lines
3.7 KiB
Python

# Sebastian Raschka 2014-2020
# mlxtend Machine Learning Library Extensions
#
# Author: Sebastian Raschka <sebastianraschka.com>
#
# License: BSD 3 clause
from scipy.optimize import minimize
import warnings
import numpy as np
def create_counterfactual(x_reference, y_desired, model, X_dataset,
                          y_desired_proba=None, lammbda=0.1, random_seed=None):
    """
    Implementation of the counterfactual method by Wachter et al. 2017

    Starting from a randomly drawn row of `X_dataset`, a Nelder-Mead search
    minimizes

        lambda * (model(x_counterfact) - target)^2
            + sum_k |x_reference_k - x_counterfact_k| / MAD_k

    where `target` is `y_desired` (class label) or `y_desired_proba`, and
    `MAD_k` is the median absolute deviation of feature `k` over
    `X_dataset`.

    References:

    - Wachter, S., Mittelstadt, B., & Russell, C. (2017).
      Counterfactual explanations without opening the black box:
      Automated decisions and the GDPR. Harv. JL & Tech., 31, 841.,
      https://arxiv.org/abs/1711.00399

    Parameters
    ----------
    x_reference : array-like, shape=[m_features]
        The data instance (training example) to be explained.
    y_desired : int
        The desired class label for `x_reference`. When probabilities are
        used, this value is also used as the column index into the output
        of `model.predict_proba()`.
    model : estimator
        A (scikit-learn) estimator implementing `.predict()` and/or
        `predict_proba()`.
        - If `y_desired_proba` is given, `predict_proba()` is used for
          the first loss term,
          `lambda * (model.predict_proba(x_counterfact) - y_desired_proba)^2`
        - Otherwise, `predict()` is used,
          `lambda * (model.predict(x_counterfact) - y_desired)^2`
    X_dataset : array-like, shape=[n_examples, m_features]
        A (training) dataset for picking the initial counterfactual
        as initial value for starting the optimization procedure. It is
        also used to compute the per-feature median absolute deviation
        that scales the distance term.
    y_desired_proba : float (default: None)
        A float within the range [0, 1] designating the desired
        class probability for `y_desired`.
        - If `y_desired_proba=None` (default), the first loss term
          is `lambda * (model(x_counterfact) - y_desired)^2` where
          `y_desired` is a class label
        - If `y_desired_proba` is not None, the first loss term
          is `lambda * (model(x_counterfact) - y_desired_proba)^2`
    lammbda : Weighting parameter for the first loss term,
        `lambda * (model(x_counterfact) - y_desired[_proba])^2`
    random_seed : int (default=None)
        If int, random_seed is the seed used by
        the random number generator for selecting the initial
        counterfactual from `X_dataset`.

    Returns
    ----------
    x_counterfact : numpy array, shape=[m_features]
        The counterfactual found by the optimizer. A warning is issued
        (and the last iterate is still returned) if the optimizer reports
        that it did not succeed.

    Raises
    ----------
    AttributeError
        If `y_desired_proba` is given but `model` has no `predict_proba`.
    ValueError
        If `X_dataset` contains no examples.

    """
    use_proba = y_desired_proba is not None
    if use_proba and not hasattr(model, "predict_proba"):
        raise AttributeError("Your `model` does not support "
                             "`predict_proba`. Set `y_desired_proba` "
                             " to `None` to use `predict`instead.")

    # Target of the prediction term: a class probability when one was
    # requested, the class label itself otherwise.
    y_to_be_annealed_to = y_desired_proba if use_proba else y_desired

    # The parameters are documented as array-like, so accept lists as well.
    X_dataset = np.asarray(X_dataset, dtype=float)
    x_reference = np.asarray(x_reference, dtype=float)
    if X_dataset.shape[0] == 0:
        raise ValueError("`X_dataset` must contain at least one example.")

    # start with random counterfactual
    rng = np.random.RandomState(random_seed)
    x_counterfact = X_dataset[rng.randint(X_dataset.shape[0])]

    # Per-feature median absolute deviation over the dataset:
    # MAD_k = median_j |X_jk - median_l X_lk|
    feature_medians = np.median(X_dataset, axis=0)
    mad = np.median(np.abs(X_dataset - feature_medians), axis=0)
    # A feature whose MAD is 0 (e.g. constant or mostly-constant columns)
    # would turn the distance into inf/nan; leave such features unscaled.
    mad = np.where(mad == 0, 1.0, mad)

    def dist(reference, candidate):
        # MAD-weighted L1 distance between the two points.
        return np.sum(np.abs(reference - candidate) / mad)

    def loss(candidate, weight):
        row = candidate.reshape(1, -1)
        if use_proba:
            y_predict = np.ravel(model.predict_proba(row))[y_desired]
        else:
            # `predict` returns one label per row; take the single label so
            # that the objective is a scalar rather than a 1-element array.
            y_predict = np.ravel(model.predict(row))[0]
        diff = weight * (y_predict - y_to_be_annealed_to) ** 2
        return float(diff + dist(x_reference, candidate))

    # `args` must be a tuple, hence the trailing comma.
    res = minimize(loss, x_counterfact, args=(lammbda,),
                   method='Nelder-Mead')

    if not res['success']:
        warnings.warn(res['message'])

    x_counterfact = res['x']
    return x_counterfact