# OneR classifier
# Sebastian Raschka 2014-2020
# mlxtend Machine Learning Library Extensions
#
# The classic OneR (One Rule) classifier
# Authors: Sebastian Raschka
#
# License: BSD 3 clause

import warnings

import numpy as np
from scipy.stats import chi2_contingency
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.exceptions import NotFittedError


class OneRClassifier(BaseEstimator, ClassifierMixin):
    """OneR (One Rule) Classifier.

    Parameters
    ----------
    resolve_ties : str (default: 'first')
        Option for how to resolve ties if two or more features
        have the same error. Options are
        - 'first' (default): chooses the first feature in the list, i.e.,
          the feature with the lower column index.
        - 'chi-squared': performs a chi-squared test for each feature
          against the target and selects the feature with the lowest
          p-value.

    Attributes
    ----------
    self.class_labels_ : array-like, shape = [n_labels]
        Array containing the unique class labels found in the
        training set.

    self.feature_idx_ : int
        The index of the rules' feature based on the column in the
        training set.

    self.p_value_ : float
        The p-value for a given feature. Only available after calling
        `fit` when the OneR attribute `resolve_ties = 'chi-squared'`
        is set.

    self.prediction_dict_ : dict
        Dictionary containing information about the feature's
        (self.feature_idx_) rules and total error. E.g.,
        `{'total error': 37, 'rules (value: class)': {0: 0, 1: 2}}`
        means the total error is 37, and the rules are
        "if feature value == 0, classify as 0" and
        "if feature value == 1, classify as 2".
        (And classify as class 1 otherwise.)

    For usage examples, please see
    http://rasbt.github.io/mlxtend/user_guide/classifier/OneRClassifier/

    """
    def __init__(self, resolve_ties='first'):
        allowed = {'first', 'chi-squared'}
        if resolve_ties not in allowed:
            raise ValueError('resolve_ties must be in %s. Got %s.'
                             % (allowed, resolve_ties))
        self.resolve_ties = resolve_ties

    def fit(self, X, y):
        """Learn rule from training data.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number of samples and
            n_features is the number of features.

        y : array-like, shape = [n_samples]
            Target values.

        Returns
        -------
        self : object

        """
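        # OneR in a nutshell: for every feature, build one rule per
        # observed feature value (predict the most frequent class among
        # the rows with that value), add up the misclassifications these
        # rules cause per feature, and keep the single feature whose
        # rule set has the lowest total error.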
        # This check will only catch the most extreme cases,
        # but it is better than nothing
        for c in range(X.shape[1]):
            if np.unique(X[:, c]).shape[0] == X.shape[0]:
                warnings.warn('Feature array likely contains at least one'
                              ' non-categorical column.'
                              ' Column %d appears to have a unique value'
                              ' in every row.' % c)
                break

        self.class_labels_ = np.unique(y)
        n_class_labels = self.class_labels_.shape[0]

        def compute_class_counts(X, y, feature_index, feature_value):
            # class frequencies among the rows where the feature
            # takes on the given value
            mask = X[:, feature_index] == feature_value
            return np.bincount(y[mask], minlength=n_class_labels)

        prediction_dict = {}  # save feature_idx: feature_val, label, error

        # iterate over features
        for feature_index in np.arange(X.shape[1]):

            # iterate over each possible value per feature
            for feature_value in np.unique(X[:, feature_index]):

                class_counts = compute_class_counts(X, y,
                                                    feature_index,
                                                    feature_value)
                most_frequent_class = np.argmax(class_counts)

                # count all classes for that feature match
                # except the most frequent one
                inverse_index = np.ones(n_class_labels, dtype=bool)
                inverse_index[most_frequent_class] = False
                error = np.sum(class_counts[inverse_index])

                # compute the total error for each feature and
                # save all the corresponding rules for a given feature
                if feature_index not in prediction_dict:
                    prediction_dict[feature_index] = {
                        'total error': 0,
                        'rules (value: class)': {}
                    }
                prediction_dict[feature_index][
                    'rules (value: class)'][feature_value] = \
                    most_frequent_class
                prediction_dict[feature_index]['total error'] += error

        # get best feature (i.e., the feature with the lowest error)
        best_err = np.inf
        best_idx = [None]
        for i in prediction_dict:
            if prediction_dict[i]['total error'] < best_err:
                best_err = prediction_dict[i]['total error']
                best_idx[-1] = i
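        # Chi-squared tie-breaking (a short note on the branch below):
        # every feature tied at the lowest error gets a chi-squared test
        # of independence on its feature-vs-class contingency table, and
        # the feature with the lowest p-value, i.e., the strongest
        # association with the target, wins.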
        if self.resolve_ties == 'chi-squared':
            # collect duplicates
            for i in prediction_dict:
                if i == best_idx[-1]:
                    continue
                if prediction_dict[i]['total error'] == best_err:
                    best_idx.append(i)

            p_values = []
            for feature_idx in best_idx:
                rules = prediction_dict[feature_idx]['rules (value: class)']

                # the contingency table for a given feature
                # (e.g., petal_width for iris)
                # is organized as follows (without the sum columns):
                #
                #                            petal_width
                # species     (0.0976,0.791]  (0.791,1.63]  (1.63,2.5]  sum
                # setosa                  50             0           0   50
                # versicolor               0            48           2   50
                # virginica                0             4          46   50
                # sum                     50            52          48  150

                ary = np.zeros((n_class_labels, len(rules)))
                for idx, r in enumerate(rules):
                    ary[:, idx] = np.bincount(y[X[:, feature_idx] == r],
                                              minlength=n_class_labels)

                # returns "stat, p, dof, expected"
                _, p, _, _ = chi2_contingency(ary)
                p_values.append(p)

            # lowest p-value wins (cf. the `resolve_ties` docstring)
            best_p_idx = np.argmin(p_values)
            best_idx = best_idx[best_p_idx]
            self.p_value_ = p_values[best_p_idx]

        elif self.resolve_ties == 'first':
            best_idx = best_idx[0]

        self.feature_idx_ = best_idx
        self.prediction_dict_ = prediction_dict[best_idx]
        return self

    def predict(self, X):
        """Predict class labels for X.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Input vectors, where n_samples is the number of samples and
            n_features is the number of features.

        Returns
        -------
        y_pred : array-like, shape = [n_samples]
            Predicted class labels.

        """
        if not hasattr(self, 'prediction_dict_'):
            raise NotFittedError('Estimator not fitted, '
                                 'call `fit` before using the model.')

        rules = self.prediction_dict_['rules (value: class)']
        y_pred = np.zeros(X.shape[0], dtype=int)

        # Set up a default label for those class labels in the
        # dataset for which no rule exists. We use the first
        # non-specified class label as the default class label.
        rule_labels = set()
        for feature_value in rules:
            class_label = rules[feature_value]
            rule_labels.add(class_label)

        other_label = set(self.class_labels_) - rule_labels
        if len(other_label):
            y_pred[:] = list(other_label)[0]
        # else just use "np.zeros"; we could also change this to
        # self.class_labels_[-1]+1 in the future

        # classify all class labels for which rules exist
        for feature_value in rules:
            mask = X[:, self.feature_idx_] == feature_value
            y_pred[mask] = rules[feature_value]

        return y_pred
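

# A minimal usage sketch (illustration only, not part of the library API):
# fit the classifier on a tiny, made-up categorical dataset and inspect
# the learned rule. `X_demo` and `y_demo` are hypothetical example names.
if __name__ == '__main__':
    X_demo = np.array([[0, 1],
                       [0, 0],
                       [1, 1],
                       [1, 0],
                       [2, 1],
                       [2, 0]])
    y_demo = np.array([0, 0, 1, 1, 2, 2])

    oner = OneRClassifier(resolve_ties='first')
    oner.fit(X_demo, y_demo)
    print(oner.feature_idx_)      # -> 0 (the column the rule is based on)
    print(oner.prediction_dict_)  # learned value-to-class rules and error
    print(oner.predict(X_demo))   # -> [0 0 1 1 2 2] on this toy data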