# Sebastian Raschka 2014-2020
# mlxtend Machine Learning Library Extensions
#
# Classes for column-based scaling of datasets
# Author: Sebastian Raschka <sebastianraschka.com>
#
# License: BSD 3 clause

import pandas as pd
import numpy as np


def minmax_scaling(array, columns, min_val=0, max_val=1):
    """Min max scaling of pandas' DataFrames and NumPy arrays.

    Each selected column is linearly rescaled so that its smallest value
    maps to `min_val` and its largest value maps to `max_val`.

    Parameters
    ----------
    array : pandas DataFrame or NumPy ndarray, shape = [n_rows, n_columns].
    columns : array-like, shape = [n_columns]
        Array-like with column names, e.g., ['col1', 'col2', ...]
        or column indices [0, 2, 4, ...]
    min_val : `int` or `float`, optional (default=`0`)
        minimum value after rescaling.
    max_val : `int` or `float`, optional (default=`1`)
        maximum value after rescaling.

    Returns
    ----------
    df_new : pandas DataFrame or NumPy ndarray.
        The rescaled `columns` only (not the remaining columns), taken
        from a float copy of the input. The input itself is not modified.

    Raises
    ----------
    AttributeError
        If the input (after conversion to float) is neither a pandas
        DataFrame nor a NumPy array.

    Notes
    ----------
    A column whose values are all identical has a range of zero, so the
    division below yields NaN for that column.

    Examples
    ----------
    For usage examples, please see
    http://rasbt.github.io/mlxtend/user_guide/preprocessing/minmax_scaling/

    """
    # astype returns a new float object, so the caller's data stays untouched.
    ary_new = array.astype(float)
    if len(ary_new.shape) == 1:
        ary_new = ary_new[:, np.newaxis]

    # `.loc` lets DataFrames be addressed with the same `[:, columns]`
    # syntax that NumPy arrays use.
    if isinstance(ary_new, pd.DataFrame):
        ary_newt = ary_new.loc
    elif isinstance(ary_new, np.ndarray):
        ary_newt = ary_new
    else:
        raise AttributeError('Input array must be a pandas '
                             'DataFrame or NumPy array')

    # Slice the requested columns once and reuse the result.
    selected = ary_newt[:, columns]
    col_min = selected.min(axis=0)
    col_range = selected.max(axis=0) - col_min
    scaled = (selected - col_min) / col_range

    # Map [0, 1] onto [min_val, max_val] whenever EITHER bound differs from
    # the default; requiring both to differ would skip e.g. (0, 10).
    if min_val != 0 or max_val != 1:
        scaled = scaled * (max_val - min_val) + min_val

    ary_newt[:, columns] = scaled
    return ary_newt[:, columns]


def standardize(array, columns=None, ddof=0, return_params=False, params=None):
    """Standardize columns in pandas DataFrames and NumPy arrays.

    Each selected column is centered by subtracting its mean and scaled
    by dividing by its standard deviation.

    Parameters
    ----------
    array : pandas DataFrame or NumPy ndarray, shape = [n_rows, n_columns].
    columns : array-like, shape = [n_columns] (default: None)
        Array-like with column names, e.g., ['col1', 'col2', ...]
        or column indices [0, 2, 4, ...]
        If None, standardizes all columns.
    ddof : int (default: 0)
        Delta Degrees of Freedom. The divisor used in calculations
        is N - ddof, where N represents the number of elements.
    return_params : bool (default: False)
        If set to True, a dictionary is returned in addition to the
        standardized array. The parameter dictionary contains the
        column means ('avgs') and standard deviations ('stds') of
        the individual columns.
    params : dict (default: None)
        A dictionary with column means and standard deviations as
        returned by the `standardize` function if `return_params`
        was set to True. If a `params` dictionary is provided, the
        `standardize` function will use these instead of computing
        them from the current array. The dictionary is used as given
        and is not modified.

    Notes
    ----------
    If the parameters are computed from `array` and all values in a given
    column are the same, these values are all set to `0.0`: the mean in
    the `parameters` dictionary is set to the constant value and the
    standard deviation is set to `1.0` to avoid dividing by zero.

    Returns
    ----------
    df_new : pandas DataFrame or NumPy ndarray.
        The standardized `columns` only, taken from a float copy of the
        input. If `return_params` is True, a tuple
        `(df_new, parameters)` is returned instead.

    Raises
    ----------
    AttributeError
        If the input (after conversion to float) is neither a pandas
        DataFrame nor a NumPy array.

    Examples
    ----------
    For usage examples, please see
    http://rasbt.github.io/mlxtend/user_guide/preprocessing/standardize/

    """
    # astype returns a new float object, so the caller's data stays untouched.
    ary_new = array.astype(float)
    if len(ary_new.shape) == 1:
        ary_new = ary_new[:, np.newaxis]

    # `.loc` lets DataFrames be addressed with the same `[:, columns]`
    # syntax that NumPy arrays use.
    is_frame = isinstance(ary_new, pd.DataFrame)
    if is_frame:
        ary_newt = ary_new.loc
        if columns is None:
            columns = ary_new.columns
    elif isinstance(ary_new, np.ndarray):
        ary_newt = ary_new
        if columns is None:
            columns = list(range(ary_new.shape[1]))
    else:
        raise AttributeError('Input array must be a pandas '
                             'DataFrame or NumPy array')

    selected = ary_newt[:, columns]

    if params is not None:
        # Previously fitted statistics are applied verbatim: the caller's
        # dictionary must not be altered based on the data being transformed.
        parameters = params
    else:
        avgs = selected.mean(axis=0)
        stds = selected.std(axis=0, ddof=ddof)

        # Detect constant columns positionally on the raw values, so that
        # neither the DataFrame index labels nor the column ids matter.
        values = np.asarray(selected)
        are_constant = np.all(values == values[0], axis=0)

        if are_constant.any():
            # Using the constant itself as the mean makes the centered
            # values exactly 0.0; a unit std avoids dividing by zero.
            fixed_avgs = np.where(are_constant, values[0], avgs)
            fixed_stds = np.where(are_constant, 1.0, stds)
            if is_frame:
                # Keep the statistics labeled by column name for DataFrames.
                avgs = pd.Series(fixed_avgs, index=avgs.index)
                stds = pd.Series(fixed_stds, index=stds.index)
            else:
                avgs, stds = fixed_avgs, fixed_stds

        parameters = {'avgs': avgs, 'stds': stds}

    ary_newt[:, columns] = ((selected - parameters['avgs']) /
                            parameters['stds'])

    if return_params:
        return ary_newt[:, columns], parameters
    return ary_newt[:, columns]