projektAI/venv/Lib/site-packages/mlxtend/preprocessing/transactionencoder.py

# Sebastian Raschka 2014-2020
# mlxtend Machine Learning Library Extensions
# Author: Sebastian Raschka <sebastianraschka.com>
#
# License: BSD 3 clause

import numpy as np
from scipy.sparse import csr_matrix
from sklearn.base import BaseEstimator, TransformerMixin


class TransactionEncoder(BaseEstimator, TransformerMixin):
    """Encoder class for transaction data in Python lists

    Learns the sorted set of unique items occurring in a list of
    transactions and one-hot encodes transactions into a boolean
    array (dense NumPy array or SciPy CSR matrix) with one column
    per learned item.

    Parameters
    ------------
    None

    Attributes
    ------------
    columns_: list
      List of unique names in the `X` input list of lists
      (sorted; set by `fit`).

    columns_mapping_: dict
      Mapping from each item in `columns_` to its column index
      (set by `fit`).

    Examples
    ------------
    For usage examples, please see
    http://rasbt.github.io/mlxtend/user_guide/preprocessing/TransactionEncoder/

    """
    def __init__(self):
        # The encoder has no hyperparameters; all state is learned in fit().
        pass

    def fit(self, X):
        """Learn unique column names from a list of transactions

        Parameters
        ------------
        X : list of lists
          A python list of lists, where the outer list stores the
          n transactions and the inner list stores the items in each
          transaction.

          For example,
          [['Apple', 'Beer', 'Rice', 'Chicken'],
           ['Apple', 'Beer', 'Rice'],
           ['Apple', 'Beer'],
           ['Apple', 'Bananas'],
           ['Milk', 'Beer', 'Rice', 'Chicken'],
           ['Milk', 'Beer', 'Rice'],
           ['Milk', 'Beer'],
           ['Apple', 'Bananas']]

        Returns
        ------------
        self : TransactionEncoder
          The fitted encoder, with `columns_` and `columns_mapping_` set.

        """
        unique_items = set()
        for transaction in X:
            unique_items.update(transaction)
        # Sorting fixes the column order independently of input order.
        self.columns_ = sorted(unique_items)
        self.columns_mapping_ = {item: col_idx for col_idx, item
                                 in enumerate(self.columns_)}
        return self

    def transform(self, X, sparse=False):
        """Transform transactions into a one-hot encoded NumPy array.

        Parameters
        ------------
        X : list of lists
          A python list of lists, where the outer list stores the
          n transactions and the inner list stores the items in each
          transaction.

          For example,
          [['Apple', 'Beer', 'Rice', 'Chicken'],
           ['Apple', 'Beer', 'Rice'],
           ['Apple', 'Beer'],
           ['Apple', 'Bananas'],
           ['Milk', 'Beer', 'Rice', 'Chicken'],
           ['Milk', 'Beer', 'Rice'],
           ['Milk', 'Beer'],
           ['Apple', 'Bananas']]

        sparse: bool (default=False)
          If True, transform will return Compressed Sparse Row matrix
          instead of the regular one.

        Returns
        ------------
        array : NumPy array [n_transactions, n_unique_items]
           if sparse=False (default).
           Compressed Sparse Row matrix otherwise
           The one-hot encoded boolean array of the input transactions,
           where the columns represent the unique items found in the input
           array in alphabetic order. Exact representation depends
           on the sparse argument. In both cases the number of columns
           equals len(self.columns_).

           For example,
           array([[True , False, True , True , False, True ],
                  [True , False, True , False, False, True ],
                  [True , False, True , False, False, False],
                  [True , True , False, False, False, False],
                  [False, False, True , True , True , True ],
                  [False, False, True , False, True , True ],
                  [False, False, True , False, True , False],
                  [True , True , False, False, False, False]])
          The corresponding column labels are available as self.columns_, e.g.,
          ['Apple', 'Bananas', 'Beer', 'Chicken', 'Milk', 'Rice']

        Raises
        ------------
        KeyError
          If a transaction contains an item that was not seen in `fit`.
        """
        mapping = self.columns_mapping_
        n_columns = len(self.columns_)
        if sparse:
            indptr = [0]
            indices = []
            for transaction in X:
                # A set removes duplicate items (a CSR row must not list
                # the same column twice); sorting makes the stored index
                # order deterministic instead of depending on set order.
                indices.extend(sorted({mapping[item]
                                       for item in transaction}))
                indptr.append(len(indices))
            non_sparse_values = [True]*len(indices)
            # The shape is given explicitly: without it the column count
            # is inferred from the largest index present, which is wrong
            # when trailing columns are absent and fails on empty input.
            array = csr_matrix((non_sparse_values, indices, indptr),
                               shape=(len(indptr) - 1, n_columns),
                               dtype=bool)
        else:
            array = np.zeros((len(X), n_columns), dtype=bool)
            for row_idx, transaction in enumerate(X):
                for item in transaction:
                    array[row_idx, mapping[item]] = True
        return array

    def inverse_transform(self, array):
        """Transforms an encoded NumPy array back into transactions.

        Parameters
        ------------
        array : NumPy array [n_transactions, n_unique_items]
            The NumPy one-hot encoded boolean array of the input transactions,
            where the columns represent the unique items found in the input
            array in alphabetic order. An object exposing `toarray()`
            (e.g. the sparse matrix returned by `transform(X, sparse=True)`)
            is converted to a dense array first.

            For example,
            ```
            array([[True , False, True , True , False, True ],
                  [True , False, True , False, False, True ],
                  [True , False, True , False, False, False],
                  [True , True , False, False, False, False],
                  [False, False, True , True , True , True ],
                  [False, False, True , False, True , True ],
                  [False, False, True , False, True , False],
                  [True , True , False, False, False, False]])
            ```
            The corresponding column labels are available as self.columns_,
            e.g., ['Apple', 'Bananas', 'Beer', 'Chicken', 'Milk', 'Rice']

        Returns
        ------------
        X : list of lists
            A python list of lists, where the outer list stores the
            n transactions and the inner list stores the items in each
            transaction. Items within a transaction follow the order
            of self.columns_.

          For example,
          ```
          [['Apple', 'Beer', 'Chicken', 'Rice'],
           ['Apple', 'Beer', 'Rice'],
           ['Apple', 'Beer'],
           ['Apple', 'Bananas'],
           ['Beer', 'Chicken', 'Milk', 'Rice'],
           ['Beer', 'Milk', 'Rice'],
           ['Beer', 'Milk'],
           ['Apple', 'Bananas']]
          ```

        """
        if hasattr(array, "toarray"):
            # Sparse matrices do not iterate cell-by-cell, so densify
            # them before walking the rows.
            array = array.toarray()
        return [[self.columns_[idx]
                 for idx, cell in enumerate(row) if cell]
                for row in array]

    def fit_transform(self, X, sparse=False):
        """Fit a TransactionEncoder encoder and transform a dataset."""
        return self.fit(X).transform(X, sparse=sparse)
Działa 2021-06-06 22:13:05 +02:00			`# Sebastian Raschka 2014-2020`
			`# mlxtend Machine Learning Library Extensions`
			`# Author: Sebastian Raschka <sebastianraschka.com>`
			`#`
			`# License: BSD 3 clause`

			`import numpy as np`
			`from scipy.sparse import csr_matrix`
			`from sklearn.base import BaseEstimator, TransformerMixin`


			`class TransactionEncoder(BaseEstimator, TransformerMixin):`
			`"""Encoder class for transaction data in Python lists`

			`Parameters`
			`------------`
			`None`

			`Attributes`
			`------------`
			`columns_: list`
			List of unique names in the `X` input list of lists

			`Examples`
			`------------`
			`For usage examples, please see`
			`http://rasbt.github.io/mlxtend/user_guide/preprocessing/TransactionEncoder/`

			`"""`
			`def __init__(self):`
			`return None`

			`def fit(self, X):`
			`"""Learn unique column names from transaction DataFrame`

			`Parameters`
			`------------`
			`X : list of lists`
			`A python list of lists, where the outer list stores the`
			`n transactions and the inner list stores the items in each`
			`transaction.`

			`For example,`
			`[['Apple', 'Beer', 'Rice', 'Chicken'],`
			`['Apple', 'Beer', 'Rice'],`
			`['Apple', 'Beer'],`
			`['Apple', 'Bananas'],`
			`['Milk', 'Beer', 'Rice', 'Chicken'],`
			`['Milk', 'Beer', 'Rice'],`
			`['Milk', 'Beer'],`
			`['Apple', 'Bananas']]`

			`"""`
			`unique_items = set()`
			`for transaction in X:`
			`for item in transaction:`
			`unique_items.add(item)`
			`self.columns_ = sorted(unique_items)`
			`columns_mapping = {}`
			`for col_idx, item in enumerate(self.columns_):`
			`columns_mapping[item] = col_idx`
			`self.columns_mapping_ = columns_mapping`
			`return self`

			`def transform(self, X, sparse=False):`
			`"""Transform transactions into a one-hot encoded NumPy array.`

			`Parameters`
			`------------`
			`X : list of lists`
			`A python list of lists, where the outer list stores the`
			`n transactions and the inner list stores the items in each`
			`transaction.`

			`For example,`
			`[['Apple', 'Beer', 'Rice', 'Chicken'],`
			`['Apple', 'Beer', 'Rice'],`
			`['Apple', 'Beer'],`
			`['Apple', 'Bananas'],`
			`['Milk', 'Beer', 'Rice', 'Chicken'],`
			`['Milk', 'Beer', 'Rice'],`
			`['Milk', 'Beer'],`
			`['Apple', 'Bananas']]`

			`sparse: bool (default=False)`
			`If True, transform will return Compressed Sparse Row matrix`
			`instead of the regular one.`

			`Returns`
			`------------`
			`array : NumPy array [n_transactions, n_unique_items]`
			`if sparse=False (default).`
			`Compressed Sparse Row matrix otherwise`
			`The one-hot encoded boolean array of the input transactions,`
			`where the columns represent the unique items found in the input`
			`array in alphabetic order. Exact representation depends`
			`on the sparse argument`

			`For example,`
			`array([[True , False, True , True , False, True ],`
			`[True , False, True , False, False, True ],`
			`[True , False, True , False, False, False],`
			`[True , True , False, False, False, False],`
			`[False, False, True , True , True , True ],`
			`[False, False, True , False, True , True ],`
			`[False, False, True , False, True , False],`
			`[True , True , False, False, False, False]])`
			`The corresponding column labels are available as self.columns_, e.g.,`
			`['Apple', 'Bananas', 'Beer', 'Chicken', 'Milk', 'Rice']`
			`"""`
			`if sparse:`
			`indptr = [0]`
			`indices = []`
			`for transaction in X:`
			`# set is necessary because conversion to SparseDataFrame`
			`# will fail if there are duplicate items`
			`for item in set(transaction):`
			`col_idx = self.columns_mapping_[item]`
			`indices.append(col_idx)`
			`indptr.append(len(indices))`
			`non_sparse_values = [True]*len(indices)`
			`array = csr_matrix((non_sparse_values, indices, indptr),`
			`dtype=bool)`
			`else:`
			`array = np.zeros((len(X), len(self.columns_)), dtype=bool)`
			`for row_idx, transaction in enumerate(X):`
			`for item in transaction:`
			`col_idx = self.columns_mapping_[item]`
			`array[row_idx, col_idx] = True`
			`return array`

			`def inverse_transform(self, array):`
			`"""Transforms an encoded NumPy array back into transactions.`

			`Parameters`
			`------------`
			`array : NumPy array [n_transactions, n_unique_items]`
			`The NumPy one-hot encoded boolean array of the input transactions,`
			`where the columns represent the unique items found in the input`
			`array in alphabetic order`

			`For example,`
			```
			`array([[True , False, True , True , False, True ],`
			`[True , False, True , False, False, True ],`
			`[True , False, True , False, False, False],`
			`[True , True , False, False, False, False],`
			`[False, False, True , True , True , True ],`
			`[False, False, True , False, True , True ],`
			`[False, False, True , False, True , False],`
			`[True , True , False, False, False, False]])`
			```
			`The corresponding column labels are available as self.columns_,`
			`e.g., ['Apple', 'Bananas', 'Beer', 'Chicken', 'Milk', 'Rice']`

			`Returns`
			`------------`
			`X : list of lists`
			`A python list of lists, where the outer list stores the`
			`n transactions and the inner list stores the items in each`
			`transaction.`

			`For example,`
			```
			`[['Apple', 'Beer', 'Rice', 'Chicken'],`
			`['Apple', 'Beer', 'Rice'],`
			`['Apple', 'Beer'],`
			`['Apple', 'Bananas'],`
			`['Milk', 'Beer', 'Rice', 'Chicken'],`
			`['Milk', 'Beer', 'Rice'],`
			`['Milk', 'Beer'],`
			`['Apple', 'Bananas']]`
			```

			`"""`
			`return [[self.columns_[idx]`
			`for idx, cell in enumerate(row) if cell]`
			`for row in array]`

			`def fit_transform(self, X, sparse=False):`
			`"""Fit a TransactionEncoder encoder and transform a dataset."""`
			`return self.fit(X).transform(X, sparse=sparse)`