Inzynierka/Lib/site-packages/sklearn/tree/_tree.pxd
2023-06-02 12:51:02 +02:00

104 lines
4.5 KiB
Cython

# Authors: Gilles Louppe <g.louppe@gmail.com>
# Peter Prettenhofer <peter.prettenhofer@gmail.com>
# Brian Holt <bdholt1@gmail.com>
# Joel Nothman <joel.nothman@gmail.com>
# Arnaud Joly <arnaud.v.joly@gmail.com>
# Jacob Schreiber <jmschreiber91@gmail.com>
# Nelson Liu <nelson@nelsonliu.me>
#
# License: BSD 3 clause
# See _tree.pyx for details.
import numpy as np
cimport numpy as cnp
# C-level aliases for NumPy scalar types.
# SIZE_t and DOUBLE_t are used by the Node struct and the class declarations
# in this file; DTYPE_t, INT32_t and UINT32_t are not referenced here and are
# presumably cimported by sibling modules -- TODO confirm against callers.
ctypedef cnp.npy_float32 DTYPE_t # Type of X
ctypedef cnp.npy_float64 DOUBLE_t # Type of y, sample_weight
ctypedef cnp.npy_intp SIZE_t # Type for indices and counters
ctypedef cnp.npy_int32 INT32_t # Signed 32 bit integer
ctypedef cnp.npy_uint32 UINT32_t # Unsigned 32 bit integer
from ._splitter cimport Splitter
from ._splitter cimport SplitRecord
cdef struct Node:
    # Base storage structure for the nodes in a Tree object.
    #
    # The order and types of the fields determine the C layout of the
    # struct, so they must be kept exactly as declared here unless every
    # piece of code relying on that layout is updated at the same time.
    SIZE_t left_child                    # id of the left child of the node
    SIZE_t right_child                   # id of the right child of the node
    SIZE_t feature                       # Feature used for splitting the node
    DOUBLE_t threshold                   # Threshold value at the node
    DOUBLE_t impurity                    # Impurity of the node (i.e., the value of the criterion)
    SIZE_t n_node_samples                # Number of samples at the node
    DOUBLE_t weighted_n_node_samples     # Weighted number of samples at the node
cdef class Tree:
    # The Tree object is a binary tree structure constructed by the
    # TreeBuilder. The tree structure is used for predictions and
    # feature importances.
    #
    # This block only declares attributes and method signatures; the
    # implementation lives in _tree.pyx. Attributes declared `public` are
    # exposed to Python code, the others are reachable from Cython only.
    # The declaration order is part of the extension type's C layout and is
    # therefore left untouched.

    # Input/Output layout
    cdef public SIZE_t n_features        # Number of features in X
    cdef SIZE_t* n_classes               # Number of classes in y[:, k]
    cdef public SIZE_t n_outputs         # Number of outputs in y
    cdef public SIZE_t max_n_classes     # max(n_classes)

    # Inner structures: values are stored separately from node structure,
    # since size is determined at runtime.
    cdef public SIZE_t max_depth         # Max depth of the tree
    cdef public SIZE_t node_count        # Counter for node IDs
    cdef public SIZE_t capacity          # Capacity of tree, in terms of nodes
    cdef Node* nodes                     # Array of nodes
    cdef double* value                   # (capacity, n_outputs, max_n_classes) array of values
    cdef SIZE_t value_stride             # = n_outputs * max_n_classes

    # Methods
    #
    # The three methods below may be called without the GIL (`nogil`) and use
    # -1 as their error-return value (`except -1`), which is how an exception
    # raised inside them is propagated to the caller. `nogil` is written after
    # the exception clause: this declares the same function type as
    # `nogil except -1`, but is the order Cython expects going forward.
    cdef SIZE_t _add_node(self, SIZE_t parent, bint is_left, bint is_leaf,
                          SIZE_t feature, double threshold, double impurity,
                          SIZE_t n_node_samples,
                          double weighted_n_node_samples) except -1 nogil
    cdef int _resize(self, SIZE_t capacity) except -1 nogil
    # `=*` marks an optional argument; its default is given in _tree.pyx.
    cdef int _resize_c(self, SIZE_t capacity=*) except -1 nogil

    # Accessors returning NumPy arrays (see _tree.pyx for what they wrap).
    cdef cnp.ndarray _get_value_ndarray(self)
    cdef cnp.ndarray _get_node_ndarray(self)

    # cpdef methods are callable from both Python and Cython; the cdef
    # `_dense` / `_sparse_csr` variants are Cython-only and are presumably
    # the per-input-format back ends of the cpdef entry points -- confirm
    # in _tree.pyx.
    cpdef cnp.ndarray predict(self, object X)

    cpdef cnp.ndarray apply(self, object X)
    cdef cnp.ndarray _apply_dense(self, object X)
    cdef cnp.ndarray _apply_sparse_csr(self, object X)

    cpdef object decision_path(self, object X)
    cdef object _decision_path_dense(self, object X)
    cdef object _decision_path_sparse_csr(self, object X)

    # `normalize` is optional (`=*`); its default is defined in _tree.pyx.
    cpdef compute_feature_importances(self, normalize=*)
# =============================================================================
# Tree builder
# =============================================================================
cdef class TreeBuilder:
    # The TreeBuilder recursively builds a Tree object from training samples,
    # using a Splitter object for splitting internal nodes and assigning
    # values to leaves.
    #
    # This class controls the various stopping criteria and the node splitting
    # evaluation order, e.g. depth-first or best-first.
    #
    # Only attributes and method signatures are declared here; none of the
    # attributes is `public`, so they are accessible from Cython code only.

    cdef Splitter splitter               # Splitting algorithm

    cdef SIZE_t min_samples_split        # Minimum number of samples in an internal node
    cdef SIZE_t min_samples_leaf         # Minimum number of samples in a leaf
    cdef double min_weight_leaf          # Minimum weight in a leaf
    cdef SIZE_t max_depth                # Maximal tree depth
    cdef double min_impurity_decrease    # Impurity threshold for early stopping

    # Entry point callable from Python and Cython. `sample_weight` is
    # optional (`=*`); its default value is defined with the implementation.
    cpdef build(self, Tree tree, object X, cnp.ndarray y,
                cnp.ndarray sample_weight=*)
    # Cython-only helper taking the same X / y / sample_weight arguments as
    # build(); its exact checks are not visible from this declaration.
    cdef _check_input(self, object X, cnp.ndarray y, cnp.ndarray sample_weight)