koho.cpp  1.1.0
Public Member Functions | Protected Attributes | List of all members
koho::GiniCriterion Class Reference

Gini Index impurity criterion. More...

#include <decision_tree.h>

Collaboration diagram for koho::GiniCriterion:
Collaboration graph
[legend]

Public Member Functions

 GiniCriterion (OutputsIdx_t n_outputs, ClassesIdx_t *n_classes, ClassesIdx_t n_classes_max, SamplesIdx_t n_samples, ClassWeights_t *class_weight)
 Create and initialize a new gini criterion. More...
 
void calculate_node_histogram (Classes_t *y, std::vector< SamplesIdx_t > &samples, SamplesIdx_t start, SamplesIdx_t end)
 Calculate weighted class histograms for all outputs for current node. More...
 
double calculate_impurity (std::vector< Histogram_t > &histogram)
 Calculate impurity of weighted class histogram using the Gini criterion. More...
 
void calculate_node_impurity ()
 Calculate impurity for all outputs of the current node. More...
 
void calculate_NA_histogram (Classes_t *y, std::vector< SamplesIdx_t > &samples, SamplesIdx_t pos)
 Calculate class histograms for all outputs for the samples with missing values and the samples with values. More...
 
void calculate_NA_impurity ()
 Calculate impurity for all outputs of samples with missing values and samples with values. More...
 
double calculate_NA_impurity_improvement ()
 
void init_threshold_histograms ()
 
void init_threshold_values_histograms ()
 
void update_threshold_histograms (Classes_t *y, std::vector< SamplesIdx_t > &samples, SamplesIdx_t new_pos)
 
void calculate_threshold_impurity ()
 Calculate impurity for all outputs of samples with values that are smaller and greater than a threshold. More...
 
void calculate_threshold_NA_impurity ()
 
double calculate_threshold_impurity_improvement ()
 
double calculate_threshold_values_impurity_improvement ()
 
double calculate_threshold_NA_left_impurity_improvement ()
 
double calculate_threshold_NA_right_impurity_improvement ()
 
std::vector< std::vector< Histogram_t > > get_node_weighted_histogram ()
 
double get_node_impurity ()
 
double get_node_impurity_NA ()
 
double get_node_impurity_values ()
 
double get_node_impurity_threshold_left ()
 
double get_node_impurity_threshold_right ()
 

Protected Attributes

OutputsIdx_t n_outputs
 
ClassesIdx_t * n_classes
 
ClassesIdx_t n_classes_max
 
SamplesIdx_t n_samples
 
ClassWeights_t * class_weight
 
std::vector< std::vector< Histogram_t > > node_weighted_histogram
 
std::vector< Histogram_t > node_weighted_n_samples
 
std::vector< double > node_impurity
 
std::vector< std::vector< Histogram_t > > node_weighted_histogram_NA
 
std::vector< Histogram_t > node_weighted_n_samples_NA
 
std::vector< double > node_impurity_NA
 
std::vector< std::vector< Histogram_t > > node_weighted_histogram_values
 
std::vector< Histogram_t > node_weighted_n_samples_values
 
std::vector< double > node_impurity_values
 
SamplesIdx_t node_pos_NA
 
std::vector< std::vector< Histogram_t > > node_weighted_histogram_threshold_left
 
std::vector< Histogram_t > node_weighted_n_samples_threshold_left
 
std::vector< double > node_impurity_threshold_left
 
std::vector< Histogram_t > node_weighted_n_samples_threshold_left_NA
 
std::vector< double > node_impurity_threshold_left_NA
 
std::vector< std::vector< Histogram_t > > node_weighted_histogram_threshold_right
 
std::vector< Histogram_t > node_weighted_n_samples_threshold_right
 
std::vector< double > node_impurity_threshold_right
 
std::vector< Histogram_t > node_weighted_n_samples_threshold_right_NA
 
std::vector< double > node_impurity_threshold_right_NA
 
SamplesIdx_t node_pos_threshold
 

Detailed Description

Gini Index impurity criterion.

Constructor & Destructor Documentation

◆ GiniCriterion()

koho::GiniCriterion::GiniCriterion ( OutputsIdx_t  n_outputs,
ClassesIdx_t *  n_classes,
ClassesIdx_t  n_classes_max,
SamplesIdx_t  n_samples,
ClassWeights_t *  class_weight 
)

Create and initialize a new gini criterion.

Assuming: y[o] is 0, 1, 2, ... (n_classes[o] - 1) for all outputs o.

Member Function Documentation

◆ calculate_impurity()

double koho::GiniCriterion::calculate_impurity ( std::vector< Histogram_t > &  histogram)

Calculate impurity of weighted class histogram using the Gini criterion.

◆ calculate_NA_histogram()

void koho::GiniCriterion::calculate_NA_histogram ( Classes_t *  y,
std::vector< SamplesIdx_t > &  samples,
SamplesIdx_t  pos 
)

Calculate class histograms for all outputs for the samples with missing values and the samples with values.

Assuming: number of missing values > 0

◆ calculate_NA_impurity()

void koho::GiniCriterion::calculate_NA_impurity ( )

Calculate impurity for all outputs of samples with missing values and samples with values.

Assuming: number of missing values > 0
Assuming: calculate_NA_histogram()

◆ calculate_NA_impurity_improvement()

double koho::GiniCriterion::calculate_NA_impurity_improvement ( )

Calculate the impurity improvement over all outputs from the current node to its children, assuming a split between samples with missing values and samples with values.

Assuming: number of missing values > 0
Assuming: calculate_node_impurity(), calculate_NA_impurity()

◆ calculate_node_histogram()

void koho::GiniCriterion::calculate_node_histogram ( Classes_t *  y,
std::vector< SamplesIdx_t > &  samples,
SamplesIdx_t  start,
SamplesIdx_t  end 
)

Calculate weighted class histograms for all outputs for current node.

◆ calculate_node_impurity()

void koho::GiniCriterion::calculate_node_impurity ( )

Calculate impurity for all outputs of the current node.

Assuming: calculate_node_histogram()

◆ calculate_threshold_impurity()

void koho::GiniCriterion::calculate_threshold_impurity ( )

Calculate impurity for all outputs of samples with values that are smaller and greater than a threshold.

Assuming: update_threshold_histograms()

◆ calculate_threshold_impurity_improvement()

double koho::GiniCriterion::calculate_threshold_impurity_improvement ( )

Calculate the impurity improvement over all outputs from the current node to its children, assuming a split of the samples with values smaller and greater than a threshold, in the case that all samples have values.

Assuming: calculate_node_impurity(), calculate_threshold_impurity()

◆ calculate_threshold_NA_impurity()

void koho::GiniCriterion::calculate_threshold_NA_impurity ( )

Calculate the impurity for all outputs of samples with values that are smaller and greater than a threshold, while passing on the samples with missing values.

Assuming: number of missing values > 0
Assuming: update_threshold_histograms(), calculate_NA_histogram()

◆ calculate_threshold_NA_left_impurity_improvement()

double koho::GiniCriterion::calculate_threshold_NA_left_impurity_improvement ( )

Calculate the impurity improvement over all outputs from the current node to its children, assuming a split of the samples with values smaller and greater than a threshold, and passing on the samples with missing values to the left child.

Assuming: calculate_NA_impurity(), calculate_threshold_impurity(), calculate_threshold_NA_impurity()

◆ calculate_threshold_NA_right_impurity_improvement()

double koho::GiniCriterion::calculate_threshold_NA_right_impurity_improvement ( )

Calculate the impurity improvement over all outputs from the current node to its children, assuming a split of the samples with values smaller and greater than a threshold, and passing on the samples with missing values to the right child.

Assuming: calculate_NA_impurity(), calculate_threshold_impurity(), calculate_threshold_NA_impurity()

◆ calculate_threshold_values_impurity_improvement()

double koho::GiniCriterion::calculate_threshold_values_impurity_improvement ( )

Calculate the impurity improvement over all outputs from the current node to its children, assuming a split of the samples with values smaller and greater than a threshold, in the case that there are also samples with missing values.

Assuming: calculate_NA_impurity(), calculate_threshold_impurity()

◆ get_node_impurity()

double koho::GiniCriterion::get_node_impurity ( )
inline

◆ get_node_impurity_NA()

double koho::GiniCriterion::get_node_impurity_NA ( )
inline

◆ get_node_impurity_threshold_left()

double koho::GiniCriterion::get_node_impurity_threshold_left ( )
inline

◆ get_node_impurity_threshold_right()

double koho::GiniCriterion::get_node_impurity_threshold_right ( )
inline

◆ get_node_impurity_values()

double koho::GiniCriterion::get_node_impurity_values ( )
inline

◆ get_node_weighted_histogram()

std::vector<std::vector<Histogram_t> > koho::GiniCriterion::get_node_weighted_histogram ( )
inline

◆ init_threshold_histograms()

void koho::GiniCriterion::init_threshold_histograms ( )

Initialize the class histograms for all outputs for applying a threshold to samples with values, in the case that all samples have values.

Assuming: calculate_node_histogram()

◆ init_threshold_values_histograms()

void koho::GiniCriterion::init_threshold_values_histograms ( )

Initialize the class histograms for all outputs for applying a threshold to samples with values, in the case that there are also samples with missing values.

Assuming: number of missing values > 0
Assuming: calculate_NA_histogram()

◆ update_threshold_histograms()

void koho::GiniCriterion::update_threshold_histograms ( Classes_t *  y,
std::vector< SamplesIdx_t > &  samples,
SamplesIdx_t  new_pos 
)

Update the class histograms for all outputs for applying a threshold to values, from the current position to the new position (positions correspond to thresholds).

Assuming: new_pos > pos
Assuming: init_threshold_histograms() or init_threshold_values_histograms()

Member Data Documentation

◆ class_weight

ClassWeights_t* koho::GiniCriterion::class_weight
protected

◆ n_classes

ClassesIdx_t* koho::GiniCriterion::n_classes
protected

◆ n_classes_max

ClassesIdx_t koho::GiniCriterion::n_classes_max
protected

◆ n_outputs

OutputsIdx_t koho::GiniCriterion::n_outputs
protected

◆ n_samples

SamplesIdx_t koho::GiniCriterion::n_samples
protected

◆ node_impurity

std::vector<double> koho::GiniCriterion::node_impurity
protected

◆ node_impurity_NA

std::vector<double> koho::GiniCriterion::node_impurity_NA
protected

◆ node_impurity_threshold_left

std::vector<double> koho::GiniCriterion::node_impurity_threshold_left
protected

◆ node_impurity_threshold_left_NA

std::vector<double> koho::GiniCriterion::node_impurity_threshold_left_NA
protected

◆ node_impurity_threshold_right

std::vector<double> koho::GiniCriterion::node_impurity_threshold_right
protected

◆ node_impurity_threshold_right_NA

std::vector<double> koho::GiniCriterion::node_impurity_threshold_right_NA
protected

◆ node_impurity_values

std::vector<double> koho::GiniCriterion::node_impurity_values
protected

◆ node_pos_NA

SamplesIdx_t koho::GiniCriterion::node_pos_NA
protected

◆ node_pos_threshold

SamplesIdx_t koho::GiniCriterion::node_pos_threshold
protected

◆ node_weighted_histogram

std::vector<std::vector<Histogram_t> > koho::GiniCriterion::node_weighted_histogram
protected

◆ node_weighted_histogram_NA

std::vector<std::vector<Histogram_t> > koho::GiniCriterion::node_weighted_histogram_NA
protected

◆ node_weighted_histogram_threshold_left

std::vector<std::vector<Histogram_t> > koho::GiniCriterion::node_weighted_histogram_threshold_left
protected

◆ node_weighted_histogram_threshold_right

std::vector<std::vector<Histogram_t> > koho::GiniCriterion::node_weighted_histogram_threshold_right
protected

◆ node_weighted_histogram_values

std::vector<std::vector<Histogram_t> > koho::GiniCriterion::node_weighted_histogram_values
protected

◆ node_weighted_n_samples

std::vector<Histogram_t> koho::GiniCriterion::node_weighted_n_samples
protected

◆ node_weighted_n_samples_NA

std::vector<Histogram_t> koho::GiniCriterion::node_weighted_n_samples_NA
protected

◆ node_weighted_n_samples_threshold_left

std::vector<Histogram_t> koho::GiniCriterion::node_weighted_n_samples_threshold_left
protected

◆ node_weighted_n_samples_threshold_left_NA

std::vector<Histogram_t> koho::GiniCriterion::node_weighted_n_samples_threshold_left_NA
protected

◆ node_weighted_n_samples_threshold_right

std::vector<Histogram_t> koho::GiniCriterion::node_weighted_n_samples_threshold_right
protected

◆ node_weighted_n_samples_threshold_right_NA

std::vector<Histogram_t> koho::GiniCriterion::node_weighted_n_samples_threshold_right_NA
protected

◆ node_weighted_n_samples_values

std::vector<Histogram_t> koho::GiniCriterion::node_weighted_n_samples_values
protected

The documentation for this class was generated from the following files: