koho.cpp  1.0.0
decision_forest.h
Go to the documentation of this file.
1 
10 // Author: AI Werkstatt (TM)
11 // (C) Copyright 2019, AI Werkstatt (TM) www.aiwerkstatt.com. All rights reserved.
12 
13 #ifndef KOHO_DECISION_FOREST_H
14 #define KOHO_DECISION_FOREST_H
15 
16 #include "decision_tree.h"
17 
18 namespace koho {
19 
20 // =============================================================================
21 // Decision Forest Classifier
22 // =============================================================================
23 
26 
27  protected:
28  std::vector<std::string> classes;
// NOTE(review): listing line 29 is not shown here; the index at the end of this page places `ClassesIdx_t n_classes` there.
30  std::vector<std::string> features;
// NOTE(review): listing line 31 is not shown here; the index at the end of this page places `FeaturesIdx_t n_features` there.
32 
33  // Hyperparameters
34  unsigned long n_estimators;
35  bool bootstrap;
36  bool oob_score;
37  std::string class_balance;
// NOTE(review): listing lines 38-39 are not shown here; the index places `TreeDepthIdx_t max_depth` and `FeaturesIdx_t max_features` there.
40  unsigned long max_thresholds;
41  std::string missing_values;
42 
43  // Random Number Generator
// NOTE(review): listing line 44 is not shown here; the index places `RandomState random_state` there.
45 
46  // Model
47  std::vector<DecisionTreeClassifier> dtc_; // underlying sub-estimators
48 
49  // Performance Characteristics
50  double oob_score_; // Out-Of-Bag estimate
51 
52  public:
54 
// Create and initialize a new decision forest classifier.
// Required arguments: classes, n_classes, features, n_features.
// Defaulted arguments: n_estimators (100), bootstrap (false), oob_score (false),
// class_balance ("balanced"), max_depth (3), max_features (0), max_thresholds (0),
// missing_values ("None"), random_state_seed (0).
// NOTE(review): the meaning of the 0 defaults and the set of accepted string values
// are not visible in this listing — confirm against the definition (decision_forest.cpp:28).
115  DecisionForestClassifier(std::vector<std::string> classes,
116  ClassesIdx_t n_classes,
117  std::vector<std::string> features,
118  FeaturesIdx_t n_features,
119  unsigned long n_estimators = 100,
120  bool bootstrap = false,
121  bool oob_score = false,
122  std::string const& class_balance = "balanced",
123  TreeDepthIdx_t max_depth = 3,
124  FeaturesIdx_t max_features = 0,
125  unsigned long max_thresholds = 0,
126  std::string const& missing_values = "None",
127  long random_state_seed = 0);
128 
130 
// Build a decision forest classifier from the training data.
// X and y are raw pointers to the training features and class labels; n_samples is the sample count.
// NOTE(review): the memory layout of X (presumably n_samples x n_features) is not visible here — confirm.
135  void fit(Features_t* X,
136  Classes_t* y,
137  SamplesIdx_t n_samples);
138 
140 
// Predict class probabilities for the test data.
// NOTE(review): y_prob is presumably a caller-provided output buffer (non-const pointer, void return);
// its required size is not visible in this listing — confirm against the definition.
145  void predict_proba(Features_t *X,
146  SamplesIdx_t n_samples,
147  double *y_prob);
148 
150 
// Predict classes for the test data.
// NOTE(review): y is presumably a caller-provided output buffer holding one class per sample — confirm.
155  void predict(Features_t *X,
156  SamplesIdx_t n_samples,
157  Classes_t *y);
158 
160 
// Calculate score for the test data; the score is returned as a double.
// NOTE(review): the scoring metric is not stated in this listing — confirm against the definition.
166  double score(Features_t* X,
167  Classes_t* y,
168  SamplesIdx_t n_samples);
169 
171 
// Calculate feature importances from the decision forest.
// NOTE(review): importances is presumably a caller-provided output buffer with one entry per feature — confirm.
176  void calculate_feature_importances(double* importances);
177 
179 
// Export of a decision forest as individual decision trees in GraphViz dot format.
// rotate defaults to false; its effect on the output is not visible in this listing.
202  void export_graphviz(std::string const& filename, bool rotate=false);
203 
205 
// Overload of export_graphviz taking an additional index e.
// NOTE(review): presumably exports only the decision tree with index e from the forest — confirm against the definition.
214  void export_graphviz(std::string const& filename, unsigned long e, bool rotate);
215 
217 
// Export of a decision tree from a decision forest in a simple text format.
// NOTE(review): e presumably selects which tree of the forest is exported — confirm.
220  std::string export_text(unsigned long e);
221 
224 
// NOTE(review): no description is available in this listing; presumably serializes the forest
// to the file named by filename — confirm against the definition (decision_forest.cpp:433).
228  void export_serialize(std::string const& filename);
229 
232 
// NOTE(review): no description is available in this listing; presumably reconstructs a classifier
// from the file named by filename — confirm against the definition (decision_forest.cpp:537).
236  static DecisionForestClassifier import_deserialize(std::string const& filename);
237 
// Serialize to the given output file stream.
239  void serialize(std::ofstream& fout);
// Deserialize from the given input file stream, returning the classifier by value.
241  static DecisionForestClassifier deserialize(std::ifstream& fin);
242  };
243 
244 } // namespace koho
245 
246 #endif
void serialize(std::ofstream &fout)
Serialize.
Definition: decision_forest.cpp:390
Definition: decision_forest.cpp:20
unsigned long ClassesIdx_t
Definition: decision_tree.h:46
unsigned long TreeDepthIdx_t
Definition: decision_tree.h:48
unsigned long max_thresholds
Definition: decision_forest.h:40
RandomState random_state
Definition: decision_forest.h:44
unsigned long FeaturesIdx_t
Definition: decision_tree.h:45
std::string export_text(unsigned long e)
Export of a decision tree from a decision forest in a simple text format.
Definition: decision_forest.cpp:382
A decision forest classifier.
Definition: decision_forest.h:25
Decision Tree module.
A random number generator.
Definition: random_number_generator.h:20
std::vector< std::string > features
Definition: decision_forest.h:30
double oob_score_
Definition: decision_forest.h:50
FeaturesIdx_t n_features
Definition: decision_forest.h:31
void calculate_feature_importances(double *importances)
Calculate feature importances from the decision forest.
Definition: decision_forest.cpp:340
std::vector< std::string > classes
Definition: decision_forest.h:28
static DecisionForestClassifier import_deserialize(std::string const &filename)
Definition: decision_forest.cpp:537
void predict_proba(Features_t *X, SamplesIdx_t n_samples, double *y_prob)
Predict class probabilities for the test data.
Definition: decision_forest.cpp:276
void export_graphviz(std::string const &filename, bool rotate=false)
Export of a decision forest as individual decision trees in GraphViz dot format.
Definition: decision_forest.cpp:364
std::string class_balance
Definition: decision_forest.h:37
double score(Features_t *X, Classes_t *y, SamplesIdx_t n_samples)
Calculate score for the test data.
Definition: decision_forest.cpp:323
TreeDepthIdx_t max_depth
Definition: decision_forest.h:38
long Classes_t
Definition: decision_tree.h:39
bool bootstrap
Definition: decision_forest.h:35
ClassesIdx_t n_classes
Definition: decision_forest.h:29
bool oob_score
Definition: decision_forest.h:36
void export_serialize(std::string const &filename)
Definition: decision_forest.cpp:433
void predict(Features_t *X, SamplesIdx_t n_samples, Classes_t *y)
Predict classes for the test data.
Definition: decision_forest.cpp:308
static DecisionForestClassifier deserialize(std::ifstream &fin)
Deserialize.
Definition: decision_forest.cpp:462
unsigned long n_estimators
Definition: decision_forest.h:34
DecisionForestClassifier(std::vector< std::string > classes, ClassesIdx_t n_classes, std::vector< std::string > features, FeaturesIdx_t n_features, unsigned long n_estimators=100, bool bootstrap=false, bool oob_score=false, std::string const &class_balance="balanced", TreeDepthIdx_t max_depth=3, FeaturesIdx_t max_features=0, unsigned long max_thresholds=0, std::string const &missing_values="None", long random_state_seed=0)
Create and initialize a new decision forest classifier.
Definition: decision_forest.cpp:28
double Features_t
Definition: decision_tree.h:38
std::string missing_values
Definition: decision_forest.h:41
void fit(Features_t *X, Classes_t *y, SamplesIdx_t n_samples)
Build a decision forest classifier from the training data.
Definition: decision_forest.cpp:106
std::vector< DecisionTreeClassifier > dtc_
Definition: decision_forest.h:47
unsigned long SamplesIdx_t
Definition: decision_tree.h:44
FeaturesIdx_t max_features
Definition: decision_forest.h:39