Python* API Reference for Intel® Data Analytics Acceleration Library 2020 Update 1

df_reg_traverse_model.py

1 # file: df_reg_traverse_model.py
2 #===============================================================================
3 # Copyright 2014-2020 Intel Corporation
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 #===============================================================================
17 
18 #
19 # ! Content:
20 # ! Python example of decision forest regression model traversal.
21 # !
22 # ! The program trains a decision forest regression model on a training
23 # ! data set and prints the trained model using depth-first traversal.
24 # !*****************************************************************************
25 
26 #
27 
28 
29 #
30 from __future__ import print_function
31 
32 from daal import algorithms
33 from daal.algorithms import decision_forest
34 import daal.algorithms.decision_forest.regression
35 import daal.algorithms.decision_forest.regression.training
36 
37 from daal.data_management import (
38  FileDataSource, DataSourceIface, NumericTableIface, HomogenNumericTable, MergedNumericTable, features
39 )
40 
# Input data set parameters
# Relative path of the CSV file that trainModel() hands to loadData()
trainDatasetFileName = "../data/batch/df_regression_train.csv"
# Column indices whose dictionary entries loadData() marks as
# features.DAAL_CATEGORICAL
categoricalFeaturesIndices = [3]
nFeatures = 13 # Number of feature columns; used in loadData() as the column count of the feature table

# Decision forest parameters
# Assigned to algorithm.parameter.nTrees in trainModel()
nTrees = 2
48 
49 
def trainModel():
    """Train the decision forest regression model and return the result.

    Loads the feature table and the dependent-variable table from
    ``trainDatasetFileName`` via ``loadData``, passes both to a batch
    training algorithm, sets its ``nTrees`` parameter from the module-level
    ``nTrees`` constant, and runs the computation.

    Returns:
        Whatever the training algorithm's ``compute()`` call returns; the
        script entry point reads the trained model from it with
        ``.get(decision_forest.regression.training.model)``.
    """
    # Short alias for the training namespace used several times below
    training = decision_forest.regression.training

    # Numeric tables with the features and the values to be predicted
    featureTable, responseTable = loadData(trainDatasetFileName)

    # Batch trainer created with its default constructor arguments
    trainer = training.Batch()

    # Hand the training features and dependent values to the trainer
    trainer.input.set(training.data, featureTable)
    trainer.input.set(training.dependentVariable, responseTable)

    trainer.parameter.nTrees = nTrees

    # Run the training and give the result object back to the caller
    return trainer.compute()
66 
67 
def loadData(fileName):
    """Load a CSV file into a feature table and a dependent-variable table.

    Two tables are created without allocated storage: one with
    ``nFeatures`` columns and one with a single column. They are wrapped in
    a ``MergedNumericTable`` and the file data source loads its data block
    into that merged table. Afterwards, every column index listed in
    ``categoricalFeaturesIndices`` is marked as ``features.DAAL_CATEGORICAL``
    in the feature table's dictionary.

    Args:
        fileName: Path of the .csv file to read.

    Returns:
        A ``(data, dependentVar)`` tuple: the feature table and the
        single-column dependent-variable table.
    """
    # CSV-backed data source; it does not allocate a numeric table itself
    csvSource = FileDataSource(
        fileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext,
    )

    # Unallocated tables for the features and for the dependent variable
    featureTable = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
    responseTable = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
    combinedTable = MergedNumericTable(featureTable, responseTable)

    # Pull the file contents into the merged table
    csvSource.loadDataBlock(combinedTable)

    # Flag the configured columns as categorical features
    featureDictionary = featureTable.getDictionary()
    for columnIndex in categoricalFeaturesIndices:
        featureDictionary[columnIndex].featureType = features.DAAL_CATEGORICAL

    return featureTable, responseTable
88 
89 
class PrintNodeVisitor(algorithms.regression.TreeNodeVisitor):
    """Tree node visitor that prints each node it is called back with.

    An instance is passed to the model's ``traverseDF`` method (see
    ``printModel``). Each callback prints one line describing the node,
    preceded by one space per tree level.
    """

    def __init__(self):
        super(PrintNodeVisitor, self).__init__()

    def onLeafNode(self, level, response):
        """Print a leaf node and return True.

        Args:
            level: Level of the node; also the number of leading spaces.
            response: Response value of the leaf, printed with ``.4g``.

        Returns:
            Always True (presumably tells the traversal to continue --
            confirm against the DAAL TreeNodeVisitor documentation).
        """
        # One space of indentation per level, then the node description
        print(" " * level, end='')
        print("Level {}, leaf node. Response value = {:.4g}".format(level, response))
        return True

    def onSplitNode(self, level, featureIndex, featureValue):
        """Print a split node and return True.

        Args:
            level: Level of the node; also the number of leading spaces.
            featureIndex: Index of the feature the node splits on.
            featureValue: Feature value of the split, printed with ``.4g``.

        Returns:
            Always True (presumably tells the traversal to continue --
            confirm against the DAAL TreeNodeVisitor documentation).
        """
        # One space of indentation per level, then the node description
        print(" " * level, end='')
        print("Level {}, split node. Feature index = {}, feature value = {:.4g}".format(level, featureIndex, featureValue))
        return True
110 
111 
def printModel(m):
    """Print the number of trees in the model, then every tree's nodes.

    Args:
        m: Model object exposing ``getNumberOfTrees()`` and
            ``traverseDF(treeIndex, visitor)``.
    """
    # A single visitor instance is reused for all trees
    nodePrinter = PrintNodeVisitor()

    print("Number of trees: {}".format(m.getNumberOfTrees()))

    for treeIndex in range(m.getNumberOfTrees()):
        print("Tree #{}".format(treeIndex))
        # The visitor prints each node as the traversal reports it
        m.traverseDF(treeIndex, nodePrinter)
118 
if __name__ == "__main__":
    # Train the forest, pull the trained model out of the training result,
    # and print it tree by tree.
    trainingResult = trainModel()
    trainedModel = trainingResult.get(decision_forest.regression.training.model)
    printModel(trainedModel)

For more complete information about compiler optimizations, see our Optimization Notice.