Python* API Reference for Intel® Data Analytics Acceleration Library 2020 Update 1

dt_reg_traverse_model.py

1 # file: dt_reg_traverse_model.py
2 #===============================================================================
3 # Copyright 2014-2020 Intel Corporation
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 #===============================================================================
17 
18 #
19 # ! Content:
20 # ! Python example of decision tree regression model traversal.
21 # !
22 # ! The program trains the decision tree regression model on a training
23 # ! data set and prints the trained model using depth-first traversal.
24 # !*****************************************************************************
25 
26 #
27 
28 
29 #
30 
31 from __future__ import print_function
32 
33 from daal.algorithms import regression
34 from daal.algorithms import decision_tree
35 import daal.algorithms.decision_tree.regression
36 import daal.algorithms.decision_tree.regression.training
37 
38 from daal.data_management import FileDataSource, DataSourceIface, NumericTableIface, HomogenNumericTable, MergedNumericTable
39 
# CSV files read by trainModel(): one for growing the tree, one for pruning it
trainDatasetFileName = "../data/batch/decision_tree_train.csv"
pruneDatasetFileName = "../data/batch/decision_tree_prune.csv"

# Number of feature columns in each data set; the dependent variable is
# loaded into a separate single-column table.
nFeatures = 5
45 
46 
def _loadDataset(datasetFileName):
    """Load one .csv file into a features table and a dependent-variable table.

    Args:
        datasetFileName: Path of the .csv file to read.

    Returns:
        A ``(data, dependentVariables)`` tuple of HomogenNumericTable objects:
        the first is created with ``nFeatures`` columns, the second with one.
    """
    # Initialize FileDataSource to retrieve the input data from a .csv file
    dataSource = FileDataSource(
        datasetFileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )

    # Create Numeric Tables for the data and the dependent variables; both are
    # created with the notAllocate flag and are populated by the load below
    data = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
    dependentVariables = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)

    # The merged table lets a single load fill both tables from the same file
    mergedData = MergedNumericTable(data, dependentVariables)
    dataSource.loadDataBlock(mergedData)

    return data, dependentVariables


def trainModel():
    """Train a decision tree regression model with pruning.

    Reads the training set from ``trainDatasetFileName`` and the pruning set
    from ``pruneDatasetFileName``, passes both to the batch training
    algorithm and runs it.

    Returns:
        The value returned by ``algorithm.compute()``; the trained model is
        retrieved from it with ``decision_tree.regression.training.model``.
    """
    # Load the training set first, then the pruning set
    trainData, trainGroundTruth = _loadDataset(trainDatasetFileName)
    pruneData, pruneGroundTruth = _loadDataset(pruneDatasetFileName)

    # Create an algorithm object to train the Decision tree model
    training = decision_tree.regression.training
    algorithm = training.Batch()

    # Pass the training data set, dependent variables, and pruning dataset
    # with dependent variables to the algorithm
    algorithm.input.set(training.data, trainData)
    algorithm.input.set(training.dependentVariables, trainGroundTruth)
    algorithm.input.set(training.dataForPruning, pruneData)
    algorithm.input.set(training.dependentVariablesForPruning, pruneGroundTruth)

    # Train the Decision tree model and return the results
    return algorithm.compute()
86 
87 
# Visitor class implementing the TreeNodeVisitor interface; prints each tree
# node of the model when it is called back by the model traversal method
class PrintNodeVisitor(regression.TreeNodeVisitor):
    """Print one line per visited tree node, indented by the node's level."""

    def __init__(self):
        super(PrintNodeVisitor, self).__init__()

    @staticmethod
    def _indent(level):
        """Return the indentation prefix for a node at the given level."""
        # One unit of indentation per level; level 0 yields an empty prefix
        return " " * level

    def onLeafNode(self, level, response):
        """Print a leaf node with its response value.

        Args:
            level: Level of the node, used for indentation and in the message.
            response: Response value of the leaf, printed with 4 significant digits.

        Returns:
            Always True (presumably tells the traversal to continue -- confirm
            against the TreeNodeVisitor documentation).
        """
        print(self._indent(level), end='')
        print("Level {}, leaf node. Response value = {:.4g}".format(level, response))
        return True

    def onSplitNode(self, level, featureIndex, featureValue):
        """Print a split node with its feature index and feature value.

        Args:
            level: Level of the node, used for indentation and in the message.
            featureIndex: Index of the feature, printed as-is.
            featureValue: Feature value, printed with 4 significant digits.

        Returns:
            Always True (presumably tells the traversal to continue -- confirm
            against the TreeNodeVisitor documentation).
        """
        print(self._indent(level), end='')
        print("Level {}, split node. Feature index = {}, feature value = {:.4g}".format(level, featureIndex, featureValue))
        return True
108 
109 
def printModel(m):
    """Print every node of model ``m`` by traversing it depth-first.

    Args:
        m: Model object exposing ``traverseDF``; each visited node is
            reported through a PrintNodeVisitor.
    """
    nodePrinter = PrintNodeVisitor()
    m.traverseDF(nodePrinter)
113 
if __name__ == "__main__":
    # Train the model, then extract it from the training result and print it
    trainingResult = trainModel()
    trainedModel = trainingResult.get(decision_tree.regression.training.model)
    printModel(trainedModel)

For more complete information about compiler optimizations, see our Optimization Notice.