24 from daal.algorithms
import decision_forest
25 from daal.algorithms.decision_forest.classification
import prediction, training
26 from daal.algorithms
import classifier
27 from daal.data_management
import (
28 FileDataSource, DataSourceIface, NumericTableIface, HomogenNumericTable,
29 MergedNumericTable, features
# Resolve the directory that is the parent of this script's own directory,
# and make sure it is on the import path so the `utils` import below can be
# resolved from it.
utils_folder = os.path.realpath(
    os.path.abspath(
        os.path.dirname(os.path.dirname(__file__))
    )
)
if utils_folder not in sys.path:
    # Prepend (not append) so this directory is searched first.
    sys.path.insert(0, utils_folder)
from utils import printNumericTable, printNumericTables
# Relative location of the example data directory.
DAAL_PREFIX = os.path.join('..', 'data')

# CSV files with the training and the test data sets.
trainDatasetFileName = os.path.join(DAAL_PREFIX, 'batch', 'df_classification_train.csv')
testDatasetFileName = os.path.join(DAAL_PREFIX, 'batch', 'df_classification_test.csv')

# Value assigned to the training algorithm's `minObservationsInLeafNode`
# parameter.
minObservationsInLeafNode = 8

# Module-level placeholders; they are re-bound later (via a `global`
# statement) once the prediction step has run.
predictionResult = testGroundTruth = None
# Training step: load the training CSV, train a decision forest classifier,
# keep the resulting model in `model`, and print the variable importance and
# out-of-bag error tables.
#
# NOTE(review): nFeatures, nClasses and nTrees are used below but their
# definitions are not visible in this listing -- confirm they are defined
# earlier in the file.

# NOTE(review): the first argument of this call is not visible in the
# listing; the training file name is assumed here -- confirm.
trainDataSource = FileDataSource(
    trainDatasetFileName,
    DataSourceIface.notAllocateNumericTable,
    DataSourceIface.doDictionaryFromContext,
)

# Separate tables for the feature columns and the single label column, both
# created with the notAllocate flag, and a merged view over the two that is
# handed to the data source.
trainData = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
trainGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
mergedData = MergedNumericTable(trainData, trainGroundTruth)

# Retrieve the data from the input file through the merged table.
trainDataSource.loadDataBlock(mergedData)

# Mark the first two features as continuous and the third as categorical.
# (Named `featureDict` rather than `dict` so the builtin is not shadowed.)
featureDict = trainData.getDictionary()
featureDict[0].featureType = features.DAAL_CONTINUOUS
featureDict[1].featureType = features.DAAL_CONTINUOUS
featureDict[2].featureType = features.DAAL_CATEGORICAL

# Configure the training algorithm.
algorithm = training.Batch(nClasses)
algorithm.parameter.nTrees = nTrees
algorithm.parameter.minObservationsInLeafNode = minObservationsInLeafNode
algorithm.parameter.featuresPerNode = nFeatures
algorithm.parameter.varImportance = decision_forest.training.MDI
algorithm.parameter.resultsToCompute = decision_forest.training.computeOutOfBagError

# Pass the training data set and the dependent values (labels).
algorithm.input.set(classifier.training.data, trainData)
algorithm.input.set(classifier.training.labels, trainGroundTruth)

# Train, keep the model for the prediction step, and report the results.
trainingResult = algorithm.compute()
model = trainingResult.get(classifier.training.model)
printNumericTable(
    trainingResult.getTable(training.variableImportance),
    "Variable importance results: ",
)
printNumericTable(
    trainingResult.getTable(training.outOfBagError),
    "OOB error: ",
)
# Prediction step: load the test CSV, run the trained `model` on it, and
# store the ground-truth labels and the prediction result in the
# module-level names `testGroundTruth` and `predictionResult`.
#
# NOTE(review): this `global` statement implies that these lines belong to a
# function whose `def` line is not visible in the listing -- confirm the
# enclosing definition and indentation.
global testGroundTruth, predictionResult

# NOTE(review): the first argument of this call is not visible in the
# listing; the test file name is assumed here -- confirm.
testDataSource = FileDataSource(
    testDatasetFileName,
    DataSourceIface.notAllocateNumericTable,
    DataSourceIface.doDictionaryFromContext,
)

# Separate tables for the feature columns and the single label column, both
# created with the notAllocate flag, and a merged view over the two that is
# handed to the data source.
testData = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
testGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
mergedData = MergedNumericTable(testData, testGroundTruth)

# Retrieve the data from the input file through the merged table.
testDataSource.loadDataBlock(mergedData)

# Same feature typing as used for the training data: two continuous
# features followed by one categorical feature.
# (Named `featureDict` rather than `dict` so the builtin is not shadowed.)
featureDict = testData.getDictionary()
featureDict[0].featureType = features.DAAL_CONTINUOUS
featureDict[1].featureType = features.DAAL_CONTINUOUS
featureDict[2].featureType = features.DAAL_CATEGORICAL

# Create the prediction algorithm and give it the test data and the model.
algorithm = prediction.Batch(nClasses)
algorithm.input.setTable(classifier.prediction.data, testData)
algorithm.input.setModel(classifier.prediction.model, model)

# Run the prediction; the result is read later when printing.
predictionResult = algorithm.compute()
# Print the predicted labels next to the ground truth; the trailing 10 is
# passed through to printNumericTable for both tables.
printNumericTable(
    predictionResult.get(classifier.prediction.prediction),
    "Decision forest prediction results (first 10 rows):",
    10,
)
printNumericTable(
    testGroundTruth,
    "Ground truth (first 10 rows):",
    10,
)
142 if __name__ ==
"__main__":