24 from daal.algorithms
import gbt
25 from daal.algorithms.gbt.classification
import prediction, training
26 from daal.algorithms
import classifier
27 from daal.data_management
import (
28 FileDataSource, DataSourceIface, NumericTableIface, HomogenNumericTable,
29 MergedNumericTable, features
# Put the parent of this script's directory on the import path (once) so the
# ``utils`` import that follows can be resolved from it.
utils_folder = os.path.realpath(
    os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
)
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
from utils import printNumericTable, printNumericTables

# Root of the example data sets; a relative path, so it is resolved against
# the current working directory.
DAAL_PREFIX = os.path.join('..', 'data')

# Input data set parameters: CSV files used for training and for testing.
trainDatasetFileName = os.path.join(
    DAAL_PREFIX, 'batch', 'df_classification_train.csv'
)
testDatasetFileName = os.path.join(
    DAAL_PREFIX, 'batch', 'df_classification_test.csv'
)

# Value assigned to the training algorithm's ``minObservationsInLeafNode``
# parameter.
minObservationsInLeafNode = 8

# Module-level result holders; they start empty and are rebound through a
# ``global`` declaration when the prediction step runs.
predictionResult = None
testGroundTruth = None
60 trainDataSource = FileDataSource(
62 DataSourceIface.notAllocateNumericTable,
63 DataSourceIface.doDictionaryFromContext
67 trainData = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
68 trainGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
69 mergedData = MergedNumericTable(trainData, trainGroundTruth)
72 trainDataSource.loadDataBlock(mergedData)
75 dict = trainData.getDictionary()
78 dict[0].featureType = features.DAAL_CONTINUOUS
79 dict[1].featureType = features.DAAL_CONTINUOUS
80 dict[2].featureType = features.DAAL_CATEGORICAL
83 algorithm = training.Batch(nClasses)
84 algorithm.parameter().maxIterations = maxIterations
85 algorithm.parameter().minObservationsInLeafNode = minObservationsInLeafNode
86 algorithm.parameter().featuresPerNode = nFeatures
89 algorithm.input.set(classifier.training.data, trainData)
90 algorithm.input.set(classifier.training.labels, trainGroundTruth)
93 trainingResult = algorithm.compute()
94 model = trainingResult.get(classifier.training.model)
97 global testGroundTruth, predictionResult
100 testDataSource = FileDataSource(
102 DataSourceIface.notAllocateNumericTable,
103 DataSourceIface.doDictionaryFromContext
107 testData = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
108 testGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
109 mergedData = MergedNumericTable(testData, testGroundTruth)
112 testDataSource.loadDataBlock(mergedData)
115 dict = testData.getDictionary()
118 dict[0].featureType = features.DAAL_CONTINUOUS
119 dict[1].featureType = features.DAAL_CONTINUOUS
120 dict[2].featureType = features.DAAL_CATEGORICAL
123 algorithm = prediction.Batch(nClasses)
126 algorithm.input.setTable(classifier.prediction.data, testData)
127 algorithm.input.setModel(classifier.prediction.model, model)
131 predictionResult = algorithm.compute()
136 printNumericTable(predictionResult.get(classifier.prediction.prediction),
"Gragient boosted trees prediction results (first 10 rows):",10)
137 printNumericTable(testGroundTruth,
"Ground truth (first 10 rows):",10)
139 if __name__ ==
"__main__":