# daal names used by this script: the step identifiers passed to
# training.Distributed, the linear regression training/prediction modules,
# and the data source / numeric table classes.
from daal import step1Local, step2Master
from daal.algorithms.linear_regression import training, prediction
from daal.data_management import (
    DataSourceIface, FileDataSource, HomogenNumericTable, MergedNumericTable, NumericTableIface
)
# Put the parent of this file's directory on sys.path (once) so that the
# `utils` module, which supplies printNumericTable, can be imported below.
utils_folder = os.path.realpath(os.path.abspath(os.path.dirname(os.path.dirname(__file__))))
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
from utils import printNumericTable
# Dataset root, expressed as a relative path ('../data').
DAAL_PREFIX = os.path.join('..', 'data')

# Training CSV files; the training loop indexes this list by block number.
trainDatasetFileNames = [
    os.path.join(DAAL_PREFIX, 'distributed', 'linear_regression_train_1.csv'),
    os.path.join(DAAL_PREFIX, 'distributed', 'linear_regression_train_2.csv'),
    os.path.join(DAAL_PREFIX, 'distributed', 'linear_regression_train_3.csv'),
    os.path.join(DAAL_PREFIX, 'distributed', 'linear_regression_train_4.csv'),
]

# CSV file loaded by the prediction stage.
testDatasetFileName = os.path.join(DAAL_PREFIX, 'distributed', 'linear_regression_test.csv')
# NOTE(review): `nBlocks` and `nFeatures` are used by the training and
# prediction code but are not defined in the reviewed portion of this file --
# confirm they are set elsewhere.

# Column count of the dependent-variable tables built for training and testing.
nDependentVariables = 2

# Module-level results shared between the training and prediction stages.
# `trainingResult` is initialised here as well because the prediction stage
# declares it global and reads it.
trainingResult = None
predictionResult = None
# NOTE(review): the `def` line for this block was missing from the reviewed
# text; the name `trainModel` is reconstructed -- confirm it matches the call
# made under the `__main__` guard.
def trainModel():
    """Train the linear regression model from the per-block training files.

    For every block index in ``range(nBlocks)`` the matching CSV file is loaded
    into a feature table and a dependent-variable table, a local (step1Local)
    training algorithm is computed on them, and its result is added to the
    master (step2Master) algorithm as a partial model. The finalized result is
    stored in the module-level ``trainingResult`` and the model's beta
    coefficients are printed.
    """
    # The prediction stage reads the result through this module-level name.
    global trainingResult

    masterAlgorithm = training.Distributed(step2Master)

    for i in range(nBlocks):
        trainDataSource = FileDataSource(
            trainDatasetFileNames[i], DataSourceIface.notAllocateNumericTable,
            DataSourceIface.doDictionaryFromContext
        )

        # Two tables created with doNotAllocate, combined into one merged table
        # that is handed to the data source for loading.
        trainData = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
        trainDependentVariables = HomogenNumericTable(
            nDependentVariables, 0, NumericTableIface.doNotAllocate
        )
        mergedData = MergedNumericTable(trainData, trainDependentVariables)

        trainDataSource.loadDataBlock(mergedData)

        # A fresh local algorithm per block; the component tables (not the
        # merged one) are its inputs.
        localAlgorithm = training.Distributed(step1Local)
        localAlgorithm.input.set(training.data, trainData)
        localAlgorithm.input.set(training.dependentVariables, trainDependentVariables)

        masterAlgorithm.input.add(training.partialModels, localAlgorithm.compute())

    # Combine the collected partial models, then finalize to obtain the result.
    masterAlgorithm.compute()

    trainingResult = masterAlgorithm.finalizeCompute()
    printNumericTable(trainingResult.get(training.model).getBeta(),
                      "Linear Regression coefficients:")
# NOTE(review): the `def` line for this block was missing from the reviewed
# text; the name `testModel` is reconstructed -- confirm it matches the call
# made under the `__main__` guard.
def testModel():
    """Run prediction on the test file with the previously trained model.

    Loads the test CSV into a feature table and a ground-truth table, runs
    ``prediction.Batch`` using the model held in the module-level
    ``trainingResult``, stores the outcome in the module-level
    ``predictionResult``, and prints the first 10 rows of both the predictions
    and the ground truth.
    """
    global trainingResult, predictionResult

    testDataSource = FileDataSource(
        testDatasetFileName, DataSourceIface.doAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )

    # Two tables created with doNotAllocate, combined into one merged table
    # that is handed to the data source for loading.
    testData = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
    testGroundTruth = HomogenNumericTable(nDependentVariables, 0, NumericTableIface.doNotAllocate)
    mergedData = MergedNumericTable(testData, testGroundTruth)

    testDataSource.loadDataBlock(mergedData)

    algorithm = prediction.Batch()

    # Only the feature table is passed to the algorithm; the ground truth is
    # kept aside for the printout below.
    algorithm.input.setTable(prediction.data, testData)
    algorithm.input.setModel(prediction.model, trainingResult.get(training.model))

    predictionResult = algorithm.compute()
    printNumericTable(predictionResult.get(prediction.prediction),
                      "Linear Regression prediction results: (first 10 rows):", 10)
    printNumericTable(testGroundTruth,
                      "Ground truth (first 10 rows):", 10)
126 if __name__ ==
"__main__":