# NOTE(review): `os` and `sys` are used below but their imports are not
# visible in this chunk - confirm they are imported above.
from daal import step1Local, step2Master
from daal.algorithms.ridge_regression import training, prediction
from daal.data_management import (
    DataSource,
    FileDataSource,
    NumericTable,
    HomogenNumericTable,
    MergedNumericTable,
)

# Put the grandparent directory of this script on sys.path (once) so that the
# `utils` module imported just below can be resolved.
utils_folder = os.path.realpath(os.path.abspath(os.path.dirname(os.path.dirname(__file__))))
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
from utils import printNumericTable  # noqa: E402 - must follow the sys.path tweak
# Training input files; the training loop reads entry i for block i.
trainDatasetFileNames = [
    os.path.join("..", "data", "distributed", "linear_regression_train_1.csv"),
    os.path.join("..", "data", "distributed", "linear_regression_train_2.csv"),
    os.path.join("..", "data", "distributed", "linear_regression_train_3.csv"),
    os.path.join("..", "data", "distributed", "linear_regression_train_4.csv"),
]

# Input file used to evaluate the trained model.
testDatasetFileName = os.path.join("..", "data", "distributed", "linear_regression_test.csv")

# NOTE(review): `nBlocks` and `nFeatures` are used by the training/testing code
# but their definitions are not visible in this chunk - confirm they are
# defined at module level next to `nDependentVariables`.

# Column count of the dependent-variable (response / ground-truth) tables.
nDependentVariables = 2
def trainModel():
    """Train a ridge regression model from the per-block training files.

    For each of the ``nBlocks`` files in ``trainDatasetFileNames`` a step-1
    (local) training algorithm computes a partial result, which is added to
    the input of a single step-2 (master) training algorithm. Once every block
    has been processed the master algorithm is computed and finalized, and the
    resulting coefficients are printed.

    Reads the module-level names ``nBlocks``, ``nFeatures``,
    ``nDependentVariables`` and ``trainDatasetFileNames``.

    Returns:
        The object returned by ``masterAlgorithm.finalizeCompute()``; the
        trained model is obtained from it with ``.get(training.model)``.
    """
    # Master-side algorithm that collects the partial models of all blocks.
    masterAlgorithm = training.Distributed(step=step2Master)

    for i in range(nBlocks):
        # Data source for this block's CSV file.
        trainDataSource = FileDataSource(
            trainDatasetFileNames[i],
            DataSource.notAllocateNumericTable,
            DataSource.doDictionaryFromContext,
        )

        # Unallocated tables for the features and the dependent variables,
        # wrapped in one merged table that the data source loads into.
        trainData = HomogenNumericTable(nFeatures, 0, NumericTable.doNotAllocate)
        trainDependentVariables = HomogenNumericTable(
            nDependentVariables, 0, NumericTable.doNotAllocate
        )
        mergedData = MergedNumericTable(trainData, trainDependentVariables)

        trainDataSource.loadDataBlock(mergedData)

        # Local (step 1) training on this block only.
        localAlgorithm = training.Distributed(step=step1Local)
        localAlgorithm.input.set(training.data, trainData)
        localAlgorithm.input.set(training.dependentVariables, trainDependentVariables)
        presult = localAlgorithm.compute()

        # Hand the block's partial result to the master algorithm.
        masterAlgorithm.input.add(training.partialModels, presult)

    # Combine the collected partial models and produce the final result.
    masterAlgorithm.compute()
    trainingResult = masterAlgorithm.finalizeCompute()

    printNumericTable(
        trainingResult.get(training.model).getBeta(),
        "Ridge Regression coefficients:",
    )

    # The __main__ entry point passes this result on to testModel().
    return trainingResult
def testModel(trainingResult):
    """Run batch prediction on the test file and print the results.

    Loads ``testDatasetFileName`` through a merged table made of a feature
    table (``nFeatures`` columns) and a ground-truth table
    (``nDependentVariables`` columns), predicts with the model taken from
    ``trainingResult``, then prints the first 10 rows of the predictions and
    of the ground truth.

    Args:
        trainingResult: Training result whose ``get(training.model)`` yields
            the model to predict with (as returned by ``trainModel``).
    """
    testDataSource = FileDataSource(
        testDatasetFileName,
        DataSource.doAllocateNumericTable,
        DataSource.doDictionaryFromContext,
    )

    # Unallocated tables for the features and the ground truth, wrapped in one
    # merged table that the data source loads into.
    testData = HomogenNumericTable(nFeatures, 0, NumericTable.doNotAllocate)
    testGroundTruth = HomogenNumericTable(nDependentVariables, 0, NumericTable.doNotAllocate)
    mergedData = MergedNumericTable(testData, testGroundTruth)

    testDataSource.loadDataBlock(mergedData)

    # Batch prediction using the test features and the trained model.
    algorithm = prediction.Batch()
    algorithm.input.setTable(prediction.data, testData)
    algorithm.input.setModel(prediction.model, trainingResult.get(training.model))

    predictionResult = algorithm.compute()

    printNumericTable(
        predictionResult.get(prediction.prediction),
        "Ridge Regression prediction results: (first 10 rows):",
        10,
    )
    printNumericTable(testGroundTruth, "Ground truth (first 10 rows):", 10)
# Script entry point: train on the distributed blocks, then evaluate the
# resulting model on the test file.
if __name__ == "__main__":
    trainingResult = trainModel()
    testModel(trainingResult)