24 import daal.algorithms.linear_regression
as linear_regression
25 import daal.algorithms.linear_regression.quality_metric_set
as quality_metric_set
26 from daal.algorithms.linear_regression
import training, prediction
27 from daal.algorithms.linear_regression.quality_metric
import single_beta, group_of_betas
28 from daal.data_management
import (
29 DataSourceIface, FileDataSource, HomogenNumericTable, MergedNumericTable,
30 NumericTableIface, BlockDescriptor, readWrite
# Make the folder one level above this script's folder importable, then pull
# in the printing helper from the utils module.
utils_folder = os.path.realpath(
    os.path.abspath(
        os.path.dirname(os.path.dirname(__file__))
    )
)
if utils_folder not in sys.path:
    # Prepend so this folder is searched before the existing entries.
    sys.path.insert(0, utils_folder)
from utils import printNumericTable
# Training data set, addressed relative to the current working directory.
trainDatasetFileName = os.path.join('..', 'data', 'batch', 'linear_regression_train.csv')

# Input data set parameters.
# NOTE(review): nFeatures is needed where the feature table is created in the
# __main__ section but had no definition in this listing. 10 is the value used
# by the upstream Intel DAAL sample for this CSV -- confirm against the data.
nFeatures = 10
nDependentVariables = 2

# Module-level state shared between the functions below via ``global``.
# Initialised to None so the names always exist before the first training run.
trainingResult = None
qmsResult = None
trainData = None
trainDependentVariables = None
def trainModel(algorithm):
    """Run ``algorithm`` on the module-level training tables and print the betas.

    The inputs are taken from the globals ``trainData`` and
    ``trainDependentVariables``. The value returned by ``algorithm.compute()``
    is stored in the global ``trainingResult``.
    """
    global trainingResult, trainData, trainDependentVariables

    # Register the training set and the expected responses with the algorithm.
    trainingInputs = (
        (training.data, trainData),
        (training.dependentVariables, trainDependentVariables),
    )
    for inputId, table in trainingInputs:
        algorithm.input.set(inputId, table)

    # Compute and keep the result for the prediction / quality-metric steps.
    trainingResult = algorithm.compute()

    coefficients = trainingResult.get(training.model).getBeta()
    printNumericTable(coefficients, "Linear Regression coefficients:")
def predictResults(trainData):
    """Predict responses for ``trainData`` with the model in ``trainingResult``.

    Creates a ``prediction.Batch`` algorithm, gives it the table and the model
    from the global ``trainingResult``, and returns the entry stored under
    ``prediction.prediction`` in the computed result.
    """
    predictor = prediction.Batch()

    # Inputs: the data to predict on, then the trained model.
    predictor.input.setTable(prediction.data, trainData)
    trainedModel = trainingResult.get(training.model)
    predictor.input.setModel(prediction.model, trainedModel)

    return predictor.compute().get(prediction.prediction)
def predictReducedModelResults(trainData):
    """Predict responses for ``trainData`` with two beta coefficients zeroed.

    The coefficients at columns ``j1`` and ``j2`` of the trained model's beta
    table are temporarily set to 0 for every dependent variable, a prediction
    is computed with ``predictResults``, and the original coefficient values
    are written back before returning (also when the prediction raises).

    Returns the value produced by ``predictResults(trainData)`` for the
    temporarily reduced model.
    """
    model = trainingResult.get(training.model)
    betas = model.getBeta()

    # NOTE(review): j1/j2 were referenced but not defined in this listing.
    # 2 and 10 are the indices used by the upstream Intel DAAL sample; two
    # excluded betas agrees with the "- 2" in testModelQuality. Confirm.
    j1 = 2
    j2 = 10
    excludedBetas = (j1, j2)

    # Remember the coefficients that are about to be dropped, then zero them.
    block = BlockDescriptor()
    betas.getBlockOfRows(0, nDependentVariables, readWrite, block)
    pBeta = block.getArray()
    savedBeta = [[pBeta[i][j] for j in excludedBetas]
                 for i in range(nDependentVariables)]
    for i in range(nDependentVariables):
        for j in excludedBetas:
            pBeta[i][j] = 0
    # Releasing the read-write block is what the original code relies on to
    # hand the modified values back to the table.
    betas.releaseBlockOfRows(block)

    try:
        return predictResults(trainData)
    finally:
        # Always restore the full model so later steps see the trained betas.
        block = BlockDescriptor()
        betas.getBlockOfRows(0, nDependentVariables, readWrite, block)
        pBeta = block.getArray()
        for i in range(nDependentVariables):
            for k, j in enumerate(excludedBetas):
                pBeta[i][j] = savedBeta[i][k]
        betas.releaseBlockOfRows(block)
def testModelQuality():
    """Compute and print quality metrics for the model in ``trainingResult``.

    Predicts responses with the full model and with the reduced model, feeds
    both (together with the expected responses) to a
    ``quality_metric_set.Batch`` object, stores its result collection in the
    global ``qmsResult`` and prints the single-beta and group-of-betas
    metrics with ``printNumericTable``.
    """
    global trainingResult, qmsResult

    # Full-model prediction next to the expected responses.
    fullPrediction = predictResults(trainData)
    printNumericTable(trainDependentVariables, "Expected responses (first 20 rows):", 20)
    printNumericTable(fullPrediction, "Predicted responses (first 20 rows):", 20)

    # Reduced-model prediction.
    model = trainingResult.get(linear_regression.training.model)
    reducedPrediction = predictReducedModelResults(trainData)
    printNumericTable(reducedPrediction, "Responses predicted with reduced model (first 20 rows):", 20)

    # Quality metric set sized for the full and the reduced number of betas.
    nBetaReducedModel = model.getNumberOfBetas() - 2
    qualityMetricSet = quality_metric_set.Batch(model.getNumberOfBetas(), nBetaReducedModel)

    # Inputs of the single-beta metrics.
    singleBeta = single_beta.Input.downCast(
        qualityMetricSet.getInputDataCollection().getInput(quality_metric_set.singleBeta))
    singleBeta.setDataInput(single_beta.expectedResponses, trainDependentVariables)
    singleBeta.setDataInput(single_beta.predictedResponses, fullPrediction)
    singleBeta.setModelInput(single_beta.model, model)

    # Inputs of the group-of-betas metrics.
    groupOfBetas = group_of_betas.Input.downCast(
        qualityMetricSet.getInputDataCollection().getInput(quality_metric_set.groupOfBetas))
    groupOfBetas.set(group_of_betas.expectedResponses, trainDependentVariables)
    groupOfBetas.set(group_of_betas.predictedResponses, fullPrediction)
    groupOfBetas.set(group_of_betas.predictedReducedModelResponses, reducedPrediction)

    qualityMetricSet.compute()
    qmsResult = qualityMetricSet.getResultCollection()

    # ---- single-beta report ----
    print("Quality metrics for a single beta")
    singleBetaResult = single_beta.Result.downCast(qmsResult.getResult(quality_metric_set.singleBeta))
    singleBetaTables = (
        (single_beta.rms, "Root means square errors for each response (dependent variable):"),
        (single_beta.variance, "Variance for each response (dependent variable):"),
        (single_beta.zScore, "Z-score statistics:"),
        (single_beta.confidenceIntervals, "Confidence intervals for each beta coefficient:"),
        (single_beta.inverseOfXtX, "Inverse(Xt * X) matrix:"),
    )
    for resultId, title in singleBetaTables:
        printNumericTable(singleBetaResult.getResult(resultId), title)

    # One covariance matrix per entry of the betaCovariances collection.
    covariances = singleBetaResult.getResultDataCollection(single_beta.betaCovariances)
    for responseIndex in range(covariances.size()):
        message = "Variance-covariance matrix for betas of {}-th response\n".format(responseIndex)
        printNumericTable(singleBetaResult.get(single_beta.betaCovariances, responseIndex), message)

    # ---- group-of-betas report ----
    print("Quality metrics for a group of betas")
    groupResult = group_of_betas.Result.downCast(qmsResult.getResult(quality_metric_set.groupOfBetas))
    groupTables = (
        (group_of_betas.expectedMeans, "Means of expected responses for each dependent variable:"),
        (group_of_betas.expectedVariance, "Variance of expected responses for each dependent variable:"),
        (group_of_betas.regSS, "Regression sum of squares of expected responses:"),
        (group_of_betas.resSS, "Sum of squares of residuals for each dependent variable:"),
        (group_of_betas.tSS, "Total sum of squares for each dependent variable:"),
        (group_of_betas.determinationCoeff, "Determination coefficient for each dependent variable:"),
        (group_of_betas.fStatistics, "F-statistics for each dependent variable:"),
    )
    for resultId, title in groupTables:
        printNumericTable(groupResult.get(resultId), title, 0, 0, 20)
if __name__ == "__main__":

    # Data source that reads the training CSV file.
    dataSource = FileDataSource(trainDatasetFileName,
                                DataSourceIface.notAllocateNumericTable,
                                DataSourceIface.doDictionaryFromContext)

    # Unallocated tables for the features and for the dependent variables,
    # plus a merged table over both that is the target of the single load.
    # NOTE(review): nFeatures is expected to be a module-level constant with
    # the number of feature columns -- confirm it is defined.
    trainData = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
    trainDependentVariables = HomogenNumericTable(nDependentVariables, 0, NumericTableIface.doNotAllocate)
    mergedData = MergedNumericTable(trainData, trainDependentVariables)

    dataSource.loadDataBlock(mergedData)

    # One run per training method. Previously a two-iteration loop ignored its
    # index, so both methods were trained twice and the quality metrics were
    # never computed for either of them.
    trainingRuns = (
        ("Train model with normal equation algorithm.", {}),
        ("Train model with QR algorithm.", {"method": training.qrDense}),
    )
    for title, batchArgs in trainingRuns:
        print(title)
        algorithm = training.Batch(**batchArgs)
        trainModel(algorithm)

        # Evaluate the model that was just trained.
        testModelQuality()