Python* API Reference for Intel® Data Analytics Acceleration Library 2020 Update 1

lin_reg_metrics_dense_batch.py

1 # file: lin_reg_metrics_dense_batch.py
2 #===============================================================================
3 # Copyright 2014-2020 Intel Corporation
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 #===============================================================================
17 
18 
19 
20 
21 import os
22 import sys
23 
24 import daal.algorithms.linear_regression as linear_regression
25 import daal.algorithms.linear_regression.quality_metric_set as quality_metric_set
26 from daal.algorithms.linear_regression import training, prediction
27 from daal.algorithms.linear_regression.quality_metric import single_beta, group_of_betas
28 from daal.data_management import (
29  DataSourceIface, FileDataSource, HomogenNumericTable, MergedNumericTable,
30  NumericTableIface, BlockDescriptor, readWrite
31 )
32 
# Put the parent of this script's directory on the import path; the `utils`
# module imported right below is presumably located there.
utils_folder = os.path.dirname(os.path.dirname(__file__))
utils_folder = os.path.realpath(os.path.abspath(utils_folder))
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
from utils import printNumericTable

# Training data set (relative path, resolved against the working directory)
trainDatasetFileName = os.path.join('..', 'data', 'batch', 'linear_regression_train.csv')

# Column counts used to size the feature and dependent-variable tables
nFeatures = 10
nDependentVariables = 2

# Module-level state shared by the functions below and the main script
trainData = None
trainDependentVariables = None
trainingResult = None
qmsResult = None
48 
def trainModel(algorithm):
    """Train a multiple linear regression model with ``algorithm``.

    The module-level ``trainData`` and ``trainDependentVariables`` tables are
    set as the algorithm inputs.  The value returned by
    ``algorithm.compute()`` is stored in the module-level ``trainingResult``
    and the beta coefficients of the resulting model are printed.
    """
    # Only trainingResult is rebound here; the data tables are merely read.
    global trainingResult

    # Hand the observations and their dependent values to the algorithm
    trainingInput = algorithm.input
    trainingInput.set(training.data, trainData)
    trainingInput.set(training.dependentVariables, trainDependentVariables)

    # Build the model and keep the result for the later steps
    trainingResult = algorithm.compute()

    coefficients = trainingResult.get(training.model).getBeta()
    printNumericTable(coefficients, "Linear Regression coefficients:")
59 
def predictResults(trainData):
    """Predict responses for ``trainData`` with the trained model.

    The model is taken from the module-level ``trainingResult``.  Returns the
    ``prediction.prediction`` entry of the prediction algorithm's result.
    """
    # Batch prediction algorithm for multiple linear regression
    predictor = prediction.Batch()

    # Inputs: the observations and the previously trained model
    predictor.input.setTable(prediction.data, trainData)
    predictor.input.setModel(prediction.model, trainingResult.get(training.model))

    # Run the prediction and hand back the table of predicted responses
    return predictor.compute().get(prediction.prediction)
71 
def predictReducedModelResults(trainData):
    """Predict responses with a reduced model in which two betas are zeroed.

    For every dependent variable, the coefficients at column indices 2 and 10
    of the trained model's beta table are temporarily set to zero.
    Predictions are then computed through ``predictResults`` and the original
    coefficient values are written back.

    The beta table belongs to the model held by the module-level
    ``trainingResult`` and is modified in place.  The restore step therefore
    runs in a ``finally`` clause, so the model gets its trained coefficients
    back even when prediction raises.

    :param trainData: table of observations to predict responses for
    :return: the value returned by ``predictResults`` for the reduced model
    """
    model = trainingResult.get(training.model)
    betas = model.getBeta()

    # Column indices of the betas excluded from the reduced model
    excludedColumns = (2, 10)
    rows = range(nDependentVariables)

    # Remember the excluded betas, then zero them in the model's own table
    block = BlockDescriptor()
    betas.getBlockOfRows(0, nDependentVariables, readWrite, block)
    pBeta = block.getArray()
    savedBeta = [[pBeta[i][j] for j in excludedColumns] for i in rows]
    for i in rows:
        for j in excludedColumns:
            pBeta[i][j] = 0
    betas.releaseBlockOfRows(block)

    try:
        predictedResults = predictResults(trainData)
    finally:
        # Always put the trained coefficients back; the model is shared state
        block = BlockDescriptor()
        betas.getBlockOfRows(0, nDependentVariables, readWrite, block)
        pBeta = block.getArray()
        for i in rows:
            for k, j in enumerate(excludedColumns):
                pBeta[i][j] = savedBeta[i][k]
        betas.releaseBlockOfRows(block)

    return predictedResults
104 
def testModelQuality():
    """Compute quality metrics of the trained model on the training data.

    Responses predicted by the full model and by the reduced model (see
    ``predictReducedModelResults``) are printed and passed, together with the
    expected responses, to a linear regression quality metric set.  The
    resulting collection is stored in the module-level ``qmsResult``.
    """
    global qmsResult

    fullResponses = predictResults(trainData)
    printNumericTable(trainDependentVariables, "Expected responses (first 20 rows):", 20)
    printNumericTable(fullResponses, "Predicted responses (first 20 rows):", 20)

    model = trainingResult.get(linear_regression.training.model)
    reducedResponses = predictReducedModelResults(trainData)
    printNumericTable(reducedResponses, "Responses predicted with reduced model (first 20 rows):", 20)

    # Quality metric set; the reduced model is declared with two betas fewer
    nBetas = model.getNumberOfBetas()
    qualityMetricSet = quality_metric_set.Batch(nBetas, nBetas - 2)
    metricInputs = qualityMetricSet.getInputDataCollection()

    # Set input for the single beta metrics algorithm
    singleBeta = single_beta.Input.downCast(metricInputs.getInput(quality_metric_set.singleBeta))
    singleBeta.setDataInput(single_beta.expectedResponses, trainDependentVariables)
    singleBeta.setDataInput(single_beta.predictedResponses, fullResponses)
    singleBeta.setModelInput(single_beta.model, model)

    # Set input for the group of betas metrics algorithm
    groupOfBetas = group_of_betas.Input.downCast(metricInputs.getInput(quality_metric_set.groupOfBetas))
    groupOfBetas.set(group_of_betas.expectedResponses, trainDependentVariables)
    groupOfBetas.set(group_of_betas.predictedResponses, fullResponses)
    groupOfBetas.set(group_of_betas.predictedReducedModelResponses, reducedResponses)

    # Compute the metrics and keep the result collection for printing
    qualityMetricSet.compute()
    qmsResult = qualityMetricSet.getResultCollection()
135 
def printResults():
    """Print the quality metrics stored in the module-level ``qmsResult``.

    The single-beta metrics are printed first, followed by one
    variance-covariance table per entry of the beta covariances collection,
    and finally the group-of-betas metrics.
    """
    # Quality metrics for a single beta
    print("Quality metrics for a single beta")
    singleResult = single_beta.Result.downCast(qmsResult.getResult(quality_metric_set.singleBeta))

    singleBetaTables = (
        (single_beta.rms, "Root means square errors for each response (dependent variable):"),
        (single_beta.variance, "Variance for each response (dependent variable):"),
        (single_beta.zScore, "Z-score statistics:"),
        (single_beta.confidenceIntervals, "Confidence intervals for each beta coefficient:"),
        (single_beta.inverseOfXtX, "Inverse(Xt * X) matrix:"),
    )
    for resultId, title in singleBetaTables:
        printNumericTable(singleResult.getResult(resultId), title)

    # One covariance table per entry of the collection
    covariances = singleResult.getResultDataCollection(single_beta.betaCovariances)
    for idx in range(covariances.size()):
        title = "Variance-covariance matrix for betas of {0}-th response\n".format(idx)
        printNumericTable(singleResult.get(single_beta.betaCovariances, idx), title)

    # Quality metrics for a group of betas
    print("Quality metrics for a group of betas")
    groupResult = group_of_betas.Result.downCast(qmsResult.getResult(quality_metric_set.groupOfBetas))

    groupOfBetasTables = (
        (group_of_betas.expectedMeans, "Means of expected responses for each dependent variable:"),
        (group_of_betas.expectedVariance, "Variance of expected responses for each dependent variable:"),
        (group_of_betas.regSS, "Regression sum of squares of expected responses:"),
        (group_of_betas.resSS, "Sum of squares of residuals for each dependent variable:"),
        (group_of_betas.tSS, "Total sum of squares for each dependent variable:"),
        (group_of_betas.determinationCoeff, "Determination coefficient for each dependent variable:"),
        (group_of_betas.fStatistics, "F-statistics for each dependent variable:"),
    )
    for resultId, title in groupOfBetasTables:
        printNumericTable(groupResult.get(resultId), title, 0, 0, 20)
163 
if __name__ == "__main__":

    # Data source that retrieves the input data from the .csv file
    dataSource = FileDataSource(
        trainDatasetFileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )

    # Tables for the features and the dependent variables, combined into one
    # merged table that the data source fills
    trainData = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
    trainDependentVariables = HomogenNumericTable(nDependentVariables, 0, NumericTableIface.doNotAllocate)
    mergedData = MergedNumericTable(trainData, trainDependentVariables)

    # Retrieve the data from the input file
    dataSource.loadDataBlock(mergedData)

    # NOTE(review): the extracted listing lost its indentation; the quality
    # check and printing are assumed to run once per training method.
    for useQR in (False, True):
        if useQR:
            print("Train model with QR algorithm.")
            algorithm = training.Batch(method=training.qrDense)
        else:
            print("Train model with normal equation algorithm.")
            algorithm = training.Batch()
        trainModel(algorithm)
        testModelQuality()
        printResults()

For more complete information about compiler optimizations, see our Optimization Notice.