Python* API Reference for Intel® Data Analytics Acceleration Library 2020 Update 1

svm_two_class_metrics_dense_batch.py

1 # file: svm_two_class_metrics_dense_batch.py
2 #===============================================================================
3 # Copyright 2014-2020 Intel Corporation
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 #===============================================================================
17 
18 #
19 # ! Content:
20 # ! Python example of two-class support vector machine (SVM) quality metrics
21 # !
22 # !*****************************************************************************
23 
24 #
25 
26 
27 #
28 
29 import os
30 import sys
31 
32 from daal.algorithms import kernel_function
33 from daal.algorithms.classifier.quality_metric import binary_confusion_matrix
34 from daal.algorithms import svm
35 from daal.algorithms import classifier
36 from daal.data_management import (
37  DataSourceIface, FileDataSource, readOnly, BlockDescriptor,
38  HomogenNumericTable, NumericTableIface, MergedNumericTable
39 )
40 
41 utils_folder = os.path.realpath(os.path.abspath(os.path.dirname(os.path.dirname(__file__))))
42 if utils_folder not in sys.path:
43  sys.path.insert(0, utils_folder)
44 from utils import printNumericTables, printNumericTable
45 
# Locations of the CSV data sets, relative to the current working directory
DATA_PREFIX = os.path.join('..', 'data', 'batch')
trainDatasetFileName = os.path.join(DATA_PREFIX, 'svm_two_class_train_dense.csv')
testDatasetFileName = os.path.join(DATA_PREFIX, 'svm_two_class_test_dense.csv')

# Number of feature columns in each data set row; the loaders in this file
# pair them with one additional label column
nFeatures = 20

# Linear kernel instance shared by the training and prediction algorithms
kernel = kernel_function.linear.Batch()

# Result slots filled in by trainModel(), testModel() and testModelQuality()
trainingResult = predictionResult = qualityMetricSetResult = None

# Label tables consumed by the quality metrics and by printResults()
predictedLabels = groundTruthLabels = None
63 
64 
def trainModel():
    """Train the two-class SVM model and store it in ``trainingResult``.

    Loads ``trainDatasetFileName`` into a feature table with ``nFeatures``
    columns and a single-column label table, then runs the batch SVM
    training algorithm configured with the module-level ``kernel``.
    """
    global trainingResult

    # CSV-backed data source; it is told not to allocate a table of its own
    # because the rows are loaded into the tables created below
    source = FileDataSource(
        trainDatasetFileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext,
    )

    # Empty feature and label tables, merged so one load call fills both
    features = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
    labels = HomogenNumericTable(1, 0, NumericTableIface.doNotAllocate)
    featuresWithLabels = MergedNumericTable(features, labels)

    # Pull the file contents into the merged table
    source.loadDataBlock(featuresWithLabels)

    # Configure the SVM training algorithm
    trainer = svm.training.Batch()
    trainer.parameter.kernel = kernel
    # NOTE(review): cache size is presumably in bytes; confirm against the DAAL docs
    trainer.parameter.cacheSize = 600000000

    # Hand the training features and their ground-truth labels to the algorithm
    trainer.input.set(classifier.training.data, features)
    trainer.input.set(classifier.training.labels, labels)

    # Run the training and keep the result for testModel()
    trainingResult = trainer.compute()
94 
def testModel():
    """Run SVM prediction on the test data set.

    Loads ``testDatasetFileName`` into a feature table with ``nFeatures``
    columns and a single-column label table, then predicts with the model
    held in the module-level ``trainingResult``. The prediction result is
    stored in ``predictionResult`` and the ground-truth label table in
    ``groundTruthLabels``.

    ``trainModel()`` must have been called first; otherwise
    ``trainingResult`` is still ``None`` and the model lookup fails.
    """
    global predictionResult, groundTruthLabels

    # CSV-backed data source. The rows are loaded into the merged table
    # created below, so the source is told not to allocate a table of its
    # own, matching the way trainModel() builds its data source.
    testDataSource = FileDataSource(
        testDatasetFileName, DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )

    # Empty feature and label tables, merged so one load call fills both
    testData = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
    groundTruthLabels = HomogenNumericTable(1, 0, NumericTableIface.doNotAllocate)
    mergedData = MergedNumericTable(testData, groundTruthLabels)

    # Retrieve the data from the input file
    testDataSource.loadDataBlock(mergedData)

    # Create an algorithm object to predict SVM values, using the same
    # kernel the model was trained with
    algorithm = svm.prediction.Batch()
    algorithm.parameter.kernel = kernel

    # Pass the testing data set and the trained model to the algorithm
    algorithm.input.setTable(classifier.prediction.data, testData)
    algorithm.input.setModel(classifier.prediction.model, trainingResult.get(classifier.training.model))

    # Predict SVM values and keep the result for testModelQuality()
    predictionResult = algorithm.compute()
124 
125 
def testModelQuality():
    """Compute the confusion-matrix quality metrics for the predictions.

    Reads the predicted labels out of ``predictionResult``, compares them
    with ``groundTruthLabels`` through the SVM quality metric set and stores
    the outcome in ``qualityMetricSetResult``. The predicted label table is
    kept in ``predictedLabels`` for later printing.
    """
    global predictedLabels, qualityMetricSetResult

    # Labels produced by the prediction stage
    predictedLabels = predictionResult.get(classifier.prediction.prediction)

    # Quality metric set for the SVM algorithm
    metricSet = svm.quality_metric_set.Batch()

    # Input object of the confusion-matrix metric inside the set
    inputCollection = metricSet.getInputDataCollection()
    confusionInput = inputCollection.getInput(svm.quality_metric_set.confusionMatrix)

    # Supply both label tables to be compared
    confusionInput.set(binary_confusion_matrix.predictedLabels, predictedLabels)
    confusionInput.set(binary_confusion_matrix.groundTruthLabels, groundTruthLabels)

    # Run the metric computation and keep the result collection
    qualityMetricSetResult = metricSet.compute()
143 
144 
def printResults():
    """Print the classification results and the quality metrics.

    Prints the ground-truth and predicted label tables side by side, then
    the confusion matrix, then the individual binary metrics (accuracy,
    precision, recall, F-score, specificity, AUC) read from the first row
    of the binary metrics table.

    Expects ``testModel()`` and ``testModelQuality()`` to have populated
    the module-level result variables.
    """
    # Print the classification results
    printNumericTables(
        groundTruthLabels, predictedLabels,
        "Ground truth", "Classification results",
        "SVM classification results (first 20 observations):", 20, interval=15, flt64=False
    )

    # Print the confusion matrix
    qualityMetricResult = qualityMetricSetResult.getResult(svm.quality_metric_set.confusionMatrix)
    printNumericTable(qualityMetricResult.get(binary_confusion_matrix.confusionMatrix), "Confusion matrix:")

    # Read the first row of the metrics table through a block descriptor
    block = BlockDescriptor()
    qualityMetricsTable = qualityMetricResult.get(binary_confusion_matrix.binaryMetrics)
    qualityMetricsTable.getBlockOfRows(0, 1, readOnly, block)
    try:
        qualityMetricsData = block.getArray().flatten()
        print("Accuracy: {0:.3f}".format(qualityMetricsData[binary_confusion_matrix.accuracy]))
        print("Precision: {0:.3f}".format(qualityMetricsData[binary_confusion_matrix.precision]))
        print("Recall: {0:.3f}".format(qualityMetricsData[binary_confusion_matrix.recall]))
        print("F-score: {0:.3f}".format(qualityMetricsData[binary_confusion_matrix.fscore]))
        print("Specificity: {0:.3f}".format(qualityMetricsData[binary_confusion_matrix.specificity]))
        print("AUC: {0:.3f}".format(qualityMetricsData[binary_confusion_matrix.AUC]))
    finally:
        # Release the acquired block even if reading or printing fails
        qualityMetricsTable.releaseBlockOfRows(block)
169 
if __name__ == "__main__":
    # Each stage relies on module-level results left by the previous one,
    # so the order matters: train, predict, evaluate, report
    for stage in (trainModel, testModel, testModelQuality, printResults):
        stage()

For more complete information about compiler optimizations, see our Optimization Notice.