Python* API Reference for Intel® Data Analytics Acceleration Library 2020 Update 1

svm_two_class_dense_batch.py

1 # file: svm_two_class_dense_batch.py
2 #===============================================================================
3 # Copyright 2014-2020 Intel Corporation
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 #===============================================================================
17 
18 
19 
20 
21 import os
22 import sys
23 
24 from daal.algorithms.svm import training, prediction
25 from daal.algorithms import kernel_function, classifier
26 from daal.data_management import (
27  DataSourceIface, FileDataSource, HomogenNumericTable, MergedNumericTable, NumericTableIface
28 )
29 
# Put the parent of this script's directory on sys.path so that the
# `utils` import below can be resolved from there.
utils_folder = os.path.realpath(
    os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
)
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
from utils import printNumericTables

# Relative location of the input data sets
DATA_PREFIX = os.path.join('..', 'data', 'batch')

trainDatasetFileName = os.path.join(DATA_PREFIX, 'svm_two_class_train_dense.csv')
testDatasetFileName = os.path.join(DATA_PREFIX, 'svm_two_class_test_dense.csv')

# Column count of the feature tables; labels go into a separate 1-column table
nFeatures = 20

# Kernel function object shared by the training and prediction algorithms
kernel = kernel_function.linear.Batch()

# State shared between the stages: trainModel() fills trainingResult,
# testModel() fills predictionResult and testGroundTruth.
trainingResult = predictionResult = testGroundTruth = None
50 
51 
def trainModel():
    """Train the SVM model on the training data set.

    Loads ``trainDatasetFileName`` into a feature table with ``nFeatures``
    columns and a one-column label table, configures ``training.Batch`` with
    the module-level ``kernel``, and stores the value returned by
    ``compute()`` in the global ``trainingResult``.
    """
    global trainingResult

    # CSV-backed data source for the training set
    source = FileDataSource(
        trainDatasetFileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext,
    )

    # Zero-row, unallocated tables: one for the features, one for the labels.
    # They are combined into a merged table so a single load targets both.
    features = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
    labels = HomogenNumericTable(1, 0, NumericTableIface.doNotAllocate)
    featuresAndLabels = MergedNumericTable(features, labels)

    # Pull the file contents into the merged table
    source.loadDataBlock(featuresAndLabels)

    # Set up the SVM training algorithm
    trainer = training.Batch()
    trainer.parameter.kernel = kernel
    trainer.parameter.cacheSize = 600000000

    # Hand the training data and the dependent values to the algorithm
    trainer.input.set(classifier.training.data, features)
    trainer.input.set(classifier.training.labels, labels)

    # Build the model and publish it for testModel()
    trainingResult = trainer.compute()
81 
82 
83 def testModel():
84  global predictionResult, testGroundTruth
85 
86  # Initialize FileDataSource<CSVFeatureManager> to retrieve the test data from a .csv file
87  testDataSource = FileDataSource(
88  testDatasetFileName, DataSourceIface.notAllocateNumericTable,
89  DataSourceIface.doDictionaryFromContext
90  )
91 
92  # Create Numeric Tables for testing data and labels
93  testData = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
94  testGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.doNotAllocate)
95  mergedData = MergedNumericTable(testData, testGroundTruth)
96 
97  # Retrieve the data from input file
98  testDataSource.loadDataBlock(mergedData)
99 
100  # Create an algorithm object to predict SVM values
101  algorithm = prediction.Batch()
102 
103  algorithm.parameter.kernel = kernel
104 
105  # Pass a testing data set and the trained model to the algorithm
106  algorithm.input.setTable(classifier.prediction.data, testData)
107  algorithm.input.setModel(classifier.prediction.model, trainingResult.get(classifier.training.model))
108 
109  # Predict SVM values
110  algorithm.compute()
111 
112  # Retrieve the algorithm results
113  predictionResult = algorithm.getResult()
114 
115 
def printResults():
    """Print the ground-truth labels next to the SVM classification results.

    Passes the global ``testGroundTruth`` table and the prediction table from
    the global ``predictionResult`` to ``printNumericTables`` together with
    the column titles, the header message and the value 20 (presumably the
    number of rows to show, as the header text suggests).

    Raises:
        RuntimeError: if ``predictionResult`` or ``testGroundTruth`` is still
            ``None``, i.e. ``testModel()`` has not produced results yet.
    """
    # Give a clear error instead of an AttributeError on None
    if predictionResult is None or testGroundTruth is None:
        raise RuntimeError("testModel() must be called before printResults()")

    printNumericTables(
        testGroundTruth, predictionResult.get(classifier.prediction.prediction),
        "Ground truth\t", "Classification results",
        "SVM classification results (first 20 observations):", 20, flt64=False
    )
123 
if __name__ == "__main__":
    # Run the pipeline in order: train the model, predict, then print
    for stage in (trainModel, testModel, printResults):
        stage()

For more complete information about compiler optimizations, see our Optimization Notice.