Python* API Reference for Intel® Data Analytics Acceleration Library 2020 Update 1

neural_net_dense_distr.py

1 # file: neural_net_dense_distr.py
2 #===============================================================================
3 # Copyright 2014-2020 Intel Corporation
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 #===============================================================================
17 
18 #
19 # ! Content:
20 # ! Python example of neural network training and scoring in the distributed processing mode
21 # !*****************************************************************************
22 
23 #
24 
25 
26 #
27 
28 import os
29 import sys
30 
31 import numpy as np
32 
33 from daal import step1Local, step2Master
34 from daal.algorithms.neural_networks import initializers
35 from daal.algorithms.neural_networks import layers
36 from daal.algorithms import optimization_solver
37 from daal.algorithms.neural_networks import prediction, training
38 from daal.data_management import NumericTable, HomogenNumericTable, readOnly, SubtensorDescriptor, HomogenTensor
39 
# Make the parent of this script's directory importable so that the shared
# `utils` module (imported right below) can be found.
# The script path is made absolute BEFORE the directory components are
# stripped: with a relative __file__ such as "neural_net_dense_distr.py",
# dirname(dirname(__file__)) collapses to "" and would resolve to the current
# working directory rather than to the parent of the script's directory.
utils_folder = os.path.realpath(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
43 from utils import printTensors, readTensorFromCSV
44 
# Input data set parameters: one training data file and one ground truth file
# per node, all located in ../data/distributed
trainDatasetFileNames = [
    os.path.join("..", "data", "distributed", fileName)
    for fileName in (
        "neural_network_train_dense_1.csv",
        "neural_network_train_dense_2.csv",
        "neural_network_train_dense_3.csv",
        "neural_network_train_dense_4.csv",
    )
]
trainGroundTruthFileNames = [
    os.path.join("..", "data", "distributed", fileName)
    for fileName in (
        "neural_network_train_ground_truth_1.csv",
        "neural_network_train_ground_truth_2.csv",
        "neural_network_train_ground_truth_3.csv",
        "neural_network_train_ground_truth_4.csv",
    )
]

# Test data set and its ground truth, located in ../data/batch
testDatasetFile = os.path.join("..", "data", "batch", "neural_network_test.csv")
testGroundTruthFile = os.path.join("..", "data", "batch", "neural_network_test_ground_truth.csv")

# Number of nodes, the overall batch size, and the share of it handled by each node
nNodes = 4
batchSize = 100
batchSizeLocal = batchSize // nNodes
65 
66 
def configureNet():
    """Build the training topology used by the master and by every local node.

    The network is a chain of three fully-connected layers followed by a
    softmax cross-entropy loss layer.

    Returns:
        training.Topology: topology with the four layers linked in sequence.
    """
    secondLayerSize = 40

    # First fully-connected layer (constructed with 20) and its uniform initializers
    firstLayer = layers.fullyconnected.Batch(20)
    firstLayer.parameter.weightsInitializer = initializers.uniform.Batch(-0.001, 0.001)
    firstLayer.parameter.biasesInitializer = initializers.uniform.Batch(0, 0.5)

    # Second fully-connected layer and its uniform initializers
    secondLayer = layers.fullyconnected.Batch(secondLayerSize)
    secondLayer.parameter.weightsInitializer = initializers.uniform.Batch(0.5, 1)
    secondLayer.parameter.biasesInitializer = initializers.uniform.Batch(0.5, 1)

    # Third fully-connected layer (constructed with 2) and its uniform initializers
    thirdLayer = layers.fullyconnected.Batch(2)
    thirdLayer.parameter.weightsInitializer = initializers.uniform.Batch(-0.005, 0.005)
    thirdLayer.parameter.biasesInitializer = initializers.uniform.Batch(0, 1)

    # Softmax cross-entropy loss layer with default parameters
    lossLayer = layers.loss.softmax_cross.Batch()

    # Register the layers in the topology, remembering the identifier of each one
    topology = training.Topology()
    layerIds = [topology.add(layer) for layer in (firstLayer, secondLayer, thirdLayer, lossLayer)]

    # Link every layer to the one that follows it
    for currentId, nextId in zip(layerIds, layerIds[1:]):
        topology.get(currentId).addNext(nextId)

    return topology
101 
102 
def getNextSubtensor(inputTensor, startPos, nElements):
    """Copy a range of `inputTensor` into a new float32 HomogenTensor.

    Args:
        inputTensor: tensor providing getSubtensor() / releaseSubtensor().
        startPos: starting index of the requested range.
        nElements: number of elements in the requested range.

    Returns:
        HomogenTensor: float32 tensor built from a copy of the requested range.

    NOTE(review): the subtensor is requested with an empty list of fixed
    dimensions, so the range presumably runs along the first dimension of
    `inputTensor` -- confirm against the DAAL Tensor.getSubtensor reference.
    """
    subtensorBlock = SubtensorDescriptor(ntype=np.float32)
    inputTensor.getSubtensor([], startPos, nElements, readOnly, subtensorBlock)
    try:
        # np.array() copies by default, so the data outlives the released block
        subtensorData = np.array(subtensorBlock.getArray(), dtype=np.float32)
    finally:
        # Always hand the block back to the tensor, even if the copy fails
        inputTensor.releaseSubtensor(subtensorBlock)

    return HomogenTensor(subtensorData, ntype=np.float32)
113 
114 
def initializeNetwork():
    """Load the per-node training data and create the master and local training algorithms.

    Returns:
        tuple: (net, netLocal, trainingData, trainingGroundTruth) where `net`
        is the step2Master algorithm, `netLocal` is the list of step1Local
        algorithms (one per node), and the last two items are the lists of
        per-node training tensors and ground truth tensors.
    """
    # Read every node's training data set and ground truth from .csv files
    trainingData = []
    trainingGroundTruth = []
    for node in range(nNodes):
        trainingData.append(readTensorFromCSV(trainDatasetFileNames[node]))
        trainingGroundTruth.append(readTensorFromCSV(trainGroundTruthFileNames[node], True))

    # Dimensions of a single local batch: those of the first node's data with
    # the first dimension replaced by the local batch size
    sampleSize = trainingData[0].getDimensions()
    sampleSize[0] = batchSizeLocal

    # Stochastic gradient descent (SGD) solver handed to the master algorithm
    masterSolver = optimization_solver.sgd.Batch(fptype=np.float32)
    masterSolver.parameter.batchSize = batchSizeLocal

    # Create and initialize the neural network on the master node
    masterTopology = configureNet()
    net = training.Distributed(step2Master, masterSolver)
    net.parameter.batchSize = batchSizeLocal
    net.initialize(sampleSize, masterTopology)

    # Create one local algorithm per node, each with its own topology and model
    topology = []
    netLocal = []
    for _ in range(nNodes):
        nodeTopology = configureNet()
        # Every local topology stays referenced until the function returns
        topology.append(nodeTopology)

        # Model initialized from this node's topology, used as the local input model
        localModel = training.Model()
        localModel.initialize_Float32(sampleSize, nodeTopology)

        localNet = training.Distributed(step1Local)
        localNet.input.setStep1LocalInput(training.inputModel, localModel)

        # Set the batch size for the neural network training on this node
        localNet.parameter.batchSize = batchSizeLocal
        netLocal.append(localNet)

    return net, netLocal, trainingData, trainingGroundTruth
155 
156 
def trainModel(net, netLocal, trainingData, trainingGroundTruth):
    """Train the network batch by batch and return the prediction model.

    Args:
        net: master (step2Master) training algorithm.
        netLocal: list of local (step1Local) training algorithms, one per node.
        trainingData: list of per-node training data tensors.
        trainingGroundTruth: list of per-node ground truth tensors.

    Returns:
        The float32 prediction model taken from the finalized training result.
    """
    # SGD solver with a single-element learning rate sequence of 0.001
    solver = optimization_solver.sgd.Batch(fptype=np.float32)
    stepLength = 0.001
    solver.parameter.learningRateSequence = HomogenNumericTable(1, 1, NumericTable.doAllocate, stepLength)

    # Use this solver for the training on the master node
    net.parameter.optimizationSolver = solver

    # The number of samples is taken from the first node's data set; only full
    # local batches are processed
    nSamples = trainingData[0].getDimensions()[0]
    for batchStart in range(0, nSamples - batchSizeLocal + 1, batchSizeLocal):
        # Local step on every node for the current batch
        for node in range(nNodes):
            localNet = netLocal[node]
            batchData = getNextSubtensor(trainingData[node], batchStart, batchSizeLocal)
            localNet.input.setInput(training.data, batchData)
            batchTruth = getNextSubtensor(trainingGroundTruth[node], batchStart, batchSizeLocal)
            localNet.input.setInput(training.groundTruth, batchTruth)

            # Compute the partial result on the local node and hand it to the master
            localResult = localNet.compute()
            net.input.add(training.partialResults, node, localResult)

        # Master step: update weights and biases from the partial results
        net.compute()
        masterResult = net.getPartialResult().get(training.resultFromMaster)
        wb = masterResult.get(training.model).getWeightsAndBiases()

        # Push the updated weights and biases back into every local input model
        for node in range(nNodes):
            localModel = netLocal[node].input.getStep1LocalInput(training.inputModel)
            localModel.setWeightsAndBiases(wb)

    # Finalize the training on the master node and extract the prediction model
    finalResult = net.finalizeCompute()
    return finalResult.get(training.model).getPredictionModel_Float32()
196 
197 
def testModel(predictionModel):
    """Run the prediction algorithm on the test data set.

    Args:
        predictionModel: prediction model to evaluate.

    Returns:
        The value returned by compute() of the batch prediction algorithm.
    """
    # Load the test data set from its .csv file
    testData = readTensorFromCSV(testDatasetFile)

    predictor = prediction.Batch()

    # Process the whole test set as one batch: the batch size is the size of
    # dimension 0 of the test tensor
    predictor.parameter.batchSize = testData.getDimensionSize(0)

    # Provide the model and the data to the prediction algorithm
    predictor.input.setModelInput(prediction.model, predictionModel)
    predictor.input.setTensorInput(prediction.data, testData)

    result = predictor.compute()
    return result
214 
215 
def printResults(testGroundTruthFile, predictionResult):
    """Print the test ground truth next to the network predictions.

    Args:
        testGroundTruthFile: path of the .csv file holding the test ground truth.
        predictionResult: result object of the prediction algorithm.
    """
    # Load the expected values, then fetch the predictions from the result
    expected = readTensorFromCSV(testGroundTruthFile)
    predicted = predictionResult.getResult(prediction.prediction)

    printTensors(
        expected,
        predicted,
        "Ground truth",
        "Neural network predictions: each class probability",
        "Neural network classification results (first 20 observations):",
        20,
    )
223 
224 
def main():
    """Initialize, train and test the distributed neural network, then print the results."""
    net, netLocal, trainingData, trainingGroundTruth = initializeNetwork()
    predictionModel = trainModel(net, netLocal, trainingData, trainingGroundTruth)
    printResults(testGroundTruthFile, testModel(predictionModel))
230 
231 
# Run the example only when this file is executed as a script
if __name__ == "__main__":
    main()

For more complete information about compiler optimizations, see our Optimization Notice.