Python* API Reference for Intel® Data Analytics Acceleration Library 2020 Update 1

compression_batch.py

1 # file: compression_batch.py
2 #===============================================================================
3 # Copyright 2014-2020 Intel Corporation
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 #===============================================================================
17 
18 #
19 # ! Content:
20 # ! Python example of compression in the batch processing mode
21 # !
22 # !*****************************************************************************
23 
24 #
25 
26 
27 #
28 
29 import os
30 import sys
31 
32 import numpy as np
33 
34 from daal.data_management import Compressor_Zlib, Decompressor_Zlib, level9, DecompressionStream, CompressionStream
35 
# Put the parent of this script's directory on sys.path before importing the
# helper module (presumably that is where utils.py lives -- confirm layout).
_script_dir = os.path.dirname(__file__)
utils_folder = os.path.realpath(os.path.abspath(os.path.dirname(_script_dir)))
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
from utils import getCRC32, readTextFile

# Relative path of the data set that the example compresses.
DATA_PREFIX = os.path.join('..', 'data', 'batch')
datasetFileName = os.path.join(DATA_PREFIX, 'logitboost_train.csv')
43 
44 
def printCRC32(rawData, deCompressedData):
    """Print the checksums of both buffers and report whether they agree.

    Args:
        rawData: The data before compression. Must expose a ``size``
            attribute and be accepted by ``getCRC32``.
        deCompressedData: The data obtained after the compress/decompress
            round trip; same requirements as ``rawData``.

    Returns:
        None. All results are written to standard output.
    """
    checksumBefore = getCRC32(rawData)
    checksumAfter = getCRC32(deCompressedData)

    print("\nCompression example program results:\n")

    print("Raw data checksum: 0x{:02X}".format(checksumBefore))
    print("Decompressed data checksum: 0x{:02X}".format(checksumAfter))

    # A size mismatch is reported in preference to a checksum mismatch.
    if rawData.size != deCompressedData.size:
        print("ERROR: Decompressed data size mismatches with the raw data size")
        return

    if checksumBefore != checksumAfter:
        print("ERROR: Decompressed data CRC mismatches with the raw data CRC")
        return

    print("OK: Decompressed data CRC matches with the raw data CRC")
64 
65 
if __name__ == "__main__":
    # Load the contents of the data set file
    sourceData = readTextFile(datasetFileName)

    # Configure a Zlib compressor: gzHeader switched on, level set to level9
    zlibCompressor = Compressor_Zlib()
    zlibCompressor.parameter.gzHeader = True
    zlibCompressor.parameter.level = level9

    # Feed the source data through a compression stream bound to the compressor
    packStream = CompressionStream(zlibCompressor)
    packStream.push_back(sourceData)

    # Size the output buffer from the stream, then copy the compressed bytes out
    packedSize = packStream.getCompressedDataSize()
    packedData = np.empty(packedSize, dtype=np.uint8)
    packStream.copyCompressedArray(packedData)

    # Configure the matching Zlib decompressor with gzHeader switched on as well
    zlibDecompressor = Decompressor_Zlib()
    zlibDecompressor.parameter.gzHeader = True

    # Feed the compressed bytes through a decompression stream
    unpackStream = DecompressionStream(zlibDecompressor)
    unpackStream.push_back(packedData)

    # Size the output buffer from the stream, then copy the decompressed bytes out
    unpackedSize = unpackStream.getDecompressedDataSize()
    unpackedData = np.empty(unpackedSize, dtype=np.uint8)
    unpackStream.copyDecompressedArray(unpackedData)

    # Report checksums of the source data and of the round-tripped data
    printCRC32(sourceData, unpackedData)

For more complete information about compiler optimizations, see our Optimization Notice.