Python* API Reference for Intel® Data Analytics Acceleration Library 2020 Update 1

custom_csv_feature_modifiers.py

1 # file: custom_csv_feature_modifiers.py
2 #===============================================================================
3 # Copyright 2014-2020 Intel Corporation
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 #===============================================================================
17 
18 # ! Content:
19 # ! Python example of using feature modifiers with a file data source
20 # !*****************************************************************************
21 
22 #
23 
24 
25 #
26 
27 from daal.data_management import FileDataSource, CsvDataSourceOptions, modifiers
28 from daal.data_management.modifiers.csv import FeatureModifier
29 
30 import os, sys
# Make the grandparent directory of this file importable so that the
# `utils` helper module used by the examples can be found.
utils_folder = os.path.dirname(os.path.dirname(__file__))
utils_folder = os.path.realpath(os.path.abspath(utils_folder))
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
34 from utils import printNumericTable
35 
36 # User-defined feature modifier that computes a square for every feature
class MySquaringModifier(FeatureModifier):
    """Feature modifier that replaces every selected token with its square."""

    def apply(self, tokens):
        """Return the squares of all tokens, keeping the row layout.

        `tokens` is iterated as a sequence of rows; each row is a sequence
        of values accepted by ``float()``. The result is a list of lists
        of floats with the same shape as the input.
        """
        squared_rows = []
        for row in tokens:
            # Convert each token once, then multiply the converted value.
            values = [float(token) for token in row]
            squared_rows.append([value * value for value in values])
        return squared_rows
40 
41 
42 # User-defined feature modifier that selects max element among all features
class MyMaxFeatureModifier(FeatureModifier):
    """Feature modifier that reduces each row to the maximum of its tokens."""

    def __init__(self):
        # NOTE(review): (1, 4) is forwarded unchanged to
        # FeatureModifier.__init__; presumably one output feature and a
        # batch of four rows -- confirm against the DAAL API reference.
        super(MyMaxFeatureModifier, self).__init__(1, 4)

    def apply(self, tokens):
        """Return a single-element row holding the numeric maximum per row.

        Called by the data source while the CSV file is parsed. `tokens`
        is iterated as a sequence of rows; each row is a sequence of
        values accepted by ``float()``. The result is a list with one
        ``[max_value]`` list per input row.

        Raises:
            ValueError: if a row is empty or a token cannot be converted
                to float.
        """
        # Convert before comparing: if the tokens are strings, max() on
        # the raw values is lexicographic (e.g. "9" > "10") and would
        # pick the wrong element.
        return [[max(float(token) for token in row)] for row in tokens]
50 
51 
if __name__ == "__main__":
    # CSV file to read (path is relative to the current working directory)
    csvFileName = "../data/batch/mixed_text_and_numbers.csv"

    # Options for the CSV data source, combined from individual flags
    csvOptions = CsvDataSourceOptions(
        CsvDataSourceOptions.allocateNumericTable
        | CsvDataSourceOptions.createDictionaryFromContext
        | CsvDataSourceOptions.parseHeader
    )

    # CSV file data source built from the path and the options
    dataSource = FileDataSource(csvFileName, csvOptions)

    # Register the modifiers that define the output numeric table.
    # The resulting table is laid out as:
    # | Numeric1 | Numeric2 ^ 2 | Numeric5 ^ 2 | max(Numeric0, Numeric5) |
    featureManager = dataSource.getFeatureManager()
    featureManager.addModifier(["Numeric1"], modifiers.csv.continuous())
    featureManager.addModifier(["Numeric2", "Numeric5"], MySquaringModifier())
    featureManager.addModifier(["Numeric0", "Numeric5"], MyMaxFeatureModifier())

    # Load and parse the CSV file, then print the resulting table
    dataSource.loadDataBlock()
    printNumericTable(dataSource.getNumericTable(), "Loaded numeric table:")

For more complete information about compiler optimizations, see our Optimization Notice.