// custom_csv_feature_modifiers.cpp

/* file: custom_csv_feature_modifiers.cpp */
/*******************************************************************************
* Copyright 2014-2019 Intel Corporation.
*
* This software and the related documents are Intel copyrighted  materials,  and
* your use of  them is  governed by the  express license  under which  they were
* provided to you (License).  Unless the License provides otherwise, you may not
* use, modify, copy, publish, distribute,  disclose or transmit this software or
* the related documents without Intel's prior written permission.
*
* This software and the related documents  are provided as  is,  with no express
* or implied  warranties,  other  than those  that are  expressly stated  in the
* License.
*******************************************************************************/

/*
!  Content:
!    C++ example of modifiers usage with file data source
!******************************************************************************/

#include <cassert>
#include <algorithm>

#include "daal.h"
#include "service.h"

using namespace daal::data_management;

/* Custom CSV feature modifier that replaces every token it is given with the
 * square of its value (each token is parsed as float). */
class MySquaringModifier : public modifiers::csv::FeatureModifier
{
public:
    /* Called once for each row of the CSV file */
    virtual void apply(modifiers::csv::Context &context)
    {
        const size_t tokenCount = context.getNumberOfTokens();
        daal::services::BufferView<DAAL_DATA_TYPE> squares = context.getOutputBuffer();

        /* Unless a modifier overrides it by calling 'setNumberOfOutputFeatures' during
         * its initialization stage (see 'MyMaxFeatureModifier'), the output buffer has
         * one slot per token, where a token is one comma-separated word of the row */
        assert(squares.size() == tokenCount);

        size_t idx = 0;
        while (idx < tokenCount)
        {
            /* Parse the token as float and store its square in the matching slot */
            const float value = context.getTokenAs<float>(idx);
            squares[idx] = value * value;
            ++idx;
        }
    }
};

/* Custom CSV feature modifier that collapses all of its input features into a
 * single output feature y = max { x_1, ..., x_n }. */
class MyMaxFeatureModifier : public modifiers::csv::FeatureModifier
{
public:
    /* Called once, before the CSV file is parsed */
    virtual void initialize(modifiers::csv::Config &config)
    {
        /* The modifier emits exactly one value per row (the maximum of the
         * input features x_1, ..., x_n), so declare a single output feature */
        const size_t outputFeatureCount = 1;
        config.setNumberOfOutputFeatures(outputFeatureCount);
    }

    /* Called once for each row of the CSV file */
    virtual void apply(modifiers::csv::Context &context)
    {
        const size_t tokenCount = context.getNumberOfTokens();

        /* Start from the first token and keep the largest float value seen
         * while walking over the remaining tokens */
        float largest = context.getTokenAs<float>(0);
        for (size_t idx = 1; idx != tokenCount; ++idx)
        {
            const float candidate = context.getTokenAs<float>(idx);
            if (largest < candidate)
            {
                largest = candidate;
            }
        }

        /* The output buffer holds as many values as were requested in
         * 'initialize', i.e. one: store the maximum there */
        context.getOutputBuffer()[0] = largest;
    }
};

/* Example entry point: reads a CSV file through a file data source, applies
 * built-in and custom feature modifiers to selected columns and prints the
 * resulting numeric table. Returns 0 on completion. */
int main(int argc, char *argv[])
{
    /* Default path to the CSV to be read.
     * NOTE(review): intentionally non-const. The address of this string is handed to
     * 'checkArguments' through a variadic parameter; that helper presumably replaces
     * the default with a path given on the command line, and writing through a pointer
     * to a const object would be undefined behavior - confirm against service.h */
    std::string csvFileName = "../data/batch/mixed_text_and_numbers.csv";

    checkArguments(argc, argv, 1, &csvFileName);

    /* Define options for CSV data source */
    const CsvDataSourceOptions csvOptions = CsvDataSourceOptions::allocateNumericTable |
                                            CsvDataSourceOptions::createDictionaryFromContext |
                                            CsvDataSourceOptions::parseHeader;

    /* Define CSV file data source */
    FileDataSource<CSVFeatureManager> ds(csvFileName, csvOptions);

    /* Configure format of output numeric table by applying modifiers.
     * Output numeric table will have the following format:
     * | Numeric1 | Numeric2 ^ 2 | Numeric5 ^ 2 | max(Numeric0, Numeric5) | */
    ds.getFeatureManager()
        .addModifier( features::list("Numeric1"), modifiers::csv::continuous() )
        .addModifier( features::list("Numeric2", "Numeric5"), modifiers::csv::custom<MySquaringModifier>() )
        .addModifier( features::list("Numeric0", "Numeric5"), modifiers::csv::custom<MyMaxFeatureModifier>() );

    /* Load and parse CSV file */
    ds.loadDataBlock();

    printNumericTable(ds.getNumericTable(), "Loaded numeric table:");

    return 0;
}
/*
 * For more complete information about compiler optimizations, see our Optimization Notice.
 * Select sticky button color:
 * Orange (only for download buttons)
 */