// KMeansDenseDistr.java

/* file: KMeansDenseDistr.java */
/*******************************************************************************
* Copyright 2014-2019 Intel Corporation.
*
* This software and the related documents are Intel copyrighted  materials,  and
* your use of  them is  governed by the  express license  under which  they were
* provided to you (License).  Unless the License provides otherwise, you may not
* use, modify, copy, publish, distribute,  disclose or transmit this software or
* the related documents without Intel's prior written permission.
*
* This software and the related documents  are provided as  is,  with no express
* or implied  warranties,  other  than those  that are  expressly stated  in the
* License.
*******************************************************************************/

/*
 //  Content:
 //     Java example of dense K-Means clustering in the distributed processing mode
 */

package com.intel.daal.examples.kmeans;

import com.intel.daal.algorithms.kmeans.*;
import com.intel.daal.algorithms.kmeans.init.*;
import com.intel.daal.data_management.data.NumericTable;
import com.intel.daal.data_management.data_source.DataSource;
import com.intel.daal.data_management.data_source.FileDataSource;
import com.intel.daal.examples.utils.Service;
import com.intel.daal.services.DaalContext;

/**
 * Example driver that runs dense K-Means clustering in the distributed
 * processing mode, simulating the nodes sequentially inside one process.
 *
 * <p>The program performs three phases:
 * <ol>
 *   <li>loads one CSV file per block and computes initial centroids
 *       (step 1 on each block, step 2 on the master);</li>
 *   <li>refines the centroids for {@code nIterations} rounds
 *       (step 1 on each block, step 2 on the master);</li>
 *   <li>computes per-block cluster assignments against the final centroids
 *       and prints a summary.</li>
 * </ol>
 */
class KMeansDenseDistr {
    /* Input data set parameters: one CSV file per simulated node */
    private static final String[] datasetFileNames = {
        "../data/distributed/kmeans_dense_1.csv", "../data/distributed/kmeans_dense_2.csv",
        "../data/distributed/kmeans_dense_3.csv", "../data/distributed/kmeans_dense_4.csv"};

    private static final int    nClusters       = 20;
    /* Derived from the file list so the block count can never drift from the number of input files */
    private static final int    nBlocks         = datasetFileNames.length;
    private static final int    nIterations     = 5;
    /*
     * Number of observations assumed per block; used to derive the total
     * observation count and each block's offset for the initialization step.
     * NOTE(review): presumably every CSV file must contain exactly this many rows - confirm.
     */
    private static final int    nVectorsInBlock = 2500;

    private static final DaalContext context = new DaalContext();

    /**
     * Program entry point.
     *
     * @param args command-line arguments (unused)
     * @throws java.io.FileNotFoundException declared for the data-loading and printing helpers
     * @throws java.io.IOException           declared for the data-loading and printing helpers
     */
    public static void main(String[] args) throws java.io.FileNotFoundException, java.io.IOException {
        try {
            runClustering();
        } finally {
            /* Release the context even when loading or computation fails */
            context.dispose();
        }
    }

    /* Runs the complete workflow: initialization, centroid refinement, assignments and printing. */
    private static void runClustering() throws java.io.FileNotFoundException, java.io.IOException {

        /* Get initial centroids */
        InitDistributedStep2Master initMaster = new InitDistributedStep2Master(context, Float.class,
                InitMethod.randomDense, nClusters);

        /* Loaded blocks are kept so the later phases can reuse them without re-reading the files */
        NumericTable[] data = new NumericTable[nBlocks];

        for (int node = 0; node < nBlocks; node++) {
            /* Initialize FileDataSource<CSVFeatureManager> to retrieve the input data from a .csv file */
            FileDataSource dataSource = new FileDataSource(context, datasetFileNames[node],
                    DataSource.DictionaryCreationFlag.DoDictionaryFromContext,
                    DataSource.NumericTableAllocationFlag.DoAllocateNumericTable);

            /* Retrieve the input data */
            dataSource.loadDataBlock();

            data[node] = dataSource.getNumericTable();

            /*
             * Create an algorithm object to initialize the K-Means algorithm.
             * The last two arguments are the total number of observations over
             * all blocks and the offset of this block's first observation.
             */
            InitDistributedStep1Local initLocal = new InitDistributedStep1Local(context, Float.class,
                    InitMethod.randomDense, nClusters, nBlocks * nVectorsInBlock, node * nVectorsInBlock);

            /* Set the input data to the algorithm */
            initLocal.input.set(InitInputId.data, data[node]);

            /* Hand the block's partial result to the master initialization step */
            InitPartialResult initPres = initLocal.compute();
            initMaster.input.add(InitDistributedStep2MasterInputId.partialResults, initPres);
        }

        initMaster.compute();
        InitResult initResult = initMaster.finalizeCompute();

        NumericTable centroids = initResult.get(InitResultId.centroids);
        NumericTable[] assignments = new NumericTable[nBlocks];
        NumericTable objectiveFunction = null;

        /* Create an algorithm for K-Means clustering; the same master object is reused every iteration */
        DistributedStep2Master masterAlgorithm = new DistributedStep2Master(context, Float.class, Method.defaultDense,
                nClusters);

        /* Calculate centroids */
        for (int it = 0; it < nIterations; it++) {
            for (int node = 0; node < nBlocks; node++) {
                /* Create an algorithm object for the K-Means algorithm */
                DistributedStep1Local algorithm = new DistributedStep1Local(context, Float.class, Method.defaultDense,
                        nClusters);

                /* Set the input data to the algorithm */
                algorithm.input.set(InputId.data, data[node]);
                algorithm.input.set(InputId.inputCentroids, centroids);

                PartialResult pres = algorithm.compute();

                masterAlgorithm.input.add(DistributedStep2MasterInputId.partialResults, pres);
            }

            masterAlgorithm.compute();
            Result result = masterAlgorithm.finalizeCompute();

            /* The centroids produced here feed the next iteration's local steps */
            centroids = result.get(ResultId.centroids);
            objectiveFunction = result.get(ResultId.objectiveFunction);
        }

        /* Calculate assignments */
        for (int node = 0; node < nBlocks; node++) {
            /*
             * Create an algorithm object for the K-Means algorithm.
             * NOTE(review): the trailing 0 is presumably the iteration count, so the
             * centroids stay fixed and only assignments are produced - confirm.
             */
            Batch algorithm = new Batch(context, Float.class, Method.lloydDense, nClusters, 0);

            algorithm.parameter.setAssignFlag(true);

            /* Set the input data to the algorithm */
            algorithm.input.set(InputId.data, data[node]);
            algorithm.input.set(InputId.inputCentroids, centroids);

            Result result = algorithm.compute();

            assignments[node] = result.get(ResultId.assignments);
        }

        /* Print the results */
        Service.printNumericTable("First 10 cluster assignments from 1st node:", assignments[0], 10);
        Service.printNumericTable("First 10 dimensions of centroids:", centroids, 20, 10);
        Service.printNumericTable("Objective function value:", objectiveFunction);
    }
}
/*
 * For more complete information about compiler optimizations, see our Optimization Notice.
 * Select sticky button color:
 * Orange (only for download buttons)
 */