24 import daal.algorithms.kmeans
as kmeans
25 import daal.algorithms.kmeans.init
as init
26 from daal
import step1Local, step2Master
27 from daal.data_management
import FileDataSource, DataSourceIface
# Resolve the parent of this script's directory and put it at the front of
# sys.path (once), so the ``from utils import ...`` below can be resolved
# from that folder.
utils_folder = os.path.realpath(
    os.path.abspath(
        os.path.dirname(os.path.dirname(__file__))
    )
)
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
from utils import printNumericTable
# Root folder of the example data sets, relative to the working directory.
DAAL_PREFIX = os.path.join('..', 'data')

# One CSV input file per data block; the main loop reads entry ``i`` for
# block ``i``.  The paths must be bound to ``dataFileNames`` because that is
# the name the loading loop indexes.
dataFileNames = [
    os.path.join(DAAL_PREFIX, 'distributed', 'kmeans_dense_1.csv'),
    os.path.join(DAAL_PREFIX, 'distributed', 'kmeans_dense_2.csv'),
    os.path.join(DAAL_PREFIX, 'distributed', 'kmeans_dense_3.csv'),
    os.path.join(DAAL_PREFIX, 'distributed', 'kmeans_dense_4.csv'),
]
# Block size used by the initialization step below to derive the two size
# arguments ``nBlocks * nVectorsInBlock`` and ``i * nVectorsInBlock``.
nVectorsInBlock = 2500

# One slot per data block; each 0 placeholder is overwritten with the
# block's numeric table when the block is loaded.
dataTable = [0] * nBlocks
if __name__ == "__main__":
    # Overall flow:
    #   1. load every data block and compute initial centroids
    #      (local init step per block, combined by the master init step);
    #   2. run nIterations rounds of local step + master step, refreshing the
    #      centroids after every round;
    #   3. compute per-block cluster assignments against the final centroids;
    #   4. print the results.

    # Master-side K-Means algorithm; it receives the partial results of all
    # blocks in every iteration.
    masterAlgorithm = kmeans.Distributed(step2Master, nClusters, method=kmeans.lloydDense)

    # One slot per block, filled in stage 3.
    assignments = [0] * nBlocks

    # ---- Stage 1: load the data and initialize the centroids ----
    masterInitAlgorithm = init.Distributed(step2Master, nClusters, method=init.randomDense)
    for i in range(nBlocks):
        # Read block i from its CSV file and keep the resulting numeric table
        # for the later stages.
        dataSource = FileDataSource(
            dataFileNames[i],
            DataSourceIface.doAllocateNumericTable,
            DataSourceIface.doDictionaryFromContext
        )
        dataSource.loadDataBlock()
        dataTable[i] = dataSource.getNumericTable()

        # Local initialization step for this block.
        # NOTE(review): the two size arguments are presumably the total number
        # of rows and this block's starting row offset -- confirm against the
        # init.Distributed signature.
        localInit = init.Distributed(
            step1Local,
            nClusters,
            nBlocks * nVectorsInBlock,
            i * nVectorsInBlock,
            method=init.randomDense
        )
        localInit.input.set(init.data, dataTable[i])
        masterInitAlgorithm.input.add(init.partialResults, localInit.compute())

    masterInitAlgorithm.compute()
    centroids = masterInitAlgorithm.finalizeCompute().get(init.centroids)

    # ---- Stage 2: iterate local step + master step ----
    for _ in range(nIterations):
        for i in range(nBlocks):
            # The third argument used to be ``it == nIterations``, which can
            # never be true inside ``range(nIterations)``; the constant False
            # it always evaluated to is now passed explicitly.  Assignments
            # are computed separately in stage 3.
            localAlgorithm = kmeans.Distributed(step1Local, nClusters, False, method=kmeans.lloydDense)
            localAlgorithm.input.set(kmeans.data, dataTable[i])
            localAlgorithm.input.set(kmeans.inputCentroids, centroids)
            masterAlgorithm.input.add(kmeans.partialResults, localAlgorithm.compute())

        # Combine the partial results and take the updated centroids as the
        # input of the next iteration.
        masterAlgorithm.compute()
        result = masterAlgorithm.finalizeCompute()
        centroids = result.get(kmeans.centroids)
        goalFunction = result.get(kmeans.goalFunction)

    # ---- Stage 3: cluster assignments for every block ----
    for i in range(nBlocks):
        # NOTE(review): the second argument (0) is presumably the iteration
        # count, so this pass only assigns rows to the final centroids --
        # confirm against the kmeans.Batch signature.
        localAlgorithm = kmeans.Batch(nClusters, 0, method=kmeans.lloydDense)
        localAlgorithm.input.set(kmeans.data, dataTable[i])
        localAlgorithm.input.set(kmeans.inputCentroids, centroids)
        assignments[i] = localAlgorithm.compute().get(kmeans.assignments)

    # ---- Stage 4: report ----
    printNumericTable(assignments[0], "First 10 cluster assignments from 1st node:", 10)
    printNumericTable(centroids, "First 10 dimensions of centroids:", 20, 10)
    printNumericTable(goalFunction, "Goal function value:")