24 import daal.algorithms.kmeans
as kmeans
25 import daal.algorithms.kmeans.init
as init
26 from daal
import step1Local, step2Master
# Put the parent of this script's directory at the front of ``sys.path``
# (unless it is already listed) before importing the ``utils`` helpers below.
# NOTE(review): presumably ``utils`` lives in that parent directory - confirm.
utils_folder = os.path.dirname(os.path.dirname(__file__))
utils_folder = os.path.realpath(os.path.abspath(utils_folder))
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)

from utils import createSparseTable, printNumericTable
# Directory that holds the example data sets. The path is relative, so it is
# resolved against the current working directory when the files are opened.
DAAL_PREFIX = os.path.join('..', 'data')

# Number of rows attributed to each block. The driver below multiplies it by
# nBlocks and by the block index when it initializes the centroids.
nVectorsInBlock = 8000

# Input file for each block, indexed by block number. The same CSR file is
# listed four times, i.e. every block reads identical data.
# NOTE(review): nBlocks is not assigned in the visible part of this script;
# it must not exceed len(dataFileNames) - confirm where it is defined.
dataFileNames = [
    os.path.join(DAAL_PREFIX, 'batch', 'kmeans_csr.csv'),
    os.path.join(DAAL_PREFIX, 'batch', 'kmeans_csr.csv'),
    os.path.join(DAAL_PREFIX, 'batch', 'kmeans_csr.csv'),
    os.path.join(DAAL_PREFIX, 'batch', 'kmeans_csr.csv'),
]

# One slot per block; each placeholder is replaced by the block's sparse
# table once the driver has loaded the corresponding file.
dataTable = [0] * nBlocks
if __name__ == "__main__":
    # Distributed K-Means on sparse (CSR) input, with all "nodes" run in this
    # one process: each block gets its own step1Local algorithm object and a
    # single step2Master object merges the partial results.
    #
    # NOTE(review): nClusters, nIterations and nBlocks are read below but are
    # not assigned in the visible part of this script - confirm they are
    # defined before this point.

    # Master-side algorithm that merges the per-block partial results of every
    # iteration; it is created once and reused across iterations.
    masterAlgorithm = kmeans.Distributed(step2Master, nClusters, method=kmeans.lloydCSR)

    assignments = [0] * nBlocks

    # Stays None when nIterations is 0, so the final report can skip it
    # instead of failing with a NameError on an unbound name.
    objectiveFunction = None

    # ---- Phase 1: load the blocks and compute the initial centroids --------
    # NOTE(review): the blocks are loaded as sparse tables but initialization
    # uses init.randomDense; DAAL also offers init.randomCSR - confirm which
    # method is intended here.
    masterInitAlgorithm = init.Distributed(step2Master, nClusters, method=init.randomDense)

    for i in range(nBlocks):
        # Load this block's file into a sparse table.
        dataTable[i] = createSparseTable(dataFileNames[i])

        # Local initialization step for block i. The two numeric arguments are
        # the row count over all blocks and this block's starting row
        # (assumed to match the DAAL init.Distributed step1Local signature -
        # TODO confirm).
        localInit = init.Distributed(
            step1Local,
            nClusters,
            nBlocks * nVectorsInBlock,
            i * nVectorsInBlock,
            method=init.randomDense,
        )
        localInit.input.set(init.data, dataTable[i])

        # Run the local step and hand its partial result to the master.
        masterInitAlgorithm.input.add(init.partialResults, localInit.compute())

    masterInitAlgorithm.compute()
    res = masterInitAlgorithm.finalizeCompute()
    centroids = res.get(init.centroids)

    # ---- Phase 2: iterations ------------------------------------------------
    for _iteration in range(nIterations):
        for i in range(nBlocks):
            # The third positional argument was originally written as
            # `it == nIterations`, which can never be true inside
            # range(nIterations); the constant False it always evaluated to is
            # passed explicitly. (Presumably DAAL's assignFlag - assignments
            # are computed separately in phase 3.)
            localAlgorithm = kmeans.Distributed(step1Local, nClusters, False, method=kmeans.lloydCSR)

            localAlgorithm.input.set(kmeans.data, dataTable[i])
            localAlgorithm.input.set(kmeans.inputCentroids, centroids)

            pres = localAlgorithm.compute()

            masterAlgorithm.input.add(kmeans.partialResults, pres)

        # Merge the partial results of all blocks and take the updated
        # centroids as the input of the next iteration.
        masterAlgorithm.compute()
        result = masterAlgorithm.finalizeCompute()

        centroids = result.get(kmeans.centroids)
        objectiveFunction = result.get(kmeans.objectiveFunction)

    # ---- Phase 3: cluster assignments per block -----------------------------
    for i in range(nBlocks):
        # Batch algorithm seeded with the final centroids; the second argument
        # is 0 (presumably the iteration count, so the centroids are left
        # unchanged and only assignments are produced - TODO confirm).
        localAlgorithm = kmeans.Batch(nClusters, 0, method=kmeans.lloydCSR)

        localAlgorithm.input.set(kmeans.data, dataTable[i])
        localAlgorithm.input.set(kmeans.inputCentroids, centroids)

        res = localAlgorithm.compute()

        assignments[i] = res.get(kmeans.assignments)

    # ---- Report -------------------------------------------------------------
    printNumericTable(assignments[0], "First 10 cluster assignments from 1st node:", 10)
    printNumericTable(centroids, "First 10 dimensions of centroids:", 20, 10)
    if objectiveFunction is not None:
        printNumericTable(objectiveFunction, "Objective function value:")