31 from daal.algorithms
import kmeans
32 import daal.algorithms.kmeans.init
33 from daal.data_management
import HomogenNumericTable, FileDataSource, DataSource, BlockDescriptor, readOnly
# Relative location of the directory that holds the example data sets.
DAAL_PREFIX = os.path.join('..', 'data')

# CSV file handed to the file data source as the K-Means input.
datasetFileName = os.path.join(DAAL_PREFIX, 'batch', 'kmeans_init_dense.csv')

# Value assigned to the accuracyThreshold parameter of the K-Means algorithm.
cAccuracyThreshold = 0.01
def getSingleValue(pTbl, ntype):
    """Read one scalar out of a numeric table and return it.

    The table is accessed through ``getBlockOfRows(0, 1, readOnly, block)``
    (presumably one row starting at row index 0 -- confirm against the DAAL
    API) and the first element of the flattened block array is returned.

    Args:
        pTbl: Table object providing ``getBlockOfRows`` and
            ``releaseBlockOfRows``.
        ntype: Element type forwarded to ``BlockDescriptor``.

    Returns:
        The first element of the flattened array exposed by the block.
    """
    block = BlockDescriptor(ntype=ntype)
    pTbl.getBlockOfRows(0, 1, readOnly, block)
    try:
        # Pull the scalar out before the block is handed back to the table.
        value = block.getArray().flatten()[0]
    finally:
        # Release the block even when reading the array fails.
        pTbl.releaseBlockOfRows(block)
    # Callers print and scale this value, so it must be returned explicitly.
    return value
def runKmeans(inputData, nClusters, method, methodName, oversamplingFactor=-1.0):
    """Initialize centroids with ``method``, run K-Means and print a summary.

    Args:
        inputData: Table used as input for both the centroid initialization
            and the clustering algorithm.
        nClusters: Cluster count passed to both algorithms.
        method: ``kmeans.init`` method identifier used for initialization.
        methodName: Label for ``method`` used in the printed messages.
        oversamplingFactor: Written to the initialization parameters only
            when it is greater than zero; otherwise the parameter keeps the
            value it already has.

    Returns:
        None. Parameters and results are reported with ``print``.
    """
    # Compute the initial centroids.
    init = kmeans.init.Batch(nClusters, fptype=np.float32, method=method)
    init.input.set(kmeans.init.data, inputData)
    if oversamplingFactor > 0:
        init.parameter.oversamplingFactor = oversamplingFactor

    # Exactly one init message is printed per run: the extended form with
    # oversamplingFactor and nRounds for parallelPlusDense, the short form
    # for every other method.
    initMessage = "K-means init parameters: method = " + methodName
    if method == kmeans.init.parallelPlusDense:
        initMessage += (
            ", oversamplingFactor = " + str(init.parameter.oversamplingFactor)
            + ", nRounds = " + str(init.parameter.nRounds)
        )
    print(initMessage)

    centroids = init.compute().get(kmeans.init.centroids)

    # Run the clustering itself, starting from the centroids computed above.
    # nMaxIterations and cAccuracyThreshold are read from module scope.
    # NOTE(review): confirm nMaxIterations is defined at module level.
    algorithm = kmeans.Batch(nClusters, nMaxIterations)
    algorithm.input.set(kmeans.data, inputData)
    algorithm.input.set(kmeans.inputCentroids, centroids)
    algorithm.parameter.accuracyThreshold = cAccuracyThreshold
    print(
        "K-means algorithm parameters: maxIterations = "
        + str(algorithm.parameter.maxIterations)
        + ", accuracyThreshold = "
        + str(algorithm.parameter.accuracyThreshold)
    )
    res = algorithm.compute()

    # Report the results. The objective value is scaled by 1e-6 to match the
    # "*1E+6" suffix in the message.
    goalFunc = getSingleValue(res.get(kmeans.objectiveFunction), ntype=np.float32)
    nIterations = getSingleValue(res.get(kmeans.nIterations), ntype=np.intc)
    print(
        "K-means algorithm results: Objective function value = "
        + str(goalFunc*1e-6)
        + "*1E+6, number of iterations = "
        + str(nIterations)
        + "\n"
    )
if __name__ == "__main__":
    # Table that receives the contents of the data set file.
    inputData = HomogenNumericTable(ntype=np.float32)
    dataSource = FileDataSource(
        datasetFileName,
        DataSource.notAllocateNumericTable,
        DataSource.doDictionaryFromContext,
    )
    dataSource.loadDataBlock(inputData)

    # One entry per run: (init method, label[, oversamplingFactor]).
    # Entries without a third element use runKmeans' default factor.
    initRuns = (
        (kmeans.init.deterministicDense, "deterministicDense"),
        (kmeans.init.randomDense, "randomDense"),
        (kmeans.init.plusPlusDense, "plusPlusDense"),
        (kmeans.init.parallelPlusDense, "parallelPlusDense", 0.5),
        (kmeans.init.parallelPlusDense, "parallelPlusDense", 2.0),
    )
    for runArgs in initRuns:
        runKmeans(inputData, nClusters, *runArgs)