Hi, I'd like to write a simple sample to demonstrate the out-of-order execution model and the event-based synchronization mechanism. It seems to work on the Intel OpenCL platform, but the schedule of the commands is different each time. The sample is simple. There are three kernels: empty, caesarCipher and divergence.
// Simplest possible kernel: the body is empty, so every work-item does nothing.
__kernel void empty()
{
}
// Naive cipher, applied in place.
// Each work-item adds its own global index (dimension 0) to the element at
// that index and stores the sum, reduced modulo 256, back into plainText.
__kernel void caesarCipher(__global char* plainText)
{
    const size_t gid = get_global_id(0);
    const size_t shifted = plainText[gid] + gid;
    plainText[gid] = shifted % 256;
}
// Simple divergence computation for 1D data.
//
// Each work-item writes (upValue - bottomValue) * factor to divergenceData[id],
// where upValue is fieldData[id+2] and bottomValue is fieldData[id-2]. When a
// neighbour would fall outside [0, get_global_size(0)) the element's own value
// fieldData[id] is used in its place.
//
// fieldData      - input samples, read only by this kernel.
// divergenceData - output, one value written per work-item.
// factor         - scale applied to the difference.
//
// NOTE(review): assumes both buffers hold at least get_global_size(0) elements
// and that the kernel is launched without a global offset - confirm at the call site.
__kernel void divergence(__global float* fieldData, __global float* divergenceData, float factor){
    size_t id=get_global_id(0);
    float upValue=fieldData[id];
    float bottomValue=upValue;
    // Lower neighbour exists only from the third element onwards.
    if(id>=2){
        bottomValue=fieldData[id-2];
    }
    size_t globalSize=get_global_size(0);
    // Upper neighbour exists only if id+2 is still a valid index. Written as
    // id+2<globalSize (not id<globalSize-2) so the unsigned subtraction cannot
    // wrap around when globalSize is smaller than 2.
    if(id+2<globalSize){
        upValue=fieldData[id+2];
    }
    divergenceData[id]=(upValue-bottomValue)*factor;
}
The kernels caesarCipher and divergence need source data, so there are five main steps: writing the two data buffers and executing the three kernels.
clCommandQueue= cl::CommandQueue(clContext, *(clDevices.begin()),CL_QUEUE_PROFILING_ENABLE|CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,&clError);
I ran the sample on openSUSE 12.1 with a Core i5-2500K using Intel OpenCL SDK 1.5. The start-finish schedule, based on the profiling info, is shown like this. Line 1: write data to fieldDataBuffer. Line 2: execute the kernel empty. Line 3: execute the kernel divergence. Line 4: write data to plaintextBuffer. Line 5: execute the kernel caesarCipher.
// One kernel object per __kernel function in the program.
cl::Kernel emptyKernel(clProgram, "empty");
cl::Kernel divergenceKernel(clProgram, "divergence");
cl::Kernel caesarCipherKernel(clProgram, "caesarCipher");
//Data buffer
// plaintextBuffer is read and written in place by caesarCipher; fieldDataBuffer
// is only read and divergenceDataBuffer only written by divergence.
cl::Buffer plaintextBuffer(clContext, CL_MEM_READ_WRITE, num*sizeof(cl_char));
cl::Buffer fieldDataBuffer(clContext, CL_MEM_READ_ONLY, num*sizeof(cl_float));
cl::Buffer divergenceDataBuffer(clContext, CL_MEM_WRITE_ONLY, num*sizeof(cl_float));
// Bind kernel arguments (the empty kernel takes none).
caesarCipherKernel.setArg (0, plaintextBuffer);
divergenceKernel.setArg (0, fieldDataBuffer) ;
divergenceKernel.setArg (1, divergenceDataBuffer) ;
// The kernel parameter is `float factor`. setArg passes sizeof(T) bytes of the
// value it is given, so the literal must be a float: a bare 0.25 is a double
// and its size does not match the kernel argument.
divergenceKernel.setArg (2, 0.25f) ;
cl::Event writePlainTextEvent;
cl::Event writeFieldDataEvent;
cl::Event executeEmptyKernelEvent;
cl::Event executeCaesarKernelEvent;
cl::Event executeDivergenceKernelEvent;
//Write source data for kernel divergence
clCommandQueue.enqueueWriteBuffer(fieldDataBuffer, CL_FALSE, 0, num*sizeof(cl_float), fieldData.data(), NULL, &writeFieldDataEvent);
//Execute the kernel empty
clCommandQueue.enqueueNDRangeKernel(emptyKernel, cl::NullRange, cl::NDRange(num), cl::NullRange,NULL,&executeEmptyKernelEvent);
//Execute the kernel divergence
std::vector divergenceWaitEvents;
divergenceWaitEvents.push_back(writeFieldDataEvent);
clCommandQueue.enqueueNDRangeKernel(divergenceKernel, cl::NullRange, cl::NDRange(num), cl::NullRange,&divergenceWaitEvents,&executeDivergenceKernelEvent);
//Write source data for kernel caesarCipher
clCommandQueue.enqueueWriteBuffer(plaintextBuffer, CL_FALSE, 0, num*sizeof(cl_char),plaintext.data(),NULL,&writePlainTextEvent) ;
//Execute the kernel caesarCipher
std::vector caesarCipherWaitEvents;
caesarCipherWaitEvents.push_back(writePlainTextEvent);
clCommandQueue.enqueueNDRangeKernel(caesarCipherKernel, cl::NullRange, cl::NDRange(num), cl::NullRange,&caesarCipherWaitEvents,&executeCaesarKernelEvent);
clCommandQueue.finish();

