struct myStruct {
unsigned int A;
unsigned int B;
unsigned int C;
unsigned int D;
};
__kernel void ckInit( struct myStruct ctl )
{
// just do nothing :-)
}... within the host code, the struct looks like following:
struct myStruct {
cl_uint A;
cl_uint B;
cl_uint C;
cl_uint D;
};... and within the host code function I set the kernel arguments like this:
struct myStruct my_test_struct;
my_test_struct.A = 1;
my_test_struct.B = 2;
my_test_struct.C = 3;
my_test_struct.D = 4;
error = clSetKernelArg(ckInit, 0, sizeof(my_test_struct), (void*)&my_test_struct);
if (error != CL_SUCCESS) {
printf("Error creating clSetKernelArg!n");
exit(error);
}
size_t szGlobalWorkSize[2] = {8,8};
size_t szLocalWorkSize[2] = {8,8};
error = clEnqueueNDRangeKernel(queue, ckInit, 2, NULL, szGlobalWorkSize, szLocalWorkSize, 0, NULL, NULL);
if (error != CL_SUCCESS) {
printf("Error creating clEnqueueNDRangeKernel!n");
exit(error);
}... running this on Nvidia OpenCL devices or the cell OpenCL device works fine.
On Intel devices I get an error and the error code is: "CL_INVALID_ARG_SIZE"
You could say: struct forwarding is not supported ... maybe?!? ... but reducing the struct to two unsigned int(or also float) works on Intel.
Furthermore, increasing the size of the unsigned int to 8, I get a bad segmentation fault. (You can also take 8 floats and it happens just the same.)
GDB shows the following:
(gdb) backtrace #0 0x00007ffff71e71cc in Intel::OpenCL::Framework::MemoryObject::GetDeviceMemoryObject(_cl_device_id*) () from /opt/intel/opencl-sdk/libintelocl.so #1 0x00007ffff71e91f8 in Intel::OpenCL::Framework::MemoryObject::IsAllocated(_cl_device_id*) () from /opt/intel/opencl-sdk/libintelocl.so #2 0x00007ffff7203931 in Intel::OpenCL::Framework::NDRangeKernelCommand::Init() () from /opt/intel/opencl-sdk/libintelocl.so #3 0x00007ffff71f7ff5 in Intel::OpenCL::Framework::ExecutionModule::EnqueueNDRangeKernel(_cl_command_queue*, _cl_kernel*, unsigned int, unsigned long const*, unsigned long const*, unsigned long const*, unsigned int, _cl_event* const*, _cl_event**) () from /opt/intel/opencl-sdk/libintelocl.so #4 0x00007ffff71dc876 in clEnqueueNDRangeKernel () from /opt/intel/opencl-sdk/libintelocl.so #5 0x000000000040145d in oclInterface () #6 0x00000000004010b7 in main ()Within the last few days I tried to get this fixed and read that I have to align my struct (within the host code I think) or pack it ... But I got confused, because I'm not really familiar with aligning structs and I think it can't be right, because on other devices from other vendors it works fine :-) So the only way around this problem was following OpenCL kernel (here with 8 unsigned int):
struct myStruct {
unsigned int A;
unsigned int B;
unsigned int C;
unsigned int D;
unsigned int A1;
unsigned int B1;
unsigned int C1;
unsigned int D1;
};
__kernel void ckInit( __constant struct myStruct* ctl )
{
}Within the host code I'm creating a buffer and writing the struct into the buffer. This works fine on Nvidia GPU, IBM Cell and Intel ... but this is only a way around and no solution ... :-/
... the host code struct looks like this:
struct myStruct {
cl_uint A;
cl_uint B;
cl_uint C;
cl_uint D;
cl_uint A1;
cl_uint B1;
cl_uint C1;
cl_uint D1;
};... and the host code function like this:
struct myStruct my_test_struct;
my_test_struct.A = 1;
my_test_struct.B = 2;
my_test_struct.C = 3;
my_test_struct.D = 4;
my_test_struct.A1 = 5;
my_test_struct.B1 = 6;
my_test_struct.C1 = 7;
my_test_struct.D1 = 8;
cl_mem clBuffer_struct = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(my_test_struct), NULL, &error);
error = clSetKernelArg(ckInit, 0, sizeof(clBuffer_struct), (void*)&clBuffer_struct));
if (error != CL_SUCCESS) {
printf("Error creating clSetKernelArg!n");
exit(error);
}
error = clEnqueueWriteBuffer(queue, clBuffer_struct, CL_TRUE, 0, sizeof(my_test_struct), (void*) &my_test_struct, 0, NULL, NULL);
if (error != CL_SUCCESS) {
printf("Error creating clEnqueueWriteBuffer!n");
exit(error);
}
Is this a bug or a feature?
I hope you will fix this :-)
Regards
Patrick

