CL_BUILD_PROGRAM_FAILURE - unknown problem

CL_BUILD_PROGRAM_FAILURE - unknown problem


// OpenCL C program text handed to clCreateProgramWithSource() as an array of
// strings. The runtime concatenates the strings back to back, so every entry
// carries its own trailing "\n"; without it the #pragma directives and the
// "//" comments would run into (and swallow) the code that follows them.
//
// The array holds exactly 57 entries because the host code passes that count
// to clCreateProgramWithSource(); keep the two in sync when editing.
//
// Device-side notes:
//  - complex_double is declared through a typedef so that the bare name is a
//    valid type in OpenCL C.
//  - ulong is used for 64-bit unsigned values; "unsigned long long" is a
//    reserved type in OpenCL C.
//  - atom_add_double() relies on atom_cmpxchg() for 64-bit values, so the
//    target device must report cl_khr_int64_base_atomics (and cl_khr_fp64).
//  - The body of dqft_ocl_kernel is still commented out inside the program
//    text, exactly as in the original.
const char* OpenCLSource[] = {
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
"#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n",
"\n",
"typedef struct complex_double\n",
"{\n",
"\tdouble real;\n",
"\tdouble imag;\n",
"} complex_double;\n",
"\n",
"double atom_add_double(__global double* const address, const double value)\n",
"{\n",
"  long oldval, newval, readback;\n",
"\n",
"  *(double*)&oldval = *address;\n",
"  *(double*)&newval = (*(double*)&oldval + value);\n",
"  while ((readback = atom_cmpxchg((__global long*)address, oldval, newval)) != oldval) {\n",
"    oldval = readback;\n",
"    *(double*)&newval = (*(double*)&oldval + value);\n",
"  }\n",
"  return *(double*)&oldval;\n",
"}\n",
"\n",
"\n",
"__kernel void dqft_ocl_kernel(__global complex_double *q_register, __global complex_double *init, __global ulong *q)\n",
"{\n",
" // Index of the elements to add n\n",
" ulong idx = get_global_id(0);\n",
" // Sum the n'th element of vectors a and b and store in c n\n",
"/*\tcomplex_double tmpcomp;\n",
"\tcomplex_double tmpcomp2;\n",
"\n",
"\tdouble epsilon;\n",
"\n",
"\tepsilon = pow(10,-14);\n",
"\n",
"\n",
"\tif ((pow(q_register[2*idx], 2) + pow(q_register[2*idx+1], 2)) > epsilon)\n",
"\t{\n",
"\t\tfor (unsigned long long int c = 0 ; c < q[0] ; c++)\n",
"\t\t{\n",
"\t\t\ttmpcomp.real = pow(q,-.5) * cos(2*PI*idx*c/q);\n",
"\t\t\ttmpcomp.imag = pow(q,-.5) * sin(2*PI*idx*c/q);\n",
"\n",
"\t\t\ttmpcomp2.real = (q_register[idx].real * tmpcomp.real) - (q_register[idx].imag * tmpcomp.imag);\n",
"\t\t\ttmpcomp2.imag = (q_register[idx].imag * tmpcomp.real) - (q_register[idx].real * tmpcomp.imag);\n",
"\n",
"\t\t\tatom_add_double(&init[c].real, tmpcomp2_real);\n",
"\t\t\tatom_add_double(&init[c].imag, tmpcomp2_imag);\n",
"\n",
"\t\t\t//init[c] = complex_add(init[c], complex_mul(q_register[idx], tmpcomp));\n",
"\t\t}\n",
"\t}\n",
"*/\n",
"}\n",
"\n",
"\n",
"\n"
};
void dqft_ocl(complex_double q_register[], unsigned long long int q)
{
	// EN: The Fourier transform maps functions in the time domain to
	//     functions in the frequency domain.  Frequency is 1/period, thus
	//     this Fourier transform will take our periodic register, and peak it
	//     at multiples of the inverse period.  Our Fourier transformation on
	//     the state a takes it to the state: q^(-.5) * Sum[c = 0 -> c = q - 1,
	//     c * e^(2*Pi*i*a*c / q)].  Remember, e^ix = cos x + i*sin x.
	complex_double init[q];
	unsigned long long i;
	for(i = 0; i < q; i++)
	{
		init[i].real = 0;

		init[i].imag = 0;
	}
	// Query platform ID

	cl_platform_id platform;

	clGetPlatformIDs (1, &platform, NULL);
	// Setup context properties

	cl_context_properties props[3];

	props[0] = (cl_context_properties)CL_CONTEXT_PLATFORM;

	props[1] = (cl_context_properties)platform;

	props[2] = (cl_context_properties)0;
	// Create a context to run OpenCL on our CUDA-enabled NVIDIA GPU

	cl_context GPUContext = clCreateContextFromType(props, CL_DEVICE_TYPE_GPU,NULL, NULL, NULL);
	// Get the list of GPU devices associated with this context

	size_t ParmDataBytes;

	clGetContextInfo(GPUContext, CL_CONTEXT_DEVICES, 0, NULL, &ParmDataBytes);

	cl_device_id* GPUDevices = (cl_device_id*)malloc(ParmDataBytes);

	clGetContextInfo(GPUContext, CL_CONTEXT_DEVICES, ParmDataBytes, GPUDevices, NULL);
	// Create a command-queue on the first GPU device

	cl_command_queue GPUCommandQueue = clCreateCommandQueue(GPUContext, GPUDevices[0], 0, NULL);
	// Allocate GPU memory for source vectors AND initialize from CPU memory

	cl_mem GPUVector1 = clCreateBuffer(GPUContext, CL_MEM_READ_ONLY |

	CL_MEM_COPY_HOST_PTR, sizeof(complex_double) * q, q_register, NULL);
	cl_mem GPUVector2 = clCreateBuffer(GPUContext, CL_MEM_READ_WRITE |

	CL_MEM_COPY_HOST_PTR, sizeof(complex_double) * q, init, NULL);
	cl_mem GPU_q = clCreateBuffer(GPUContext, CL_MEM_READ_ONLY |

	CL_MEM_COPY_HOST_PTR, sizeof(unsigned long long int), &q, NULL);
	// Create OpenCL program with source code

	cl_program OpenCLProgram = clCreateProgramWithSource(GPUContext, 57, OpenCLSource, NULL, NULL);
	int err;
	// Build the program (OpenCL JIT compilation)

	err = clBuildProgram(OpenCLProgram, 0, NULL, NULL, NULL, NULL);
	// Create a handle to the compiled OpenCL function (Kernel)

	cl_kernel OpenCLVectorAdd = clCreateKernel(OpenCLProgram, "dqft_ocl_kernel", NULL);
	// In the next step we associate the GPU memory with the Kernel arguments

	clSetKernelArg(OpenCLVectorAdd, 0, sizeof(cl_mem), (void*)&GPUVector1);

	clSetKernelArg(OpenCLVectorAdd, 1, sizeof(cl_mem), (void*)&GPUVector2);

	clSetKernelArg(OpenCLVectorAdd, 2, sizeof(unsigned long long int), &GPU_q);
	// Launch the Kernel on the GPU

	size_t WorkSize[1] = {12}; // one dimensional Range

	err = clEnqueueNDRangeKernel(GPUCommandQueue, OpenCLVectorAdd, 1, NULL, WorkSize, NULL, 0, NULL, NULL);
	clEnqueueReadBuffer(GPUCommandQueue, GPUVector2, CL_TRUE, 0, q * sizeof(complex_double), q_register, 0, NULL, NULL);
	// Cleanup

	free(GPUDevices);

	clReleaseKernel(OpenCLVectorAdd);

	clReleaseProgram(OpenCLProgram);

	clReleaseCommandQueue(GPUCommandQueue);

	clReleaseContext(GPUContext);

	clReleaseMemObject(GPUVector1);

	clReleaseMemObject(GPUVector2);
	vector_normalization_sp(q_register, q);
}
I have a problem: the function clBuildProgram returns CL_BUILD_PROGRAM_FAILURE. Where is the problem?

7 posts / 0 nouveau(x)
Dernière contribution
Reportez-vous à notre Notice d'optimisation pour plus d'informations sur les choix et l'optimisation des performances dans les produits logiciels Intel.

This is where clGetProgramBuildInfo() becomes so useful.

size_t log;

clGetProgramBuildInfo(program, NULL, CL_PROGRAM_BUILD_LOG, 0, NULL, &log);

char *buildlog = malloc(log*sizeof(char));

clGetProgramBuildInfo(program, NULL, CL_PROGRAM_BUILD_LOG, log, buildlog, NULL);

printf(buildlog);

Put these statements right after your program build call. You can then see exactly what the error is.

You can also use Intel's offline compiler to see why your program failed to build. But I think your problem may be related to the cl_khr_int64_base_atomics extension. Make sure that extension is supported on the platform by calling clGetDeviceInfo() with CL_DEVICE_EXTENSIONS before using any functions it provides.

Thanks,
Raghu

=== 1 OpenCL platform(s) found: ===
-- 0 --
PROFILE = FULL_PROFILE
VERSION = OpenCL 1.0 CUDA 3.2.1
NAME = NVIDIA CUDA
VENDOR = NVIDIA Corporation
EXTENSIONS = cl_khr_byte_addressable_store cl_khr_icd cl_khr_gl_sharing cl_nv_compiler_options cl_nv_device_attribute_query cl_nv_pragma_unroll
=== 1 OpenCL device(s) found on platform:
-- 0 --
DEVICE_NAME = Tesla C2050
DEVICE_VENDOR = NVIDIA Corporation
DEVICE_VERSION = OpenCL 1.0 CUDA
DRIVER_VERSION = 260.19.26
CL_DEVICE_EXTENSIONS = cl_khr_byte_addressable_store cl_khr_icd cl_khr_gl_sharing cl_nv_compiler_options cl_nv_device_attribute_query cl_nv_pragma_unroll cl_khr_global_int32_base_atomics cl_khr_global_int32_extended_atomics cl_khr_local_int32_base_atomics cl_khr_local_int32_extended_atomics cl_khr_fp64
DEVICE_MAX_COMPUTE_UNITS = 14
DEVICE_MAX_CLOCK_FREQUENCY = 1147
DEVICE_GLOBAL_MEM_SIZE = 2817982464
-- 1 --
DEVICE_NAME = GeForce GTS 250
DEVICE_VENDOR = NVIDIA Corporation
DEVICE_VERSION = OpenCL 1.0 CUDA
DRIVER_VERSION = 260.19.26
CL_DEVICE_EXTENSIONS = cl_khr_byte_addressable_store cl_khr_icd cl_khr_gl_sharing cl_nv_compiler_options cl_nv_device_attribute_query cl_nv_pragma_unroll cl_khr_global_int32_base_atomics cl_khr_global_int32_extended_atomics
DEVICE_MAX_COMPUTE_UNITS = 16
DEVICE_MAX_CLOCK_FREQUENCY = 1458
DEVICE_GLOBAL_MEM_SIZE = 1073414144

I use nVidia Tesla C2050. This card supports integer atomic functions operating on 64-bit words in global memory (required compute capability >= 1.2).

Why is the cl_khr_int64_base_atomics extension not available?

>> I use nVidia Tesla C2050. This card supports integer atomic functions operating on 64-bit words in global memory (required compute capability >= 1.2).

>> Why is not available extend cl_khr_int64_base_atomics?

Because it looks like your driver only supports OpenCL 1.0. Maybe you need to upgrade the driver? 295.59 is the latest driver (from NVIDIA's website) for this card.

Raghu

The card does not perform operations on doubles; CL_INVALID_BINARY is returned.

I add a pragma:

#pragma OPENCL EXTENSION cl_khr_fp64 : enable

The device has cl_khr_fp64 extensions. This extension is visible in the system.

Is this a problem with the version of OpenCL?

I am not really sure I understand your problem/question.

Earlier you reported you were running into an issue with cl_khr_int64_base_atomics, but now it appears there is a problem with cl_khr_fp64. Before debugging the issue, please make sure you have the latest driver for your device, and before enabling an extension, make sure the device supports it.

Moreover, if you are running on an Nvidia device, please post on their forum since you are most likely to get correct advice there for running applications on their devices.

Thanks,
Raghu

Laisser un commentaire

Veuillez ouvrir une session pour ajouter un commentaire. Pas encore membre ? Rejoignez-nous dès aujourd’hui