2010-09-16 5 views
3

J'ai construit mon propre petit exemple Opencl en utilisant différentes sources sur le net. Le noyau actuel fonctionne, et j'obtiens la sortie que je veux, mais les fonctions de nettoyage, que j'ai trouvées dans l'un des exemples, provoquent des erreurs de segmentation. Qu'ai-je fait de mal?Le nettoyage OpenCl provoque la segfault

#include <stdio.h> 
#include <stdlib.h> 
#include <errno.h> 
#include <CL/cl.h> //opencl 

#define CL_CHECK(_expr)               \ 
    do {                   \ 
    cl_int _err = _expr;              \ 
    if (_err == CL_SUCCESS)             \ 
     break;                 \ 
    fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \ 
    abort();                 \ 
    } while (0) 

#define CL_CHECK_ERR(_expr)              \ 
    ({                   \ 
    cl_int _err = CL_INVALID_VALUE;           \ 
    typeof(_expr) _ret = _expr;            \ 
    if (_err != CL_SUCCESS) {             \ 
     fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \ 
     abort();                 \ 
    }                   \ 
    _ret;                  \ 
    }) 

const char* OpenCLSource[] = { 
     "__kernel void VectorAdd(__global int* c, __global int* a,__global int* b)", 
     "{", 
     "  // Index of the elements to add \n", 
     "  unsigned int n = get_global_id(0);", 
     "  // Sum the n’th element of vectors a and b and store in c \n", 
     "  c[n] = a[n] + b[n];", 
     "}" 
}; 

cl_device_id* init_opencl(cl_context *GPUContext,cl_command_queue *GPUCommandQueue, cl_kernel* cl_forward1,cl_program* OpenCLProgram){ 

    // Create a context to run OpenCL on our CUDA-enabled NVIDIA GPU 
    cl_int _err; 
    *GPUContext = clCreateContextFromType(0, CL_DEVICE_TYPE_GPU, NULL, NULL, &_err) ; 
    printf("\n1-%i\n",_err); 
    // Get the list of GPU devices associated with this context 
    size_t ParmDataBytes; 
    CL_CHECK(clGetContextInfo(*GPUContext, CL_CONTEXT_DEVICES, 0, NULL, &ParmDataBytes)); 
    cl_device_id* GPUDevices; 
    GPUDevices = (cl_device_id*)malloc(ParmDataBytes); 
    CL_CHECK(clGetContextInfo(*GPUContext, CL_CONTEXT_DEVICES, ParmDataBytes, GPUDevices, NULL)); 
    // Create a command-queue on the first GPU device 
    *GPUCommandQueue = clCreateCommandQueue(*GPUContext, GPUDevices[0], 0, &_err); 
    printf("\n2-%i\n",_err); 
    // Create OpenCL program with source code 
    *OpenCLProgram = clCreateProgramWithSource(*GPUContext, 7, OpenCLSource, NULL, &_err); 
    printf("\n3-%i\n",_err); 

    CL_CHECK(clBuildProgram(*OpenCLProgram, 0, 
       NULL, NULL, NULL, NULL)); 


    cl_int errcode; 
    *cl_forward1 = clCreateKernel(*OpenCLProgram, 
       "VectorAdd", &errcode); 
       printf("\n7-%i\n",errcode); 

    return GPUDevices; 
} 


int main(int argc, char** argv) 
{ 
    cl_context GPUContext; 
    cl_command_queue GPUCommandQueue; 
    cl_program OpenCLProgram; 
    cl_kernel OpenCLVectorAdd; 
    cl_device_id* GPUDevices; 

    GPUDevices=init_opencl(&GPUContext,&GPUCommandQueue,&OpenCLVectorAdd,&OpenCLProgram); 

    // Two integer source vectors in Host memory 
    int n=5 ; 
    int x[5]={1,2,4,6,8}; 
    int y[5]={1,2,4,6,8}; 
    int output[n]; 
    int size_x = n*sizeof(x); 
    int size_y = n*sizeof(y); 

    int size_output = n*sizeof(output); // this changes for the second forward1 
    cl_int _err; 
    // Allocate GPU memory for source vectors AND initialize from CPU memory 
    cl_mem x_cl = clCreateBuffer(GPUContext, CL_MEM_READ_ONLY | 
        CL_MEM_COPY_HOST_PTR, size_x, x, &_err); 
        printf("\n4-%i\n",_err); 
    cl_mem y_cl = clCreateBuffer(GPUContext, CL_MEM_READ_ONLY | 
        CL_MEM_COPY_HOST_PTR, size_y, y, &_err); 
        printf("\n5-%i\n",_err); 


    // Allocate output memory on GPU 
    cl_mem total_cl = clCreateBuffer(GPUContext, CL_MEM_WRITE_ONLY, 
              size_output, NULL, &_err); 
              printf("\n6-%i\n",_err); 

    // In the next step we associate the GPU memory with the Kernel arguments 
    clSetKernelArg(OpenCLVectorAdd, 0, sizeof(cl_mem),(void*)&total_cl); 
    clSetKernelArg(OpenCLVectorAdd, 1, sizeof(cl_mem), (void*)&x_cl); 
    clSetKernelArg(OpenCLVectorAdd, 2, sizeof(cl_mem), (void*)&y_cl); 


    // 7. Launch OpenCL kernel 
    size_t localWorkSize[1], globalWorkSize[1]; 
    //localWorkSize = ; 
    globalWorkSize[0] = n; 

    // Launch the Kernel on the GPU 
    CL_CHECK(clEnqueueNDRangeKernel(GPUCommandQueue, OpenCLVectorAdd, 1, NULL, globalWorkSize, NULL, 0, NULL, NULL)); 
    // Copy the output in GPU memory back to CPU memory 

    //float* h_C = (float*) malloc(size_output); 
    CL_CHECK(clEnqueueReadBuffer(GPUCommandQueue, 
       total_cl, CL_TRUE, 0, size_output, 
       output, 0, NULL, NULL)); 
    for (int i=0; i<n;i++){ 
     printf("\n%i",output[i]); 
    } 

    // Cleanup (each of the following lines causes a seg fault 
    // ****************************** 
    CL_CHECK(free(GPUDevices)); 
    CL_CHECK(clReleaseKernel(OpenCLVectorAdd)); 
    CL_CHECK(clReleaseProgram(OpenCLProgram)); 
    CL_CHECK(clReleaseCommandQueue(GPUCommandQueue)); 
    CL_CHECK(clReleaseContext(GPUContext)); 
    CL_CHECK(clReleaseMemObject(total_cl)); 
    CL_CHECK(clReleaseMemObject(x_cl)); 
    CL_CHECK(clReleaseMemObject(y_cl)); 
    /* **************** 

    return 0; 
} 

Merci!

+0

Quelle ligne exacte plante? –

+0

N'êtes-vous pas supposé attendre la fin de la commande de lecture en file d'attente avant d'aller lire le tableau de sortie []? –

+0

D'ailleurs, n'est-ce pas censé être une attente après l'appel du noyau mis en file d'attente également? –

Répondre

-2

J'ai corrigé et changé plusieurs petites choses. Donc, ce code devrait fonctionner maintenant.

#include <stdio.h> 
#include <stdlib.h> 
#include <errno.h> 
#include <CL/cl.h> //opencl 

#define CL_CHECK(_expr)               \ 
    do {                   \ 
    cl_int _err = _expr;              \ 
    if (_err == CL_SUCCESS)             \ 
     break;                 \ 
    fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \ 
    abort();                 \ 
    } while (0) 

#define CL_CHECK_ERR(_expr)              \ 
    ({                   \ 
    cl_int _err = CL_INVALID_VALUE;           \ 
    typeof(_expr) _ret = _expr;            \ 
    if (_err != CL_SUCCESS) {             \ 
     fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \ 
     abort();                 \ 
    }                   \ 
    _ret;                  \ 
    }) 

const char* OpenCLSource[] = { 
     "__kernel void VectorAdd(__global int* c, __global int* a,__global int* b)", 
     "{", 
     "  // Index of the elements to add \n", 
     "  unsigned int n = get_global_id(0);", 
     "  // Sum the n’th element of vectors a and b and store in c \n", 
     "  c[n] = a[n] + b[n];", 
     "}" 
}; 

cl_device_id* init_opencl(cl_context *GPUContext,cl_command_queue *GPUCommandQueue, cl_kernel* cl_forward1,cl_program* OpenCLProgram){ 

    // Create a context to run OpenCL on our CUDA-enabled NVIDIA GPU 
    cl_int _err; 
    *GPUContext = clCreateContextFromType(0, CL_DEVICE_TYPE_GPU, NULL, NULL, &_err) ; 
    printf("\nclCreateContextFromType:%i\n",_err); 
    // Get the list of GPU devices associated with this context 
    size_t ParmDataBytes; 
    CL_CHECK(clGetContextInfo(*GPUContext, CL_CONTEXT_DEVICES, 0, NULL, &ParmDataBytes)); 
    cl_device_id* GPUDevices; 
    GPUDevices = (cl_device_id*)malloc(ParmDataBytes); 
    CL_CHECK(clGetContextInfo(*GPUContext, CL_CONTEXT_DEVICES, ParmDataBytes, GPUDevices, NULL)); 
    // Create a command-queue on the first GPU device 
    *GPUCommandQueue = clCreateCommandQueue(*GPUContext, GPUDevices[0], 0, &_err); 
    printf("\nclCreateCommandQueue:%i\n",_err); 
    // Create OpenCL program with source code 
    *OpenCLProgram = clCreateProgramWithSource(*GPUContext, 7, OpenCLSource, NULL, &_err); 
    printf("\nclCreateProgramWithSource:%i\n",_err); 

    CL_CHECK(clBuildProgram(*OpenCLProgram, 0, 
       NULL, NULL, NULL, NULL)); 


    cl_int errcode; 
    *cl_forward1 = clCreateKernel(*OpenCLProgram, 
       "VectorAdd", &errcode); 
       printf("\nclCreateKernel:%i\n",errcode); 

    return GPUDevices; 
} 


int main(int argc, char** argv) 
{ 
    cl_context GPUContext; 
    cl_command_queue GPUCommandQueue; 
    cl_program OpenCLProgram; 
    cl_kernel OpenCLVectorAdd; 
    cl_device_id* GPUDevices; 

    GPUDevices=init_opencl(&GPUContext,&GPUCommandQueue,&OpenCLVectorAdd,&OpenCLProgram); 

    int n=5 ; 
    int x[5]={1,2,4,6,8}; 
    int y[5]={1,2,4,6,8}; 
    int output[n]; 
    int size_x = n*sizeof(x); 
    int size_y = n*sizeof(y); 
    int size_output = n*sizeof(output); 

    cl_int _err; 

    // Allocate GPU memory for source vectors AND initialize from CPU memory 
    cl_mem x_cl = clCreateBuffer(GPUContext, CL_MEM_READ_ONLY | 
        CL_MEM_COPY_HOST_PTR, size_x, x, &_err); 
        printf("\nclCreateBuffer:%i\n",_err); 
    cl_mem y_cl = clCreateBuffer(GPUContext, CL_MEM_READ_ONLY | 
        CL_MEM_COPY_HOST_PTR, size_y, y, &_err); 
        printf("\nclCreateBuffer:%i\n",_err); 


    // Allocate output memory on GPU 
    cl_mem total_cl = clCreateBuffer(GPUContext, CL_MEM_WRITE_ONLY, 
              size_output, NULL, &_err); 
              printf("\nclCreateBuffer:%i\n",_err); 

    // In the next step we associate the GPU memory with the Kernel arguments 
    clSetKernelArg(OpenCLVectorAdd, 0, sizeof(cl_mem),(void*)&total_cl); 
    clSetKernelArg(OpenCLVectorAdd, 1, sizeof(cl_mem), (void*)&x_cl); 
    clSetKernelArg(OpenCLVectorAdd, 2, sizeof(cl_mem), (void*)&y_cl); 


    size_t globalWorkSize[1]; 
    globalWorkSize[0] = n; 

    // Launch the Kernel on the GPU 
    CL_CHECK(clEnqueueNDRangeKernel(GPUCommandQueue, OpenCLVectorAdd, 1, NULL, globalWorkSize, NULL, 0, NULL, NULL)); 
    clFinish(GPUCommandQueue); 
    // Copy the output in GPU memory back to CPU memory 

    int* h_c = (int*) malloc(size_output); 
    CL_CHECK(clEnqueueReadBuffer(GPUCommandQueue, 
       total_cl, CL_TRUE, 0, size_output, 
       h_c, 0, NULL, NULL)); 
    clFinish(GPUCommandQueue); 
    for (int i=0; i<n;i++){ 
     printf("\noutput[%i]=%i",i,h_c[i]); 
    } 

    // Cleanup 
    free(GPUDevices); 
    CL_CHECK(clReleaseKernel(OpenCLVectorAdd)); 
    CL_CHECK(clReleaseProgram(OpenCLProgram)); 
    CL_CHECK(clReleaseCommandQueue(GPUCommandQueue)); 
    CL_CHECK(clReleaseContext(GPUContext)); 
    CL_CHECK(clReleaseMemObject(x_cl)); 
    CL_CHECK(clReleaseMemObject(total_cl)); 
    CL_CHECK(clReleaseMemObject(y_cl)); 

    return 0; 
} 
+4

Ce n'est pas utile, il n'y a aucune explication de ce que le problème était ou quoi dans le code que vous avez modifié le corrige – o0rebelious0o

0

Pour les personnes qui arrive ici à l'avenir:

Comme suggéré Brafford, cela est résolu en ajoutant clFinish (GPUCommandQueue) après clEnqueueNDRangeKernel ainsi que clEnqueueReadBuffer.

Apparemment, essayer de nettoyer un objet (par exemple libérer une file d'attente) qui est encore en cours d'exécution engendre une erreur de segmentation.