J'ai ce code pour l'addition de tableaux multidimensionnels en utilisant PyOpenCL. Mon problème est que le résultat est faux pour tous, sauf pour la première dimension. J'ai consulté ce Link.PyOpenCL Multidimensional Array
from __future__ import absolute_import, print_function
import numpy as np
import pyopencl as cl
N = 4
a_np = np.random.rand(N,N).astype(np.float32)
b_np = np.random.rand(N,N).astype(np.float32)
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
mf = cl.mem_flags
a_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a_np)
b_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b_np)
prg = cl.Program(ctx, """
__kernel void sum(
__global const float *a_g, __global const float *b_g, __global float *res_g) {
int i = get_global_id(1);
int j = get_global_id(0);
res_g[i,j] = a_g[i,j] + b_g[i,j];
}
""").build()
res_g = cl.Buffer(ctx, mf.WRITE_ONLY, a_np.nbytes)
prg.sum(queue, a_np.shape, None, a_g, b_g, res_g)
res_np = np.empty_like(a_np)
cl.enqueue_copy(queue, res_np, res_g)
# Check on CPU with Numpy:
print(res_np - (a_np + b_np))
print(np.linalg.norm(res_np - (a_np + b_np)))
print (res_np)
print (a_np + b_np)
Matrice 1:
[[ 0.2990678 0.76585543 0.71866363 0.30202991]
[ 0.20604192 0.01989171 0.02402978 0.82826865]
[ 0.75456071 0.62410605 0.4374246 0.85372066]
[ 0.37000021 0.5734672 0.4250721 0.2456535 ]]
Matrix 2:
[[ 0.83109927 0.53289926 0.24182947 0.39531609]
[ 0.53014964 0.62028325 0.2397541 0.03364789]
[ 0.83543158 0.1162187 0.21168791 0.22438531]
[ 0.2178313 0.76118374 0.23737679 0.41660839]]
Résultat attendu:
[[ 1.13016701 1.29875469 0.96049309 0.69734597]
[ 0.73619157 0.64017498 0.26378387 0.86191654]
[ 1.58999228 0.74032474 0.64911252 1.07810593]
[ 0.5878315 1.33465099 0.66244888 0.6622619 ]]
Script Résultat:
[[ 1.13016701 1.29875469 0.96049309 0.69734597]
[ 0. 0. 0. 0. ]
[ 0. 0. 0. 0. ]
[ 0. 0. 0. 0. ]]