/* adding two vectors */ #include #include #include #define SIZE 1024 int main() { int i; CONTEXT* cp = (stdgpu)? stdgpu : stdcpu; void* clh = clopen(cp, "add_vec.cl",CLLD_NOW); cl_kernel k_addvec = clsym(cp, clh, "addvec_kern", CLLD_NOW); float* aa = (float*)clmalloc(cp, SIZE*sizeof(float), 0); float* bb = (float*)clmalloc(cp, SIZE*sizeof(float), 0); float* cc = (float*)clmalloc(cp, SIZE*sizeof(float), 0); for(i=0; i < SIZE; i++) { aa[i] = 111.0f * i; bb[i] = 222.0f * i; } bzero(cc, SIZE*sizeof(float)); clndrange_t ndr = clndrange_init1d(0, SIZE, 64); clmsync(cp, 0, aa, CL_MEM_DEVICE|CL_EVENT_NOWAIT); clmsync(cp, 0, bb, CL_MEM_DEVICE|CL_EVENT_NOWAIT); clarg_set_global(cp, k_addvec, 0, aa); clarg_set_global(cp, k_addvec, 1, bb); clarg_set_global(cp, k_addvec, 2, cc); clfork(cp, 0, k_addvec, &ndr, CL_EVENT_NOWAIT); clmsync(cp, 0, cc, CL_MEM_HOST|CL_EVENT_NOWAIT); clwait(cp, 0, CL_MEM_EVENT|CL_KERNEL_EVENT|CL_EVENT_RELEASE); for(i=0; i < SIZE; i++) printf("%f %f %f\n", aa[i], bb[i], cc[i]); if (aa) clfree(aa); if (bb) clfree(bb); if (cc) clfree(cc); clclose(cp, clh); }