Actual source code: ex18cu.cu

  1: #include <petscdevice_cuda.h>
  2: #include "ex18.h"

  4: __global__ void FillValues(PetscInt n, PetscScalar *v)
  5: {
  6:   PetscInt     i = blockIdx.x * blockDim.x + threadIdx.x;
  7:   PetscScalar *s;
  8:   if (i < n) {
  9:     s = &v[3 * 3 * i];
 10:     for (PetscInt vi = 0; vi < 3; vi++) {
 11:       for (PetscInt vj = 0; vj < 3; vj++) s[vi * 3 + vj] = vi + 2 * vj;
 12:     }
 13:   }
 14: }

 16: PetscErrorCode FillMatrixCUDACOO(FEStruct *fe, Mat A)
 17: {
 18:   PetscScalar *v;

 21:   cudaMalloc((void **)&v, 3 * 3 * fe->Ne * sizeof(PetscScalar));
 22:   FillValues<<<(fe->Ne + 255) / 256, 256>>>(fe->Ne, v);
 23:   MatSetValuesCOO(A, v, INSERT_VALUES);
 24:   cudaFree(v);
 25:   return 0;
 26: }