Actual source code: ex5k.kokkos.cxx

  1: static char help[] = "Test of Kokkos matrix assemble with 1D Laplacian. Kokkos version of ex5cu \n\n";

  3: #include <petscconf.h>
  4: #include <petscmat.h>

  6: /*
  7:     Include Kokkos files
  8: */
  9: #include <Kokkos_Core.hpp>
 10: #include <Kokkos_OffsetView.hpp>

 12: #include <petscaijdevice.h>

 14: int main(int argc, char **argv)
 15: {
 16:   Mat                        A;
 17:   PetscInt                   N = 11, nz = 3, Istart, Iend, num_threads = 128;
 18:   PetscSplitCSRDataStructure d_mat;
 19:   PetscLogEvent              event;
 20:   Vec                        x, y;
 21:   PetscMPIInt                rank;

 24:   PetscInitialize(&argc, &argv, (char *)0, help);
 25: #if defined(PETSC_HAVE_KOKKOS_KERNELS)
 26:   PetscOptionsGetInt(NULL, NULL, "-n", &N, NULL);
 27:   PetscOptionsGetInt(NULL, NULL, "-nz_row", &nz, NULL); // for debugging, will be wrong if nz<3
 28:   PetscOptionsGetInt(NULL, NULL, "-n", &N, NULL);
 29:   PetscOptionsGetInt(NULL, NULL, "-num_threads", &num_threads, NULL);
 30:   if (nz > N + 1) {
 31:     PetscPrintf(PETSC_COMM_WORLD, "warning decreasing nz\n");
 32:     nz = N + 1;
 33:   }
 34:   MPI_Comm_rank(PETSC_COMM_WORLD, &rank);

 36:   PetscLogEventRegister("GPU operator", MAT_CLASSID, &event);
 37:   MatCreate(PETSC_COMM_WORLD, &A);
 38:   MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, N, N);
 39:   MatSetType(A, MATAIJKOKKOS);
 40:   MatSeqAIJSetPreallocation(A, nz, NULL);
 41:   MatMPIAIJSetPreallocation(A, nz, NULL, nz - 1, NULL);
 42:   MatSetFromOptions(A);
 43:   MatSetOption(A, MAT_IGNORE_OFF_PROC_ENTRIES, PETSC_TRUE);
 44:   MatCreateVecs(A, &x, &y);
 45:   MatGetOwnershipRange(A, &Istart, &Iend);

 47:   // assemble end on CPU. We are not assembling redundant here, and ignoring off proc entries, but we could
 48:   for (int i = Istart; i < Iend + 1; i++) {
 49:     PetscScalar values[] = {1, 1, 1, 1};
 50:     PetscInt    js[] = {i - 1, i}, nn = (i == N) ? 1 : 2; // negative indices are ignored but >= N are not, so clip end
 51:     MatSetValues(A, nn, js, nn, js, values, ADD_VALUES);
 52:   }
 53:   MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
 54:   MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);

 56:   // test Kokkos
 57:   VecSet(x, 1.0);
 58:   MatMult(A, x, y);
 59:   VecViewFromOptions(y, NULL, "-ex5_vec_view");

 61:   // assemble on GPU
 62:   if (Iend < N) Iend++; // elements, ignore off processor entries so do redundant
 63:   PetscLogEventBegin(event, 0, 0, 0, 0);
 64:   MatKokkosGetDeviceMatWrite(A, &d_mat);
 65:   Kokkos::fence();
 66:   Kokkos::parallel_for(
 67:     Kokkos::RangePolicy<>(Istart, Iend + 1), KOKKOS_LAMBDA(int i) {
 68:       PetscScalar values[] = {1, 1, 1, 1};
 69:       PetscInt    js[] = {i - 1, i}, nn = (i == N) ? 1 : 2;
 70:       MatSetValuesDevice(d_mat, nn, js, nn, js, values, ADD_VALUES);
 71:     });
 72:   Kokkos::fence();
 73:   MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
 74:   MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);

 76:   VecSet(x, 1.0);
 77:   MatMult(A, x, y);
 78:   VecViewFromOptions(y, NULL, "-ex5_vec_view");
 79:   PetscLogEventEnd(event, 0, 0, 0, 0);

 81:   MatDestroy(&A);
 82:   VecDestroy(&x);
 83:   VecDestroy(&y);
 84: #else
 85:   SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_COR, "Kokkos kernels required");
 86: #endif
 87:   PetscFinalize();
 88:   return 0;
 89: }

 91: /*TEST

 93:    build:
 94:      requires: kokkos_kernels

 96:    test:
 97:      suffix: 0
 98:      requires: kokkos_kernels double !complex !single
 99:      args: -n 11 -ex5_vec_view
100:      nsize:  2

102: TEST*/