Actual source code: ex5k.kokkos.cxx
1: static char help[] = "Test of Kokkos matrix assemble with 1D Laplacian. Kokkos version of ex5cu \n\n";
3: #include <petscconf.h>
4: #include <petscmat.h>
6: /*
7: Include Kokkos files
8: */
9: #include <Kokkos_Core.hpp>
10: #include <Kokkos_OffsetView.hpp>
12: #include <petscaijdevice.h>
14: int main(int argc, char **argv)
15: {
16: Mat A;
17: PetscInt N = 11, nz = 3, Istart, Iend, num_threads = 128;
18: PetscSplitCSRDataStructure d_mat;
19: PetscLogEvent event;
20: Vec x, y;
21: PetscMPIInt rank;
24: PetscInitialize(&argc, &argv, (char *)0, help);
25: #if defined(PETSC_HAVE_KOKKOS_KERNELS)
26: PetscOptionsGetInt(NULL, NULL, "-n", &N, NULL);
27: PetscOptionsGetInt(NULL, NULL, "-nz_row", &nz, NULL); // for debugging, will be wrong if nz<3
28: PetscOptionsGetInt(NULL, NULL, "-n", &N, NULL);
29: PetscOptionsGetInt(NULL, NULL, "-num_threads", &num_threads, NULL);
30: if (nz > N + 1) {
31: PetscPrintf(PETSC_COMM_WORLD, "warning decreasing nz\n");
32: nz = N + 1;
33: }
34: MPI_Comm_rank(PETSC_COMM_WORLD, &rank);
36: PetscLogEventRegister("GPU operator", MAT_CLASSID, &event);
37: MatCreate(PETSC_COMM_WORLD, &A);
38: MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, N, N);
39: MatSetType(A, MATAIJKOKKOS);
40: MatSeqAIJSetPreallocation(A, nz, NULL);
41: MatMPIAIJSetPreallocation(A, nz, NULL, nz - 1, NULL);
42: MatSetFromOptions(A);
43: MatSetOption(A, MAT_IGNORE_OFF_PROC_ENTRIES, PETSC_TRUE);
44: MatCreateVecs(A, &x, &y);
45: MatGetOwnershipRange(A, &Istart, &Iend);
47: // assemble end on CPU. We are not assembling redundant here, and ignoring off proc entries, but we could
48: for (int i = Istart; i < Iend + 1; i++) {
49: PetscScalar values[] = {1, 1, 1, 1};
50: PetscInt js[] = {i - 1, i}, nn = (i == N) ? 1 : 2; // negative indices are ignored but >= N are not, so clip end
51: MatSetValues(A, nn, js, nn, js, values, ADD_VALUES);
52: }
53: MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
54: MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
56: // test Kokkos
57: VecSet(x, 1.0);
58: MatMult(A, x, y);
59: VecViewFromOptions(y, NULL, "-ex5_vec_view");
61: // assemble on GPU
62: if (Iend < N) Iend++; // elements, ignore off processor entries so do redundant
63: PetscLogEventBegin(event, 0, 0, 0, 0);
64: MatKokkosGetDeviceMatWrite(A, &d_mat);
65: Kokkos::fence();
66: Kokkos::parallel_for(
67: Kokkos::RangePolicy<>(Istart, Iend + 1), KOKKOS_LAMBDA(int i) {
68: PetscScalar values[] = {1, 1, 1, 1};
69: PetscInt js[] = {i - 1, i}, nn = (i == N) ? 1 : 2;
70: MatSetValuesDevice(d_mat, nn, js, nn, js, values, ADD_VALUES);
71: });
72: Kokkos::fence();
73: MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
74: MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
76: VecSet(x, 1.0);
77: MatMult(A, x, y);
78: VecViewFromOptions(y, NULL, "-ex5_vec_view");
79: PetscLogEventEnd(event, 0, 0, 0, 0);
81: MatDestroy(&A);
82: VecDestroy(&x);
83: VecDestroy(&y);
84: #else
85: SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_COR, "Kokkos kernels required");
86: #endif
87: PetscFinalize();
88: return 0;
89: }
91: /*TEST
93: build:
94: requires: kokkos_kernels
96: test:
97: suffix: 0
98: requires: kokkos_kernels double !complex !single
99: args: -n 11 -ex5_vec_view
100: nsize: 2
102: TEST*/