Actual source code: ex18.c
/* One-line description of the example; handed to PetscInitialize() in main() as the help text */
static char help[] = "Demonstrates the use of the COO interface to PETSc matrices "
                     "for finite element computations"
                     "\n\n";
/*
     The COO interface for PETSc matrices provides a convenient way to pass finite element element-stiffness matrices to a PETSc matrix, and it should work
     well on both CPUs and GPUs. It is an alternative to using MatSetValues().

     This example is intended for people who are NOT using DMPLEX or libCEED or any other higher-level infrastructure for finite elements;
     it is only to demonstrate the concepts in a simple way for those people who are interested and for those people who are using PETSc for
     linear algebra solvers but are managing their own finite element process.

     Please do NOT use this example as a starting point for writing your own finite element code from scratch!

     Each element in this example has three vertices; hence the usage below needs to be adjusted for elements with a different number of vertices.
*/
16: #include <petscmat.h>
17: #include "ex18.h"
19: static PetscErrorCode CreateFEStruct(FEStruct *fe)
20: {
22: fe->Nv = 5;
23: fe->Ne = 3;
24: PetscMalloc1(3 * fe->Ne, &fe->vertices);
25: /* the three vertices associated with each element in order of element */
26: fe->vertices[0 + 0] = 0;
27: fe->vertices[0 + 1] = 1;
28: fe->vertices[0 + 2] = 2;
29: fe->vertices[3 + 0] = 2;
30: fe->vertices[3 + 1] = 1;
31: fe->vertices[3 + 2] = 3;
32: fe->vertices[6 + 0] = 2;
33: fe->vertices[6 + 1] = 4;
34: fe->vertices[6 + 2] = 3;
35: fe->n = 5;
36: return 0;
37: }
39: static PetscErrorCode DestroyFEStruct(FEStruct *fe)
40: {
42: PetscFree(fe->vertices);
43: PetscFree(fe->coo);
44: return 0;
45: }
47: static PetscErrorCode CreateMatrix(FEStruct *fe, Mat *A)
48: {
49: PetscInt *oor, *ooc, cnt = 0;
52: MatCreate(PETSC_COMM_WORLD, A);
53: MatSetSizes(*A, fe->n, fe->n, PETSC_DECIDE, PETSC_DECIDE);
54: MatSetFromOptions(*A);
56: /* determine for each entry in each element stiffness matrix the global row and column */
57: /* since the element is triangular with piecewise linear basis functions there are three degrees of freedom per element, one for each vertex */
58: PetscMalloc2(3 * 3 * fe->Ne, &oor, 3 * 3 * fe->Ne, &ooc);
59: for (PetscInt e = 0; e < fe->Ne; e++) {
60: for (PetscInt vi = 0; vi < 3; vi++) {
61: for (PetscInt vj = 0; vj < 3; vj++) {
62: oor[cnt] = fe->vertices[3 * e + vi];
63: ooc[cnt++] = fe->vertices[3 * e + vj];
64: }
65: }
66: }
67: MatSetPreallocationCOO(*A, 3 * 3 * fe->Ne, oor, ooc);
68: PetscFree2(oor, ooc);
70: /* determine the offset into the COO value array the offset of each element stiffness; there are 9 = 3*3 entries for each element stiffness */
71: /* for lists of elements with different numbers of degrees of freedom associated with each element the offsets will not be uniform */
72: PetscMalloc1(fe->Ne, &fe->coo);
73: fe->coo[0] = 0;
74: for (PetscInt e = 1; e < fe->Ne; e++) fe->coo[e] = fe->coo[e - 1] + 3 * 3;
75: return 0;
76: }
78: static PetscErrorCode FillMatrixCPU(FEStruct *fe, Mat A)
79: {
80: PetscScalar s[9];
83: /* simulation of traditional PETSc CPU based finite assembly process */
84: for (PetscInt e = 0; e < fe->Ne; e++) {
85: for (PetscInt vi = 0; vi < 3; vi++) {
86: for (PetscInt vj = 0; vj < 3; vj++) s[3 * vi + vj] = vi + 2 * vj;
87: }
88: MatSetValues(A, 3, fe->vertices + 3 * e, 3, fe->vertices + 3 * e, s, ADD_VALUES);
89: }
90: MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
91: MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
92: return 0;
93: }
95: /*
96: Shows an example of tracking element offsets explicitly, which allows for
97: mixed-topology meshes and combining both volume and surface parts into the weak form.
98: */
99: static PetscErrorCode FillMatrixCPUCOO(FEStruct *fe, Mat A)
100: {
101: PetscScalar *v, *s;
104: /* simulation of CPU based finite assembly process with COO */
105: PetscMalloc1(3 * 3 * fe->Ne, &v);
106: for (PetscInt e = 0; e < fe->Ne; e++) {
107: s = v + fe->coo[e]; /* point to location in COO of current element stiffness */
108: for (PetscInt vi = 0; vi < 3; vi++) {
109: for (PetscInt vj = 0; vj < 3; vj++) s[3 * vi + vj] = vi + 2 * vj;
110: }
111: }
112: MatSetValuesCOO(A, v, ADD_VALUES);
113: PetscFree(v);
114: return 0;
115: }
117: /*
118: Uses a multi-dimensional indexing technique that works for homogeneous meshes
119: such as single-topology with volume integral only.
120: */
121: static PetscErrorCode FillMatrixCPUCOO3d(FEStruct *fe, Mat A)
122: {
123: PetscScalar(*s)[3][3];
126: /* simulation of CPU based finite assembly process with COO */
127: PetscMalloc1(fe->Ne, &s);
128: for (PetscInt e = 0; e < fe->Ne; e++) {
129: for (PetscInt vi = 0; vi < 3; vi++) {
130: for (PetscInt vj = 0; vj < 3; vj++) s[e][vi][vj] = vi + 2 * vj;
131: }
132: }
133: MatSetValuesCOO(A, (PetscScalar *)s, INSERT_VALUES);
134: PetscFree(s);
135: return 0;
136: }
138: int main(int argc, char **args)
139: {
140: Mat A;
141: FEStruct fe;
142: PetscMPIInt size;
143: PetscBool is_kokkos, is_cuda;
146: PetscInitialize(&argc, &args, (char *)0, help);
147: MPI_Comm_size(PETSC_COMM_WORLD, &size);
150: CreateFEStruct(&fe);
151: CreateMatrix(&fe, &A);
153: FillMatrixCPU(&fe, A);
154: MatView(A, PETSC_VIEWER_STDOUT_WORLD);
156: MatZeroEntries(A);
157: FillMatrixCPUCOO(&fe, A);
158: MatView(A, PETSC_VIEWER_STDOUT_WORLD);
160: MatZeroEntries(A);
161: FillMatrixCPUCOO3d(&fe, A);
162: MatView(A, PETSC_VIEWER_STDOUT_WORLD);
164: MatZeroEntries(A);
165: PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJKOKKOS, &is_kokkos);
166: PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJCUSPARSE, &is_cuda);
167: #if defined(PETSC_HAVE_KOKKOS)
168: if (is_kokkos) FillMatrixKokkosCOO(&fe, A);
169: #endif
170: #if defined(PETSC_HAVE_CUDA)
171: if (is_cuda) FillMatrixCUDACOO(&fe, A);
172: #endif
173: MatView(A, PETSC_VIEWER_STDOUT_WORLD);
175: MatDestroy(&A);
176: DestroyFEStruct(&fe);
177: PetscFinalize();
178: return 0;
179: }
181: /*TEST
182: build:
183: requires: cuda kokkos_kernels
184: depends: ex18cu.cu ex18kok.kokkos.cxx
186: testset:
187: filter: grep -v "type"
188: output_file: output/ex18_1.out
190: test:
191: suffix: kok
192: requires: kokkos_kernels
193: args: -mat_type aijkokkos
195: test:
196: suffix: cuda
197: requires: cuda
198: args: -mat_type aijcusparse
200: TEST*/