Actual source code: ex18.c
1: static char help[] = "Demonstrates the use of the COO interface to PETSc matrices for finite element computations\n\n";
3: /*
4: The COO interface for PETSc matrices provides a convenient way to provide finite element stiffness matrices to PETSc matrix that should work
5: well on both CPUs and GPUs. It is an alternative to using MatSetValues()
7: This example is intended for people who are NOT using DMPLEX or libCEED or any other higher-level infrastructure for finite elements;
8: it is only to demonstrate the concepts in a simple way for those people who are interested and for those people who are using PETSc for
9: linear algebra solvers but are managing their own finite element process.
11: Please do NOT use this example as a starting point to writing your own finite element code from scratch!
13: Each element in this example has three vertices; hence the usage below needs to be adjusted for elements of a different number of vertices.
14: */
16: #include <petscmat.h>
17: #include "ex18.h"
19: static PetscErrorCode CreateFEStruct(FEStruct *fe)
20: {
21: PetscFunctionBeginUser;
22: fe->Nv = 5;
23: fe->Ne = 3;
24: PetscCall(PetscMalloc1(3 * fe->Ne, &fe->vertices));
25: /* the three vertices associated with each element in order of element */
26: fe->vertices[0 + 0] = 0;
27: fe->vertices[0 + 1] = 1;
28: fe->vertices[0 + 2] = 2;
29: fe->vertices[3 + 0] = 2;
30: fe->vertices[3 + 1] = 1;
31: fe->vertices[3 + 2] = 3;
32: fe->vertices[6 + 0] = 2;
33: fe->vertices[6 + 1] = 4;
34: fe->vertices[6 + 2] = 3;
35: fe->n = 5;
36: PetscFunctionReturn(PETSC_SUCCESS);
37: }
39: static PetscErrorCode DestroyFEStruct(FEStruct *fe)
40: {
41: PetscFunctionBeginUser;
42: PetscCall(PetscFree(fe->vertices));
43: PetscCall(PetscFree(fe->coo));
44: PetscFunctionReturn(PETSC_SUCCESS);
45: }
47: static PetscErrorCode CreateMatrix(FEStruct *fe, Mat *A)
48: {
49: PetscInt *oor, *ooc, cnt = 0;
51: PetscFunctionBeginUser;
52: PetscCall(MatCreate(PETSC_COMM_WORLD, A));
53: PetscCall(MatSetSizes(*A, fe->n, fe->n, PETSC_DECIDE, PETSC_DECIDE));
54: PetscCall(MatSetFromOptions(*A));
56: /* determine for each entry in each element stiffness matrix the global row and column */
57: /* since the element is triangular with piecewise linear basis functions there are three degrees of freedom per element, one for each vertex */
58: PetscCall(PetscMalloc2(3 * 3 * fe->Ne, &oor, 3 * 3 * fe->Ne, &ooc));
59: for (PetscInt e = 0; e < fe->Ne; e++) {
60: for (PetscInt vi = 0; vi < 3; vi++) {
61: for (PetscInt vj = 0; vj < 3; vj++) {
62: oor[cnt] = fe->vertices[3 * e + vi];
63: ooc[cnt++] = fe->vertices[3 * e + vj];
64: }
65: }
66: }
67: PetscCall(MatSetPreallocationCOO(*A, 3 * 3 * fe->Ne, oor, ooc));
68: PetscCall(PetscFree2(oor, ooc));
70: /* determine the offset into the COO value array the offset of each element stiffness; there are 9 = 3*3 entries for each element stiffness */
71: /* for lists of elements with different numbers of degrees of freedom associated with each element the offsets will not be uniform */
72: PetscCall(PetscMalloc1(fe->Ne, &fe->coo));
73: fe->coo[0] = 0;
74: for (PetscInt e = 1; e < fe->Ne; e++) fe->coo[e] = fe->coo[e - 1] + 3 * 3;
75: PetscFunctionReturn(PETSC_SUCCESS);
76: }
78: static PetscErrorCode FillMatrixCPU(FEStruct *fe, Mat A)
79: {
80: PetscScalar s[9];
82: PetscFunctionBeginUser;
83: /* simulation of traditional PETSc CPU based finite assembly process */
84: for (PetscInt e = 0; e < fe->Ne; e++) {
85: for (PetscInt vi = 0; vi < 3; vi++) {
86: for (PetscInt vj = 0; vj < 3; vj++) s[3 * vi + vj] = vi + 2 * vj;
87: }
88: PetscCall(MatSetValues(A, 3, fe->vertices + 3 * e, 3, fe->vertices + 3 * e, s, ADD_VALUES));
89: }
90: PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
91: PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
92: PetscFunctionReturn(PETSC_SUCCESS);
93: }
95: /*
96: Shows an example of tracking element offsets explicitly, which allows for
97: mixed-topology meshes and combining both volume and surface parts into the weak form.
98: */
99: static PetscErrorCode FillMatrixCPUCOO(FEStruct *fe, Mat A)
100: {
101: PetscScalar *v, *s;
103: PetscFunctionBeginUser;
104: /* simulation of CPU based finite assembly process with COO */
105: PetscCall(PetscMalloc1(3 * 3 * fe->Ne, &v));
106: for (PetscInt e = 0; e < fe->Ne; e++) {
107: s = v + fe->coo[e]; /* point to location in COO of current element stiffness */
108: for (PetscInt vi = 0; vi < 3; vi++) {
109: for (PetscInt vj = 0; vj < 3; vj++) s[3 * vi + vj] = vi + 2 * vj;
110: }
111: }
112: PetscCall(MatSetValuesCOO(A, v, ADD_VALUES));
113: PetscCall(PetscFree(v));
114: PetscFunctionReturn(PETSC_SUCCESS);
115: }
117: /*
118: Uses a multi-dimensional indexing technique that works for homogeneous meshes
119: such as single-topology with volume integral only.
120: */
121: static PetscErrorCode FillMatrixCPUCOO3d(FEStruct *fe, Mat A)
122: {
123: PetscScalar(*s)[3][3];
125: PetscFunctionBeginUser;
126: /* simulation of CPU based finite assembly process with COO */
127: PetscCall(PetscMalloc1(fe->Ne, &s));
128: for (PetscInt e = 0; e < fe->Ne; e++) {
129: for (PetscInt vi = 0; vi < 3; vi++) {
130: for (PetscInt vj = 0; vj < 3; vj++) s[e][vi][vj] = vi + 2 * vj;
131: }
132: }
133: PetscCall(MatSetValuesCOO(A, (PetscScalar *)s, INSERT_VALUES));
134: PetscCall(PetscFree(s));
135: PetscFunctionReturn(PETSC_SUCCESS);
136: }
138: int main(int argc, char **args)
139: {
140: Mat A, B;
141: FEStruct fe;
142: PetscMPIInt size;
143: PetscBool is_kokkos, is_cuda;
145: PetscFunctionBeginUser;
146: PetscCall(PetscInitialize(&argc, &args, (char *)0, help));
147: PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
148: PetscCheck(size <= 1, PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "Demonstration is only for sequential runs");
150: PetscCall(CreateFEStruct(&fe));
152: PetscCall(CreateMatrix(&fe, &B));
153: PetscCall(MatDuplicate(B, MAT_DO_NOT_COPY_VALUES, &A));
154: PetscCall(MatDestroy(&B));
156: PetscCall(FillMatrixCPU(&fe, A));
157: PetscCall(MatView(A, PETSC_VIEWER_STDOUT_WORLD));
159: PetscCall(MatZeroEntries(A));
160: PetscCall(FillMatrixCPUCOO(&fe, A));
161: PetscCall(MatView(A, PETSC_VIEWER_STDOUT_WORLD));
163: PetscCall(MatZeroEntries(A));
164: PetscCall(FillMatrixCPUCOO3d(&fe, A));
165: PetscCall(MatView(A, PETSC_VIEWER_STDOUT_WORLD));
167: PetscCall(MatZeroEntries(A));
168: PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJKOKKOS, &is_kokkos));
169: PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJCUSPARSE, &is_cuda));
170: #if defined(PETSC_HAVE_KOKKOS)
171: if (is_kokkos) PetscCall(FillMatrixKokkosCOO(&fe, A));
172: #endif
173: #if defined(PETSC_HAVE_CUDA)
174: if (is_cuda) PetscCall(FillMatrixCUDACOO(&fe, A));
175: #endif
176: PetscCall(MatView(A, PETSC_VIEWER_STDOUT_WORLD));
178: PetscCall(MatDestroy(&A));
179: PetscCall(DestroyFEStruct(&fe));
180: PetscCall(PetscFinalize());
181: return 0;
182: }
184: /*TEST
185: build:
186: requires: cuda kokkos_kernels
187: depends: ex18cu.cu ex18kok.kokkos.cxx
189: testset:
190: filter: grep -v "type"
191: output_file: output/ex18_1.out
193: test:
194: suffix: kok
195: requires: kokkos_kernels
196: args: -mat_type aijkokkos
198: test:
199: suffix: cuda
200: requires: cuda
201: args: -mat_type aijcusparse
203: test:
204: suffix: hip
205: requires: hip
206: args: -mat_type aijhipsparse
208: TEST*/