Actual source code: ex18.c
1: static char help[] = "Demonstrates the use of the COO interface to PETSc matrices for finite element computations\n\n";
3: /*
4: The COO interface for PETSc matrices provides a convenient way to provide finite element element stiffness matrices to PETSc matrix that should work
5: well on both CPUs and GPUs. It is an alternative to using MatSetValues()
7: This example is intended for people who are NOT using DMPLEX or libCEED or any other higher-level infrastructure for finite elements;
8: it is only to demonstrate the concepts in a simple way for those people who are interested and for those people who are using PETSc for
9: linear algebra solvers but are managing their own finite element process.
11: Please do NOT use this example as a starting point to writing your own finite element code from scratch!
13: Each element in this example has three vertices; hence the the usage below needs to be adjusted for elements of a different number of vertices.
14: */
16: #include <petscmat.h>
17: #include "ex18.h"
19: static PetscErrorCode CreateFEStruct(FEStruct *fe)
20: {
22: fe->Nv = 5;
23: fe->Ne = 3;
24: PetscMalloc1(3*fe->Ne,&fe->vertices);
25: /* the three vertices associated with each element in order of element */
26: fe->vertices[0 + 0] = 0;
27: fe->vertices[0 + 1] = 1;
28: fe->vertices[0 + 2] = 2;
29: fe->vertices[3 + 0] = 2;
30: fe->vertices[3 + 1] = 1;
31: fe->vertices[3 + 2] = 3;
32: fe->vertices[6 + 0] = 2;
33: fe->vertices[6 + 1] = 4;
34: fe->vertices[6 + 2] = 3;
35: fe->n = 5;
36: return 0;
37: }
39: static PetscErrorCode DestroyFEStruct(FEStruct *fe)
40: {
42: PetscFree(fe->vertices);
43: PetscFree(fe->coo);
44: return 0;
45: }
47: static PetscErrorCode CreateMatrix(FEStruct *fe,Mat *A)
48: {
49: PetscInt *oor,*ooc,cnt = 0;
52: MatCreate(PETSC_COMM_WORLD,A);
53: MatSetSizes(*A,fe->n,fe->n,PETSC_DECIDE,PETSC_DECIDE);
54: MatSetFromOptions(*A);
56: /* determine for each entry in each element stiffness matrix the global row and colum */
57: /* since the element is triangular with piecewise linear basis functions there are three degrees of freedom per element, one for each vertex */
58: PetscMalloc2(3*3*fe->Ne,&oor,3*3*fe->Ne,&ooc);
59: for (PetscInt e=0; e<fe->Ne; e++) {
60: for (PetscInt vi=0; vi<3; vi++) {
61: for (PetscInt vj=0; vj<3; vj++) {
62: oor[cnt] = fe->vertices[3*e+vi];
63: ooc[cnt++] = fe->vertices[3*e+vj];
64: }
65: }
66: }
67: MatSetPreallocationCOO(*A,3*3*fe->Ne,oor,ooc);
68: PetscFree2(oor,ooc);
70: /* determine the offset into the COO value array the offset of each element stiffness; there are 9 = 3*3 entries for each element stiffness */
71: /* for lists of elements with different numbers of degrees of freedom assocated with each element the offsets will not be uniform */
72: PetscMalloc1(fe->Ne,&fe->coo);
73: fe->coo[0] = 0;
74: for (PetscInt e=1; e<fe->Ne; e++) {
75: fe->coo[e] = fe->coo[e-1] + 3*3;
76: }
77: return 0;
78: }
80: static PetscErrorCode FillMatrixCPU(FEStruct *fe,Mat A)
81: {
82: PetscScalar s[9];
85: /* simulation of traditional PETSc CPU based finite assembly process */
86: for (PetscInt e=0; e<fe->Ne; e++) {
87: for (PetscInt vi=0; vi<3; vi++) {
88: for (PetscInt vj=0; vj<3; vj++) {
89: s[3*vi+vj] = vi+2*vj;
90: }
91: }
92: MatSetValues(A,3,fe->vertices + 3*e,3, fe->vertices + 3*e,s,ADD_VALUES);
93: }
94: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
95: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
96: return 0;
97: }
99: /*
100: Shows an example of tracking element offsets explicitly, which allows for
101: mixed-topology meshes and combining both volume and surface parts into the weak form.
102: */
103: static PetscErrorCode FillMatrixCPUCOO(FEStruct *fe,Mat A)
104: {
105: PetscScalar *v,*s;
108: /* simulation of CPU based finite assembly process with COO */
109: PetscMalloc1(3*3*fe->Ne,&v);
110: for (PetscInt e=0; e<fe->Ne; e++) {
111: s = v + fe->coo[e]; /* point to location in COO of current element stiffness */
112: for (PetscInt vi=0; vi<3; vi++) {
113: for (PetscInt vj=0; vj<3; vj++) {
114: s[3*vi+vj] = vi+2*vj;
115: }
116: }
117: }
118: MatSetValuesCOO(A,v,ADD_VALUES);
119: PetscFree(v);
120: return 0;
121: }
123: /*
124: Uses a multi-dimensional indexing technique that works for homogeneous meshes
125: such as single-topology with volume integral only.
126: */
127: static PetscErrorCode FillMatrixCPUCOO3d(FEStruct *fe,Mat A)
128: {
129: PetscScalar (*s)[3][3];
132: /* simulation of CPU based finite assembly process with COO */
133: PetscMalloc1(fe->Ne,&s);
134: for (PetscInt e=0; e<fe->Ne; e++) {
135: for (PetscInt vi=0; vi<3; vi++) {
136: for (PetscInt vj=0; vj<3; vj++) {
137: s[e][vi][vj] = vi+2*vj;
138: }
139: }
140: }
141: MatSetValuesCOO(A,(PetscScalar*)s,INSERT_VALUES);
142: PetscFree(s);
143: return 0;
144: }
146: int main(int argc, char **args)
147: {
148: Mat A;
149: FEStruct fe;
150: PetscMPIInt size;
151: PetscBool is_kokkos,is_cuda;
153: PetscInitialize(&argc,&args,(char*)0,help);
154: MPI_Comm_size(PETSC_COMM_WORLD,&size);
157: CreateFEStruct(&fe);
158: CreateMatrix(&fe,&A);
160: FillMatrixCPU(&fe,A);
161: MatView(A,PETSC_VIEWER_STDOUT_WORLD);
163: MatZeroEntries(A);
164: FillMatrixCPUCOO(&fe,A);
165: MatView(A,PETSC_VIEWER_STDOUT_WORLD);
167: MatZeroEntries(A);
168: FillMatrixCPUCOO3d(&fe,A);
169: MatView(A,PETSC_VIEWER_STDOUT_WORLD);
171: MatZeroEntries(A);
172: PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJKOKKOS,&is_kokkos);
173: PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJCUSPARSE,&is_cuda);
174: #if defined(PETSC_HAVE_KOKKOS)
175: if (is_kokkos) FillMatrixKokkosCOO(&fe,A);
176: #endif
177: #if defined(PETSC_HAVE_CUDA)
178: if (is_cuda) FillMatrixCUDACOO(&fe,A);
179: #endif
180: MatView(A,PETSC_VIEWER_STDOUT_WORLD);
182: MatDestroy(&A);
183: DestroyFEStruct(&fe);
184: PetscFinalize();
185: return 0;
186: }
188: /*TEST
189: build:
190: requires: cuda kokkos_kernels
191: depends: ex18cu.cu ex18kok.kokkos.cxx
193: testset:
194: filter: grep -v "type"
195: output_file: output/ex18_1.out
197: test:
198: suffix: kok
199: requires: kokkos_kernels
200: args: -mat_type aijkokkos
202: test:
203: suffix: cuda
204: requires: cuda
205: args: -mat_type aijcusparse
207: TEST*/