Actual source code: ex51.c
petsc-3.10.5 2019-03-28
2: static char help[] = "This example solves a linear system in parallel with KSP. The matrix\n\
3: uses arbitrary order polynomials for finite elements on the unit square. To test the parallel\n\
4: matrix assembly, the matrix is intentionally laid out across processors\n\
5: differently from the way it is assembled. Input arguments are:\n\
6: -m <size> -p <order> : mesh size and polynomial order\n\n";
8: /* Contributed by Travis Austin <austin@txcorp.com>, 2010.
9: based on src/ksp/ksp/examples/tutorials/ex3.c
10: */
12: #include <petscksp.h>
14: /* Declare user-defined routines */
15: static PetscReal src(PetscReal,PetscReal);
16: static PetscReal ubdy(PetscReal,PetscReal);
17: static PetscReal polyBasisFunc(PetscInt,PetscInt,PetscReal*,PetscReal);
18: static PetscReal derivPolyBasisFunc(PetscInt,PetscInt,PetscReal*,PetscReal);
19: static PetscErrorCode Form1DElementMass(PetscReal,PetscInt,PetscReal*,PetscReal*,PetscScalar*);
20: static PetscErrorCode Form1DElementStiffness(PetscReal,PetscInt,PetscReal*,PetscReal*,PetscScalar*);
21: static PetscErrorCode Form2DElementMass(PetscInt,PetscScalar*,PetscScalar*);
22: static PetscErrorCode Form2DElementStiffness(PetscInt,PetscScalar*,PetscScalar*,PetscScalar*);
23: static PetscErrorCode FormNodalRhs(PetscInt,PetscReal,PetscReal,PetscReal,PetscReal*,PetscScalar*);
24: static PetscErrorCode FormNodalSoln(PetscInt,PetscReal,PetscReal,PetscReal,PetscReal*,PetscScalar*);
25: static void leggaulob(PetscReal, PetscReal, PetscReal [], PetscReal [], int);
26: static void qAndLEvaluation(int, PetscReal, PetscReal*, PetscReal*, PetscReal*);
28: int main(int argc,char **args)
29: {
30: PetscInt p = 2, m = 5;
31: PetscInt num1Dnodes, num2Dnodes;
32: PetscScalar *Ke1D,*Ke2D,*Me1D,*Me2D;
33: PetscScalar *r,*ue,val;
34: Vec u,ustar,b,q;
35: Mat A,Mass;
36: KSP ksp;
37: PetscInt M,N;
38: PetscMPIInt rank,size;
39: PetscReal x,y,h,norm;
40: PetscInt *idx,indx,count,*rows,i,j,k,start,end,its;
41: PetscReal *rowsx,*rowsy;
42: PetscReal *gllNode, *gllWgts;
45: PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr;
46: PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"Options for p-FEM","");
47: PetscOptionsInt("-m","Number of elements in each direction","None",m,&m,NULL);
48: PetscOptionsInt("-p","Order of each element (tensor product basis)","None",p,&p,NULL);
49: PetscOptionsEnd();
50: if (p <=0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Option -p value should be greater than zero");
51: N = (p*m+1)*(p*m+1); /* dimension of matrix */
52: M = m*m; /* number of elements */
53: h = 1.0/m; /* mesh width */
54: MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
55: MPI_Comm_size(PETSC_COMM_WORLD,&size);
57: /* Create stiffness matrix */
58: MatCreate(PETSC_COMM_WORLD,&A);
59: MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N);
60: MatSetFromOptions(A);
61: MatSetUp(A);
63: /* Create matrix */
64: MatCreate(PETSC_COMM_WORLD,&Mass);
65: MatSetSizes(Mass,PETSC_DECIDE,PETSC_DECIDE,N,N);
66: MatSetFromOptions(Mass);
67: MatSetUp(Mass);
68: start = rank*(M/size) + ((M%size) < rank ? (M%size) : rank);
69: end = start + M/size + ((M%size) > rank);
71: /* Allocate element stiffness matrices */
72: num1Dnodes = (p+1);
73: num2Dnodes = num1Dnodes*num1Dnodes;
75: PetscMalloc1(num1Dnodes*num1Dnodes,&Me1D);
76: PetscMalloc1(num1Dnodes*num1Dnodes,&Ke1D);
77: PetscMalloc1(num2Dnodes*num2Dnodes,&Me2D);
78: PetscMalloc1(num2Dnodes*num2Dnodes,&Ke2D);
79: PetscMalloc1(num2Dnodes,&idx);
80: PetscMalloc1(num2Dnodes,&r);
81: PetscMalloc1(num2Dnodes,&ue);
83: /* Allocate quadrature and create stiffness matrices */
84: PetscMalloc1(p+1,&gllNode);
85: PetscMalloc1(p+1,&gllWgts);
86: leggaulob(0.0,1.0,gllNode,gllWgts,p); /* Get GLL nodes and weights */
87: Form1DElementMass(h,p,gllNode,gllWgts,Me1D);
88: Form1DElementStiffness(h,p,gllNode,gllWgts,Ke1D);
89: Form2DElementMass(p,Me1D,Me2D);
90: Form2DElementStiffness(p,Ke1D,Me1D,Ke2D);
92: /* Assemble matrix */
93: for (i=start; i<end; i++) {
94: indx = 0;
95: for (k=0;k<(p+1);++k) {
96: for (j=0;j<(p+1);++j) {
97: idx[indx++] = p*(p*m+1)*(i/m) + p*(i % m) + k*(p*m+1) + j;
98: }
99: }
100: MatSetValues(A,num2Dnodes,idx,num2Dnodes,idx,Ke2D,ADD_VALUES);
101: MatSetValues(Mass,num2Dnodes,idx,num2Dnodes,idx,Me2D,ADD_VALUES);
102: }
103: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
104: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
105: MatAssemblyBegin(Mass,MAT_FINAL_ASSEMBLY);
106: MatAssemblyEnd(Mass,MAT_FINAL_ASSEMBLY);
108: PetscFree(Me1D);
109: PetscFree(Ke1D);
110: PetscFree(Me2D);
111: PetscFree(Ke2D);
113: /* Create right-hand-side and solution vectors */
114: VecCreate(PETSC_COMM_WORLD,&u);
115: VecSetSizes(u,PETSC_DECIDE,N);
116: VecSetFromOptions(u);
117: PetscObjectSetName((PetscObject)u,"Approx. Solution");
118: VecDuplicate(u,&b);
119: PetscObjectSetName((PetscObject)b,"Right hand side");
120: VecDuplicate(u,&q);
121: PetscObjectSetName((PetscObject)q,"Right hand side 2");
122: VecDuplicate(b,&ustar);
123: VecSet(u,0.0);
124: VecSet(b,0.0);
125: VecSet(q,0.0);
127: /* Assemble nodal right-hand-side and soln vector */
128: for (i=start; i<end; i++) {
129: x = h*(i % m);
130: y = h*(i/m);
131: indx = 0;
132: for (k=0;k<(p+1);++k) {
133: for (j=0;j<(p+1);++j) {
134: idx[indx++] = p*(p*m+1)*(i/m) + p*(i % m) + k*(p*m+1) + j;
135: }
136: }
137: FormNodalRhs(p,x,y,h,gllNode,r);
138: FormNodalSoln(p,x,y,h,gllNode,ue);
139: VecSetValues(q,num2Dnodes,idx,r,INSERT_VALUES);
140: VecSetValues(ustar,num2Dnodes,idx,ue,INSERT_VALUES);
141: }
142: VecAssemblyBegin(q);
143: VecAssemblyEnd(q);
144: VecAssemblyBegin(ustar);
145: VecAssemblyEnd(ustar);
147: PetscFree(idx);
148: PetscFree(r);
149: PetscFree(ue);
151: /* Get FE right-hand side vector */
152: MatMult(Mass,q,b);
154: /* Modify matrix and right-hand-side for Dirichlet boundary conditions */
155: PetscMalloc1(4*p*m,&rows);
156: PetscMalloc1(4*p*m,&rowsx);
157: PetscMalloc1(4*p*m,&rowsy);
158: for (i=0; i<p*m+1; i++) {
159: rows[i] = i; /* bottom */
160: rowsx[i] = (i/p)*h+gllNode[i%p]*h;
161: rowsy[i] = 0.0;
162: rows[3*p*m-1+i] = (p*m)*(p*m+1) + i; /* top */
163: rowsx[3*p*m-1+i] = (i/p)*h+gllNode[i%p]*h;
164: rowsy[3*p*m-1+i] = 1.0;
165: }
166: count = p*m+1; /* left side */
167: indx = 1;
168: for (i=p*m+1; i<(p*m)*(p*m+1); i+= (p*m+1)) {
169: rows[count] = i;
170: rowsx[count] = 0.0;
171: rowsy[count++] = (indx/p)*h+gllNode[indx%p]*h;
172: indx++;
173: }
174: count = 2*p*m; /* right side */
175: indx = 1;
176: for (i=2*p*m+1; i<(p*m)*(p*m+1); i+= (p*m+1)) {
177: rows[count] = i;
178: rowsx[count] = 1.0;
179: rowsy[count++] = (indx/p)*h+gllNode[indx%p]*h;
180: indx++;
181: }
182: for (i=0; i<4*p*m; i++) {
183: x = rowsx[i];
184: y = rowsy[i];
185: val = ubdy(x,y);
186: VecSetValues(b,1,&rows[i],&val,INSERT_VALUES);
187: VecSetValues(u,1,&rows[i],&val,INSERT_VALUES);
188: }
189: MatZeroRows(A,4*p*m,rows,1.0,0,0);
190: PetscFree(rows);
191: PetscFree(rowsx);
192: PetscFree(rowsy);
194: VecAssemblyBegin(u);
195: VecAssemblyEnd(u);
196: VecAssemblyBegin(b);
197: VecAssemblyEnd(b);
199: /* Solve linear system */
200: KSPCreate(PETSC_COMM_WORLD,&ksp);
201: KSPSetOperators(ksp,A,A);
202: KSPSetInitialGuessNonzero(ksp,PETSC_TRUE);
203: KSPSetFromOptions(ksp);
204: KSPSolve(ksp,b,u);
206: /* Check error */
207: VecAXPY(u,-1.0,ustar);
208: VecNorm(u,NORM_2,&norm);
209: KSPGetIterationNumber(ksp,&its);
210: PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g Iterations %D\n",(double)(norm*h),its);
212: PetscFree(gllNode);
213: PetscFree(gllWgts);
215: KSPDestroy(&ksp);
216: VecDestroy(&u);
217: VecDestroy(&b);
218: VecDestroy(&q);
219: VecDestroy(&ustar);
220: MatDestroy(&A);
221: MatDestroy(&Mass);
223: PetscFinalize();
224: return ierr;
225: }
227: /* --------------------------------------------------------------------- */
229: /* 1d element stiffness mass matrix */
230: static PetscErrorCode Form1DElementMass(PetscReal H,PetscInt P,PetscReal *gqn,PetscReal *gqw,PetscScalar *Me1D)
231: {
232: PetscInt i,j,k;
233: PetscInt indx;
236: for (j=0; j<(P+1); ++j) {
237: for (i=0; i<(P+1); ++i) {
238: indx = j*(P+1)+i;
239: Me1D[indx] = 0.0;
240: for (k=0; k<(P+1);++k) {
241: Me1D[indx] += H*gqw[k]*polyBasisFunc(P,i,gqn,gqn[k])*polyBasisFunc(P,j,gqn,gqn[k]);
242: }
243: }
244: }
245: return(0);
246: }
248: /* --------------------------------------------------------------------- */
250: /* 1d element stiffness matrix for derivative */
251: static PetscErrorCode Form1DElementStiffness(PetscReal H,PetscInt P,PetscReal *gqn,PetscReal *gqw,PetscScalar *Ke1D)
252: {
253: PetscInt i,j,k;
254: PetscInt indx;
257: for (j=0;j<(P+1);++j) {
258: for (i=0;i<(P+1);++i) {
259: indx = j*(P+1)+i;
260: Ke1D[indx] = 0.0;
261: for (k=0; k<(P+1);++k) {
262: Ke1D[indx] += (1./H)*gqw[k]*derivPolyBasisFunc(P,i,gqn,gqn[k])*derivPolyBasisFunc(P,j,gqn,gqn[k]);
263: }
264: }
265: }
266: return(0);
267: }
269: /* --------------------------------------------------------------------- */
271: /* element mass matrix */
272: static PetscErrorCode Form2DElementMass(PetscInt P,PetscScalar *Me1D,PetscScalar *Me2D)
273: {
274: PetscInt i1,j1,i2,j2;
275: PetscInt indx1,indx2,indx3;
278: for (j2=0;j2<(P+1);++j2) {
279: for (i2=0; i2<(P+1);++i2) {
280: for (j1=0;j1<(P+1);++j1) {
281: for (i1=0;i1<(P+1);++i1) {
282: indx1 = j1*(P+1)+i1;
283: indx2 = j2*(P+1)+i2;
284: indx3 = (j2*(P+1)+j1)*(P+1)*(P+1)+(i2*(P+1)+i1);
285: Me2D[indx3] = Me1D[indx1]*Me1D[indx2];
286: }
287: }
288: }
289: }
290: return(0);
291: }
293: /* --------------------------------------------------------------------- */
295: /* element stiffness for Laplacian */
296: static PetscErrorCode Form2DElementStiffness(PetscInt P,PetscScalar *Ke1D,PetscScalar *Me1D,PetscScalar *Ke2D)
297: {
298: PetscInt i1,j1,i2,j2;
299: PetscInt indx1,indx2,indx3;
302: for (j2=0;j2<(P+1);++j2) {
303: for (i2=0; i2<(P+1);++i2) {
304: for (j1=0;j1<(P+1);++j1) {
305: for (i1=0;i1<(P+1);++i1) {
306: indx1 = j1*(P+1)+i1;
307: indx2 = j2*(P+1)+i2;
308: indx3 = (j2*(P+1)+j1)*(P+1)*(P+1)+(i2*(P+1)+i1);
309: Ke2D[indx3] = Ke1D[indx1]*Me1D[indx2] + Me1D[indx1]*Ke1D[indx2];
310: }
311: }
312: }
313: }
314: return(0);
315: }
317: /* --------------------------------------------------------------------- */
319: static PetscErrorCode FormNodalRhs(PetscInt P,PetscReal x,PetscReal y,PetscReal H,PetscReal* nds,PetscScalar *r)
320: {
321: PetscInt i,j,indx;
324: indx=0;
325: for (j=0;j<(P+1);++j) {
326: for (i=0;i<(P+1);++i) {
327: r[indx] = src(x+H*nds[i],y+H*nds[j]);
328: indx++;
329: }
330: }
331: return(0);
332: }
334: /* --------------------------------------------------------------------- */
336: static PetscErrorCode FormNodalSoln(PetscInt P,PetscReal x,PetscReal y,PetscReal H,PetscReal* nds,PetscScalar *u)
337: {
338: PetscInt i,j,indx;
341: indx=0;
342: for (j=0;j<(P+1);++j) {
343: for (i=0;i<(P+1);++i) {
344: u[indx] = ubdy(x+H*nds[i],y+H*nds[j]);
345: indx++;
346: }
347: }
348: return(0);
349: }
351: /* --------------------------------------------------------------------- */
353: static PetscReal polyBasisFunc(PetscInt order, PetscInt basis, PetscReal *xLocVal, PetscReal xval)
354: {
355: PetscReal denominator = 1.;
356: PetscReal numerator = 1.;
357: PetscInt i =0;
359: for (i=0; i<(order+1); i++) {
360: if (i!=basis) {
361: numerator *= (xval-xLocVal[i]);
362: denominator *= (xLocVal[basis]-xLocVal[i]);
363: }
364: }
365: return (numerator/denominator);
366: }
368: /* --------------------------------------------------------------------- */
370: static PetscReal derivPolyBasisFunc(PetscInt order, PetscInt basis, PetscReal *xLocVal, PetscReal xval)
371: {
372: PetscReal denominator;
373: PetscReal numerator;
374: PetscReal numtmp;
375: PetscInt i=0,j=0;
377: denominator=1.;
378: for (i=0; i<(order+1); i++) {
379: if (i!=basis) denominator *= (xLocVal[basis]-xLocVal[i]);
380: }
381: numerator = 0.;
382: for (j=0;j<(order+1);++j) {
383: if (j != basis) {
384: numtmp = 1.;
385: for (i=0;i<(order+1);++i) {
386: if (i!=basis && j!=i) numtmp *= (xval-xLocVal[i]);
387: }
388: numerator += numtmp;
389: }
390: }
392: return (numerator/denominator);
393: }
395: /* --------------------------------------------------------------------- */
397: static PetscReal ubdy(PetscReal x,PetscReal y)
398: {
399: return x*x*y*y;
400: }
402: static PetscReal src(PetscReal x,PetscReal y)
403: {
404: return -2.*y*y - 2.*x*x;
405: }
406: /* --------------------------------------------------------------------- */
408: static void leggaulob(PetscReal x1, PetscReal x2, PetscReal x[], PetscReal w[], int n)
409: /*******************************************************************************
410: Given the lower and upper limits of integration x1 and x2, and given n, this
411: routine returns arrays x[0..n-1] and w[0..n-1] of length n, containing the abscissas
412: and weights of the Gauss-Lobatto-Legendre n-point quadrature formula.
413: *******************************************************************************/
414: {
415: PetscInt j,m;
416: PetscReal z1,z,xm,xl,q,qp,Ln,scale;
417: if (n==1) {
418: x[0] = x1; /* Scale the root to the desired interval, */
419: x[1] = x2; /* and put in its symmetric counterpart. */
420: w[0] = 1.; /* Compute the weight */
421: w[1] = 1.; /* and its symmetric counterpart. */
422: } else {
423: x[0] = x1; /* Scale the root to the desired interval, */
424: x[n] = x2; /* and put in its symmetric counterpart. */
425: w[0] = 2./(n*(n+1));; /* Compute the weight */
426: w[n] = 2./(n*(n+1)); /* and its symmetric counterpart. */
427: m = (n+1)/2; /* The roots are symmetric, so we only find half of them. */
428: xm = 0.5*(x2+x1);
429: xl = 0.5*(x2-x1);
430: for (j=1; j<=(m-1); j++) { /* Loop over the desired roots. */
431: z=-1.0*PetscCosReal((PETSC_PI*(j+0.25)/(n))-(3.0/(8.0*n*PETSC_PI))*(1.0/(j+0.25)));
432: /* Starting with the above approximation to the ith root, we enter */
433: /* the main loop of refinement by Newton's method. */
434: do {
435: qAndLEvaluation(n,z,&q,&qp,&Ln);
436: z1 = z;
437: z = z1-q/qp; /* Newton's method. */
438: } while (PetscAbsReal(z-z1) > 3.0e-11);
439: qAndLEvaluation(n,z,&q,&qp,&Ln);
440: x[j] = xm+xl*z; /* Scale the root to the desired interval, */
441: x[n-j] = xm-xl*z; /* and put in its symmetric counterpart. */
442: w[j] = 2.0/(n*(n+1)*Ln*Ln); /* Compute the weight */
443: w[n-j] = w[j]; /* and its symmetric counterpart. */
444: }
445: }
446: if (n%2==0) {
447: qAndLEvaluation(n,0.0,&q,&qp,&Ln);
448: x[n/2]=(x2-x1)/2.0;
449: w[n/2]=2.0/(n*(n+1)*Ln*Ln);
450: }
451: /* scale the weights according to mapping from [-1,1] to [0,1] */
452: scale = (x2-x1)/2.0;
453: for (j=0; j<=n; ++j) w[j] = w[j]*scale;
454: }
457: /******************************************************************************/
458: static void qAndLEvaluation(int n, PetscReal x, PetscReal *q, PetscReal *qp, PetscReal *Ln)
459: /*******************************************************************************
460: Compute the polynomial qn(x) = L_{N+1}(x) - L_{n-1}(x) and its derivative in
461: addition to L_N(x) as these are needed for the GLL points. See the book titled
462: "Implementing Spectral Methods for Partial Differential Equations: Algorithms
463: for Scientists and Engineers" by David A. Kopriva.
464: *******************************************************************************/
465: {
466: PetscInt k;
468: PetscReal Lnp;
469: PetscReal Lnp1, Lnp1p;
470: PetscReal Lnm1, Lnm1p;
471: PetscReal Lnm2, Lnm2p;
473: Lnm1 = 1.0;
474: *Ln = x;
475: Lnm1p = 0.0;
476: Lnp = 1.0;
478: for (k=2; k<=n; ++k) {
479: Lnm2 = Lnm1;
480: Lnm1 = *Ln;
481: Lnm2p = Lnm1p;
482: Lnm1p = Lnp;
483: *Ln = (2.*k-1.)/(1.0*k)*x*Lnm1 - (k-1.)/(1.0*k)*Lnm2;
484: Lnp = Lnm2p + (2.0*k-1)*Lnm1;
485: }
486: k = n+1;
487: Lnp1 = (2.*k-1.)/(1.0*k)*x*(*Ln) - (k-1.)/(1.0*k)*Lnm1;
488: Lnp1p = Lnm1p + (2.0*k-1)*(*Ln);
489: *q = Lnp1 - Lnm1;
490: *qp = Lnp1p - Lnm1p;
491: }
495: /*TEST
497: test:
498: nsize: 2
499: args: -ksp_monitor_short
501: TEST*/