Actual source code: ex51.c
petsc-3.6.1 2015-08-06
2: static char help[] = "This example solves a linear system in parallel with KSP. The matrix\n\
3: uses arbitrary order polynomials for finite elements on the unit square. To test the parallel\n\
4: matrix assembly, the matrix is intentionally laid out across processors\n\
5: differently from the way it is assembled. Input arguments are:\n\
6: -m <size> -p <order> : mesh size and polynomial order\n\n";
8: /* Contributed by Travis Austin <austin@txcorp.com>, 2010.
9: based on src/ksp/ksp/examples/tutorials/ex3.c
10: */
12: #include <petscksp.h>
14: /* Declare user-defined routines */
15: static PetscReal src(PetscReal,PetscReal);
16: static PetscReal ubdy(PetscReal,PetscReal);
17: static PetscReal polyBasisFunc(PetscInt,PetscInt,PetscReal*,PetscReal);
18: static PetscReal derivPolyBasisFunc(PetscInt,PetscInt,PetscReal*,PetscReal);
19: static PetscErrorCode Form1DElementMass(PetscReal,PetscInt,PetscReal*,PetscReal*,PetscScalar*);
20: static PetscErrorCode Form1DElementStiffness(PetscReal,PetscInt,PetscReal*,PetscReal*,PetscScalar*);
21: static PetscErrorCode Form2DElementMass(PetscInt,PetscScalar*,PetscScalar*);
22: static PetscErrorCode Form2DElementStiffness(PetscInt,PetscScalar*,PetscScalar*,PetscScalar*);
23: static PetscErrorCode FormNodalRhs(PetscInt,PetscReal,PetscReal,PetscReal,PetscReal*,PetscScalar*);
24: static PetscErrorCode FormNodalSoln(PetscInt,PetscReal,PetscReal,PetscReal,PetscReal*,PetscScalar*);
25: static void leggaulob(PetscReal, PetscReal, PetscReal [], PetscReal [], int);
26: static void qAndLEvaluation(int, PetscReal, PetscReal*, PetscReal*, PetscReal*);
30: int main(int argc,char **args)
31: {
32: PetscInt p = 2, m = 5;
33: PetscInt num1Dnodes, num2Dnodes;
34: PetscScalar *Ke1D,*Ke2D,*Me1D,*Me2D;
35: PetscScalar *r,*ue,val;
36: Vec u,ustar,b,q;
37: Mat A,Mass;
38: KSP ksp;
39: PetscInt M,N;
40: PetscMPIInt rank,size;
41: PetscReal x,y,h,norm;
42: PetscInt *idx,indx,count,*rows,i,j,k,start,end,its;
43: PetscReal *rowsx,*rowsy;
44: PetscReal *gllNode, *gllWgts;
47: PetscInitialize(&argc,&args,(char*)0,help);
48: PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"Options for p-FEM","");
49: PetscOptionsInt("-m","Number of elements in each direction","None",m,&m,NULL);
50: PetscOptionsInt("-p","Order of each element (tensor product basis)","None",p,&p,NULL);
51: PetscOptionsEnd();
52: N = (p*m+1)*(p*m+1); /* dimension of matrix */
53: M = m*m; /* number of elements */
54: h = 1.0/m; /* mesh width */
55: MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
56: MPI_Comm_size(PETSC_COMM_WORLD,&size);
58: /* Create stiffness matrix */
59: MatCreate(PETSC_COMM_WORLD,&A);
60: MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N);
61: MatSetFromOptions(A);
62: start = rank*(M/size) + ((M%size) < rank ? (M%size) : rank);
63: end = start + M/size + ((M%size) > rank);
65: /* Create matrix */
66: MatCreate(PETSC_COMM_WORLD,&Mass);
67: MatSetSizes(Mass,PETSC_DECIDE,PETSC_DECIDE,N,N);
68: MatSetFromOptions(Mass);
69: start = rank*(M/size) + ((M%size) < rank ? (M%size) : rank);
70: end = start + M/size + ((M%size) > rank);
72: /* Allocate element stiffness matrices */
73: num1Dnodes = (p+1);
74: num2Dnodes = num1Dnodes*num1Dnodes;
76: PetscMalloc1(num1Dnodes*num1Dnodes,&Me1D);
77: PetscMalloc1(num1Dnodes*num1Dnodes,&Ke1D);
78: PetscMalloc1(num2Dnodes*num2Dnodes,&Me2D);
79: PetscMalloc1(num2Dnodes*num2Dnodes,&Ke2D);
80: PetscMalloc1(num2Dnodes,&idx);
81: PetscMalloc1(num2Dnodes,&r);
82: PetscMalloc1(num2Dnodes,&ue);
84: /* Allocate quadrature and create stiffness matrices */
85: PetscMalloc1(p+1,&gllNode);
86: PetscMalloc1(p+1,&gllWgts);
87: leggaulob(0.0,1.0,gllNode,gllWgts,p); /* Get GLL nodes and weights */
88: Form1DElementMass(h,p,gllNode,gllWgts,Me1D);
89: Form1DElementStiffness(h,p,gllNode,gllWgts,Ke1D);
90: Form2DElementMass(p,Me1D,Me2D);
91: Form2DElementStiffness(p,Ke1D,Me1D,Ke2D);
93: /* Assemble matrix */
94: for (i=start; i<end; i++) {
95: indx = 0;
96: for (k=0;k<(p+1);++k) {
97: for (j=0;j<(p+1);++j) {
98: idx[indx++] = p*(p*m+1)*(i/m) + p*(i % m) + k*(p*m+1) + j;
99: }
100: }
101: MatSetValues(A,num2Dnodes,idx,num2Dnodes,idx,Ke2D,ADD_VALUES);
102: MatSetValues(Mass,num2Dnodes,idx,num2Dnodes,idx,Me2D,ADD_VALUES);
103: }
104: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
105: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
106: MatAssemblyBegin(Mass,MAT_FINAL_ASSEMBLY);
107: MatAssemblyEnd(Mass,MAT_FINAL_ASSEMBLY);
109: PetscFree(Me1D);
110: PetscFree(Ke1D);
111: PetscFree(Me2D);
112: PetscFree(Ke2D);
114: /* Create right-hand-side and solution vectors */
115: VecCreate(PETSC_COMM_WORLD,&u);
116: VecSetSizes(u,PETSC_DECIDE,N);
117: VecSetFromOptions(u);
118: PetscObjectSetName((PetscObject)u,"Approx. Solution");
119: VecDuplicate(u,&b);
120: PetscObjectSetName((PetscObject)b,"Right hand side");
121: VecDuplicate(u,&q);
122: PetscObjectSetName((PetscObject)q,"Right hand side 2");
123: VecDuplicate(b,&ustar);
124: VecSet(u,0.0);
125: VecSet(b,0.0);
126: VecSet(q,0.0);
128: /* Assemble nodal right-hand-side and soln vector */
129: for (i=start; i<end; i++) {
130: x = h*(i % m);
131: y = h*(i/m);
132: indx = 0;
133: for (k=0;k<(p+1);++k) {
134: for (j=0;j<(p+1);++j) {
135: idx[indx++] = p*(p*m+1)*(i/m) + p*(i % m) + k*(p*m+1) + j;
136: }
137: }
138: FormNodalRhs(p,x,y,h,gllNode,r);
139: FormNodalSoln(p,x,y,h,gllNode,ue);
140: VecSetValues(q,num2Dnodes,idx,r,INSERT_VALUES);
141: VecSetValues(ustar,num2Dnodes,idx,ue,INSERT_VALUES);
142: }
143: VecAssemblyBegin(q);
144: VecAssemblyEnd(q);
145: VecAssemblyBegin(ustar);
146: VecAssemblyEnd(ustar);
148: PetscFree(idx);
149: PetscFree(r);
150: PetscFree(ue);
152: /* Get FE right-hand side vector */
153: MatMult(Mass,q,b);
155: /* Modify matrix and right-hand-side for Dirichlet boundary conditions */
156: PetscMalloc1(4*p*m,&rows);
157: PetscMalloc1(4*p*m,&rowsx);
158: PetscMalloc1(4*p*m,&rowsy);
159: for (i=0; i<p*m+1; i++) {
160: rows[i] = i; /* bottom */
161: rowsx[i] = (i/p)*h+gllNode[i%p]*h;
162: rowsy[i] = 0.0;
163: rows[3*p*m-1+i] = (p*m)*(p*m+1) + i; /* top */
164: rowsx[3*p*m-1+i] = (i/p)*h+gllNode[i%p]*h;
165: rowsy[3*p*m-1+i] = 1.0;
166: }
167: count = p*m+1; /* left side */
168: indx = 1;
169: for (i=p*m+1; i<(p*m)*(p*m+1); i+= (p*m+1)) {
170: rows[count] = i;
171: rowsx[count] = 0.0;
172: rowsy[count++] = (indx/p)*h+gllNode[indx%p]*h;
173: indx++;
174: }
175: count = 2*p*m; /* right side */
176: indx = 1;
177: for (i=2*p*m+1; i<(p*m)*(p*m+1); i+= (p*m+1)) {
178: rows[count] = i;
179: rowsx[count] = 1.0;
180: rowsy[count++] = (indx/p)*h+gllNode[indx%p]*h;
181: indx++;
182: }
183: for (i=0; i<4*p*m; i++) {
184: x = rowsx[i];
185: y = rowsy[i];
186: val = ubdy(x,y);
187: VecSetValues(b,1,&rows[i],&val,INSERT_VALUES);
188: VecSetValues(u,1,&rows[i],&val,INSERT_VALUES);
189: }
190: MatZeroRows(A,4*p*m,rows,1.0,0,0);
191: PetscFree(rows);
192: PetscFree(rowsx);
193: PetscFree(rowsy);
195: VecAssemblyBegin(u);
196: VecAssemblyEnd(u);
197: VecAssemblyBegin(b);
198: VecAssemblyEnd(b);
200: /* Solve linear system */
201: KSPCreate(PETSC_COMM_WORLD,&ksp);
202: KSPSetOperators(ksp,A,A);
203: KSPSetInitialGuessNonzero(ksp,PETSC_TRUE);
204: KSPSetFromOptions(ksp);
205: KSPSolve(ksp,b,u);
207: /* Check error */
208: VecAXPY(u,-1.0,ustar);
209: VecNorm(u,NORM_2,&norm);
210: KSPGetIterationNumber(ksp,&its);
211: PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g Iterations %D\n",(double)(norm*h),its);
213: PetscFree(gllNode);
214: PetscFree(gllWgts);
216: KSPDestroy(&ksp);
217: VecDestroy(&u);
218: VecDestroy(&b);
219: VecDestroy(&q);
220: VecDestroy(&ustar);
221: MatDestroy(&A);
222: MatDestroy(&Mass);
224: PetscFinalize();
225: return 0;
226: }
228: /* --------------------------------------------------------------------- */
232: /* 1d element stiffness mass matrix */
233: static PetscErrorCode Form1DElementMass(PetscReal H,PetscInt P,PetscReal *gqn,PetscReal *gqw,PetscScalar *Me1D)
234: {
235: PetscInt i,j,k;
236: PetscInt indx;
239: for (j=0; j<(P+1); ++j) {
240: for (i=0; i<(P+1); ++i) {
241: indx = j*(P+1)+i;
242: Me1D[indx] = 0.0;
243: for (k=0; k<(P+1);++k) {
244: Me1D[indx] += H*gqw[k]*polyBasisFunc(P,i,gqn,gqn[k])*polyBasisFunc(P,j,gqn,gqn[k]);
245: }
246: }
247: }
248: return(0);
249: }
251: /* --------------------------------------------------------------------- */
255: /* 1d element stiffness matrix for derivative */
256: static PetscErrorCode Form1DElementStiffness(PetscReal H,PetscInt P,PetscReal *gqn,PetscReal *gqw,PetscScalar *Ke1D)
257: {
258: PetscInt i,j,k;
259: PetscInt indx;
262: for (j=0;j<(P+1);++j) {
263: for (i=0;i<(P+1);++i) {
264: indx = j*(P+1)+i;
265: Ke1D[indx] = 0.0;
266: for (k=0; k<(P+1);++k) {
267: Ke1D[indx] += (1./H)*gqw[k]*derivPolyBasisFunc(P,i,gqn,gqn[k])*derivPolyBasisFunc(P,j,gqn,gqn[k]);
268: }
269: }
270: }
271: return(0);
272: }
274: /* --------------------------------------------------------------------- */
278: /* element mass matrix */
279: static PetscErrorCode Form2DElementMass(PetscInt P,PetscScalar *Me1D,PetscScalar *Me2D)
280: {
281: PetscInt i1,j1,i2,j2;
282: PetscInt indx1,indx2,indx3;
285: for (j2=0;j2<(P+1);++j2) {
286: for (i2=0; i2<(P+1);++i2) {
287: for (j1=0;j1<(P+1);++j1) {
288: for (i1=0;i1<(P+1);++i1) {
289: indx1 = j1*(P+1)+i1;
290: indx2 = j2*(P+1)+i2;
291: indx3 = (j2*(P+1)+j1)*(P+1)*(P+1)+(i2*(P+1)+i1);
292: Me2D[indx3] = Me1D[indx1]*Me1D[indx2];
293: }
294: }
295: }
296: }
297: return(0);
298: }
300: /* --------------------------------------------------------------------- */
304: /* element stiffness for Laplacian */
305: static PetscErrorCode Form2DElementStiffness(PetscInt P,PetscScalar *Ke1D,PetscScalar *Me1D,PetscScalar *Ke2D)
306: {
307: PetscInt i1,j1,i2,j2;
308: PetscInt indx1,indx2,indx3;
311: for (j2=0;j2<(P+1);++j2) {
312: for (i2=0; i2<(P+1);++i2) {
313: for (j1=0;j1<(P+1);++j1) {
314: for (i1=0;i1<(P+1);++i1) {
315: indx1 = j1*(P+1)+i1;
316: indx2 = j2*(P+1)+i2;
317: indx3 = (j2*(P+1)+j1)*(P+1)*(P+1)+(i2*(P+1)+i1);
318: Ke2D[indx3] = Ke1D[indx1]*Me1D[indx2] + Me1D[indx1]*Ke1D[indx2];
319: }
320: }
321: }
322: }
323: return(0);
324: }
326: /* --------------------------------------------------------------------- */
330: static PetscErrorCode FormNodalRhs(PetscInt P,PetscReal x,PetscReal y,PetscReal H,PetscReal* nds,PetscScalar *r)
331: {
332: PetscInt i,j,indx;
335: indx=0;
336: for (j=0;j<(P+1);++j) {
337: for (i=0;i<(P+1);++i) {
338: r[indx] = src(x+H*nds[i],y+H*nds[j]);
339: indx++;
340: }
341: }
342: return(0);
343: }
345: /* --------------------------------------------------------------------- */
349: static PetscErrorCode FormNodalSoln(PetscInt P,PetscReal x,PetscReal y,PetscReal H,PetscReal* nds,PetscScalar *u)
350: {
351: PetscInt i,j,indx;
354: indx=0;
355: for (j=0;j<(P+1);++j) {
356: for (i=0;i<(P+1);++i) {
357: u[indx] = ubdy(x+H*nds[i],y+H*nds[j]);
358: indx++;
359: }
360: }
361: return(0);
362: }
364: /* --------------------------------------------------------------------- */
368: static PetscReal polyBasisFunc(PetscInt order, PetscInt basis, PetscReal *xLocVal, PetscReal xval)
369: {
370: PetscReal denominator = 1.;
371: PetscReal numerator = 1.;
372: PetscInt i =0;
374: for (i=0; i<(order+1); i++) {
375: if (i!=basis) {
376: numerator *= (xval-xLocVal[i]);
377: denominator *= (xLocVal[basis]-xLocVal[i]);
378: }
379: }
380: return (numerator/denominator);
381: }
383: /* --------------------------------------------------------------------- */
387: static PetscReal derivPolyBasisFunc(PetscInt order, PetscInt basis, PetscReal *xLocVal, PetscReal xval)
388: {
389: PetscReal denominator;
390: PetscReal numerator;
391: PetscReal numtmp;
392: PetscInt i=0,j=0;
394: denominator=1.;
395: for (i=0; i<(order+1); i++) {
396: if (i!=basis) denominator *= (xLocVal[basis]-xLocVal[i]);
397: }
398: numerator = 0.;
399: for (j=0;j<(order+1);++j) {
400: if (j != basis) {
401: numtmp = 1.;
402: for (i=0;i<(order+1);++i) {
403: if (i!=basis && j!=i) numtmp *= (xval-xLocVal[i]);
404: }
405: numerator += numtmp;
406: }
407: }
409: return (numerator/denominator);
410: }
412: /* --------------------------------------------------------------------- */
414: static PetscReal ubdy(PetscReal x,PetscReal y)
415: {
416: return x*x*y*y;
417: }
419: static PetscReal src(PetscReal x,PetscReal y)
420: {
421: return -2.*y*y - 2.*x*x;
422: }
423: /* --------------------------------------------------------------------- */
425: static void leggaulob(PetscReal x1, PetscReal x2, PetscReal x[], PetscReal w[], int n)
426: /*******************************************************************************
427: Given the lower and upper limits of integration x1 and x2, and given n, this
428: routine returns arrays x[0..n-1] and w[0..n-1] of length n, containing the abscissas
429: and weights of the Gauss-Lobatto-Legendre n-point quadrature formula.
430: *******************************************************************************/
431: {
432: PetscInt j,m;
433: PetscReal z1,z,xm,xl,q,qp,Ln,scale;
434: if (n==1) {
435: x[0] = x1; /* Scale the root to the desired interval, */
436: x[1] = x2; /* and put in its symmetric counterpart. */
437: w[0] = 1.; /* Compute the weight */
438: w[1] = 1.; /* and its symmetric counterpart. */
439: } else {
440: x[0] = x1; /* Scale the root to the desired interval, */
441: x[n] = x2; /* and put in its symmetric counterpart. */
442: w[0] = 2./(n*(n+1));; /* Compute the weight */
443: w[n] = 2./(n*(n+1)); /* and its symmetric counterpart. */
444: m = (n+1)/2; /* The roots are symmetric, so we only find half of them. */
445: xm = 0.5*(x2+x1);
446: xl = 0.5*(x2-x1);
447: for (j=1; j<=(m-1); j++) { /* Loop over the desired roots. */
448: z=-1.0*PetscCosReal((PETSC_PI*(j+0.25)/(n))-(3.0/(8.0*n*PETSC_PI))*(1.0/(j+0.25)));
449: /* Starting with the above approximation to the ith root, we enter */
450: /* the main loop of refinement by Newton's method. */
451: do {
452: qAndLEvaluation(n,z,&q,&qp,&Ln);
453: z1 = z;
454: z = z1-q/qp; /* Newton's method. */
455: } while (fabs(z-z1) > 3.0e-11);
456: qAndLEvaluation(n,z,&q,&qp,&Ln);
457: x[j] = xm+xl*z; /* Scale the root to the desired interval, */
458: x[n-j] = xm-xl*z; /* and put in its symmetric counterpart. */
459: w[j] = 2.0/(n*(n+1)*Ln*Ln); /* Compute the weight */
460: w[n-j] = w[j]; /* and its symmetric counterpart. */
461: }
462: }
463: if (n%2==0) {
464: qAndLEvaluation(n,0.0,&q,&qp,&Ln);
465: x[n/2]=(x2-x1)/2.0;
466: w[n/2]=2.0/(n*(n+1)*Ln*Ln);
467: }
468: /* scale the weights according to mapping from [-1,1] to [0,1] */
469: scale = (x2-x1)/2.0;
470: for (j=0; j<=n; ++j) w[j] = w[j]*scale;
471: }
474: /******************************************************************************/
475: static void qAndLEvaluation(PetscInt n, PetscReal x, PetscReal *q, PetscReal *qp, PetscReal *Ln)
476: /*******************************************************************************
477: Compute the polynomial qn(x) = L_{N+1}(x) - L_{n-1}(x) and its derivative in
478: addition to L_N(x) as these are needed for the GLL points. See the book titled
479: "Implementing Spectral Methods for Partial Differential Equations: Algorithms
480: for Scientists and Engineers" by David A. Kopriva.
481: *******************************************************************************/
482: {
483: PetscInt k;
485: PetscReal Lnp;
486: PetscReal Lnp1, Lnp1p;
487: PetscReal Lnm1, Lnm1p;
488: PetscReal Lnm2, Lnm2p;
490: Lnm1 = 1.0;
491: *Ln = x;
492: Lnm1p = 0.0;
493: Lnp = 1.0;
495: for (k=2; k<=n; ++k) {
496: Lnm2 = Lnm1;
497: Lnm1 = *Ln;
498: Lnm2p = Lnm1p;
499: Lnm1p = Lnp;
500: *Ln = (2.*k-1.)/(1.0*k)*x*Lnm1 - (k-1.)/(1.0*k)*Lnm2;
501: Lnp = Lnm2p + (2.0*k-1)*Lnm1;
502: }
503: k = n+1;
504: Lnp1 = (2.*k-1.)/(1.0*k)*x*(*Ln) - (k-1.)/(1.0*k)*Lnm1;
505: Lnp1p = Lnm1p + (2.0*k-1)*(*Ln);
506: *q = Lnp1 - Lnm1;
507: *qp = Lnp1p - Lnm1p;
508: }