Actual source code: pipelcg.c
petsc-3.13.6 2020-09-29
1: #include <petsc/private/kspimpl.h>
2: #include <petsc/private/vecimpl.h>
4: #define offset(j) PetscMax(((j) - (2*l)), 0)
5: #define shift(i,j) ((i) - offset((j)))
6: #define G(i,j) (plcg->G[((j)*(2*l+1))+(shift((i),(j))) ])
7: #define G_noshift(i,j) (plcg->G[((j)*(2*l+1))+(i)])
8: #define alpha(i) (plcg->alpha[(i)])
9: #define gamma(i) (plcg->gamma[(i)])
10: #define delta(i) (plcg->delta[(i)])
11: #define sigma(i) (plcg->sigma[(i)])
12: #define req(i) (plcg->req[(i)])
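/* Note on storage: G is kept column by column, and each column stores only its 2*l+1 potentially
   nonzero rows, namely rows offset(j) .. offset(j)+2*l. The G(i,j) macro maps global indices (i,j)
   into this compressed band storage; G_noshift(i,j) addresses the raw storage directly. */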
14: typedef struct KSP_CG_PIPE_L_s KSP_CG_PIPE_L;
15: struct KSP_CG_PIPE_L_s {
16: PetscInt l; /* pipeline depth */
17: Vec *Z; /* Z vectors (shifted base) */
18: Vec *U; /* U vectors (unpreconditioned shifted base) */
19: Vec *V; /* V vectors (original base) */
20: Vec *Q; /* Q vectors (auxiliary bases) */
21: Vec p; /* work vector */
22: PetscScalar *G; /* such that Z = VG (band matrix)*/
23: PetscScalar *gamma,*delta,*alpha;
24: PetscReal lmin,lmax; /* min and max eigen values estimates to compute base shifts */
25: PetscReal *sigma; /* base shifts */
26: MPI_Request *req; /* request array for asynchronous global collective */
27: PetscBool show_rstrt; /* flag to show restart information in output (default: not shown) */
28: };
30: /**
31: * KSPSetUp_PIPELCG - Sets up the workspace needed by the PIPELCG method.
32: *
33: * This is called once, usually automatically by KSPSolve(), but it can also be
34: * invoked explicitly beforehand by calling KSPSetUp()
35: */
36: static PetscErrorCode KSPSetUp_PIPELCG(KSP ksp)
37: {
39: KSP_CG_PIPE_L *plcg = (KSP_CG_PIPE_L*)ksp->data;
40: PetscInt l=plcg->l,max_it=ksp->max_it;
41: MPI_Comm comm;
44: comm = PetscObjectComm((PetscObject)ksp);
45: if (max_it < 1) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"%s: max_it argument must be positive.",((PetscObject)ksp)->type_name);
46: if (l < 1) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"%s: pipel argument must be positive.",((PetscObject)ksp)->type_name);
47: if (l > max_it) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"%s: pipel argument must not exceed max_it.",((PetscObject)ksp)->type_name);
49: KSPSetWorkVecs(ksp,1); /* get work vectors needed by PIPELCG */
50: plcg->p = ksp->work[0];
52: VecDuplicateVecs(plcg->p,PetscMax(3,l+1),&plcg->Z);
53: VecDuplicateVecs(plcg->p,3,&plcg->U);
54: VecDuplicateVecs(plcg->p,3,&plcg->V);
55: VecDuplicateVecs(plcg->p,3*(l-1)+1,&plcg->Q);
56: PetscCalloc1(2,&plcg->alpha);
57: PetscCalloc1(l,&plcg->sigma);
58:
59: return(0);
60: }
62: static PetscErrorCode KSPReset_PIPELCG(KSP ksp)
63: {
64: KSP_CG_PIPE_L *plcg = (KSP_CG_PIPE_L*)ksp->data;
65: PetscInt ierr=0,l=plcg->l;
68: PetscFree(plcg->sigma);
69: PetscFree(plcg->alpha);
70: VecDestroyVecs(PetscMax(3,l+1),&plcg->Z);
71: VecDestroyVecs(3,&plcg->U);
72: VecDestroyVecs(3,&plcg->V);
73: VecDestroyVecs(3*(l-1)+1,&plcg->Q);
74: return(0);
75: }
77: static PetscErrorCode KSPDestroy_PIPELCG(KSP ksp)
78: {
79: PetscInt ierr=0;
82: KSPReset_PIPELCG(ksp);
83: KSPDestroyDefault(ksp);
84: return(0);
85: }
87: static PetscErrorCode KSPSetFromOptions_PIPELCG(PetscOptionItems *PetscOptionsObject,KSP ksp)
88: {
89: PetscInt ierr=0;
90: KSP_CG_PIPE_L *plcg = (KSP_CG_PIPE_L*)ksp->data;
91: PetscBool flag=PETSC_FALSE;
94: PetscOptionsHead(PetscOptionsObject,"KSP PIPELCG options");
95: PetscOptionsInt("-ksp_pipelcg_pipel","Pipeline length","",plcg->l,&plcg->l,&flag);
96: if (!flag) plcg->l = 1;
97: PetscOptionsReal("-ksp_pipelcg_lmin","Estimate for smallest eigenvalue","",plcg->lmin,&plcg->lmin,&flag);
98: if (!flag) plcg->lmin = 0.0;
99: PetscOptionsReal("-ksp_pipelcg_lmax","Estimate for largest eigenvalue","",plcg->lmax,&plcg->lmax,&flag);
100: if (!flag) plcg->lmax = 0.0;
101: PetscOptionsBool("-ksp_pipelcg_monitor","Output information on restarts when they occur? (default: 0)","",plcg->show_rstrt,&plcg->show_rstrt,&flag);
102: if (!flag) plcg->show_rstrt = PETSC_FALSE;
103: PetscOptionsTail();
104: return(0);
105: }
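/* Thin wrapper around the non-blocking MPI_Iallreduce: when the MPI implementation does not
   provide MPI_Iallreduce, fall back to a blocking MPIU_Allreduce and return MPI_REQUEST_NULL,
   so that the matching MPI_Wait becomes a no-op. */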
107: static PetscErrorCode MPIPetsc_Iallreduce(void *sendbuf,void *recvbuf,PetscMPIInt count,MPI_Datatype datatype,MPI_Op op,MPI_Comm comm,MPI_Request *request)
108: {
109: PetscErrorCode ierr=0;
112: #if defined(PETSC_HAVE_MPI_IALLREDUCE)
113: MPI_Iallreduce(sendbuf,recvbuf,count,datatype,op,comm,request);
114: #else
115: MPIU_Allreduce(sendbuf,recvbuf,count,datatype,op,comm);
116: *request = MPI_REQUEST_NULL;
117: #endif
118: return(0);
119: }
121: static PetscErrorCode KSPView_PIPELCG(KSP ksp,PetscViewer viewer)
122: {
123: KSP_CG_PIPE_L *plcg = (KSP_CG_PIPE_L*)ksp->data;
124: PetscErrorCode ierr=0;
125: PetscBool iascii=PETSC_FALSE,isstring=PETSC_FALSE;
128: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
129: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);
130: if (iascii) {
131: PetscViewerASCIIPrintf(viewer," Pipeline depth: %D\n", plcg->l);
132: PetscViewerASCIIPrintf(viewer," Minimal eigenvalue estimate %g\n",plcg->lmin);
133: PetscViewerASCIIPrintf(viewer," Maximal eigenvalue estimate %g\n",plcg->lmax);
134: } else if (isstring) {
135: PetscViewerStringSPrintf(viewer," Pipeline depth: %D\n", plcg->l);
136: PetscViewerStringSPrintf(viewer," Minimal eigenvalue estimate %g\n",plcg->lmin);
137: PetscViewerStringSPrintf(viewer," Maximal eigenvalue estimate %g\n",plcg->lmax);
138: }
139: return(0);
140: }
142: static PetscErrorCode KSPSolve_InnerLoop_PIPELCG(KSP ksp)
143: {
144: KSP_CG_PIPE_L *plcg = (KSP_CG_PIPE_L*)ksp->data;
145: Mat A=NULL,Pmat=NULL;
146: PetscInt it=0,max_it=ksp->max_it,ierr=0,l=plcg->l,i=0,j=0,k=0;
147: PetscInt start=0,middle=0,end=0;
148: Vec *Z=plcg->Z,*U=plcg->U,*V=plcg->V,*Q=plcg->Q;
149: Vec x=NULL,p=NULL,temp=NULL;
150: PetscScalar sum_dummy=0.0,eta=0.0,zeta=0.0,lambda=0.0;
151: PetscReal dp=0.0,tmp=0.0,beta=0.0,invbeta2=0.0;
152: MPI_Comm comm;
155: x = ksp->vec_sol;
156: p = plcg->p;
158: comm = PetscObjectComm((PetscObject)ksp);
159: PCGetOperators(ksp->pc,&A,&Pmat);
161: for (it = 0; it < max_it+l; ++it) {
162: /* ----------------------------------- */
163: /* Multiplication z_{it+1} = Az_{it} */
164: /* ----------------------------------- */
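/* Note: for the first l iterations the auxiliary basis is grown with sigma-shifted products,
   u_new = A*z - sigma(it)*u_old, followed by preconditioning; afterwards plain (preconditioned)
   matrix-vector products are used. The shifts are intended to keep this auxiliary basis well
   conditioned, cf. the references in the manual page at the bottom of this file. */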
165: /* Shift the U vector pointers */
166: temp = U[2];
167: for (i = 2; i>0; i--) {
168: U[i] = U[i-1];
169: }
170: U[0] = temp;
171: if (it < l) {
172: /* SpMV and Sigma-shift and Prec */
173: MatMult(A,Z[l-it],U[0]);
174: VecAXPY(U[0],-sigma(it),U[1]);
175: KSP_PCApply(ksp,U[0],Z[l-it-1]);
176: if (it < l-1) {
177: VecCopy(Z[l-it-1],Q[3*it]);
178: }
179: } else {
180: /* Shift the Z vector pointers */
181: temp = Z[PetscMax(l,2)];
182: for (i = PetscMax(l,2); i > 0; --i) {
183: Z[i] = Z[i-1];
184: }
185: Z[0] = temp;
186: /* SpMV and Prec */
187: MatMult(A,Z[1],U[0]);
188: KSP_PCApply(ksp,U[0],Z[0]);
189: }
191: /* ----------------------------------- */
192: /* Adjust the G matrix */
193: /* ----------------------------------- */
194: if (it >= l) {
195: if (it == l) {
196: /* MPI_Wait for G(0,0), scale V[0] and the Z, U and Q vectors with 1/beta */
197: MPI_Wait(&req(0),MPI_STATUS_IGNORE);
198: beta = PetscSqrtReal(PetscRealPart(G(0,0)));
199: G(0,0) = 1.0;
200: VecAXPY(V[0],1.0/beta,p); /* this assumes V[0] to be zero initially */
201: for (j = 0; j <= PetscMax(l,2); ++j) {
202: VecScale(Z[j],1.0/beta);
203: }
204: for (j = 0; j <= 2; ++j) {
205: VecScale(U[j],1.0/beta);
206: }
207: for (j = 0; j < l-1; ++j) {
208: VecScale(Q[3*j],1.0/beta);
209: }
210: }
212: /* MPI_Wait until the dot products, started l iterations ago, have completed */
213: MPI_Wait(&req(it-l+1),MPI_STATUS_IGNORE);
214: if (it >= 2*l) {
215: for (j = PetscMax(0,it-3*l+1); j <= it-2*l; j++) {
216: G(j,it-l+1) = G(it-2*l+1,j+l); /* exploit symmetry in G matrix */
217: }
218: }
220: if (it <= 2*l-1) {
221: invbeta2 = 1.0 / (beta * beta);
222: /* Scale columns 1 up to l of G with 1/beta^2 */
223: for (j = PetscMax(it-3*l+1,0); j <= it-l+1; ++j) {
224: G(j,it-l+1) *= invbeta2;
225: }
226: }
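/* Cholesky-factorization-style update of column it-l+1 of G: forward substitution with the
   previously factored columns, followed below by the square root of the updated diagonal
   entry (guarded by a breakdown check). */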
228: for (j = PetscMax(it-2*l+2,0); j <= it-l; ++j) {
229: sum_dummy = 0.0;
230: for (k = PetscMax(it-3*l+1,0); k <= j-1; ++k) {
231: sum_dummy = sum_dummy + G(k,j) * G(k,it-l+1);
232: }
233: G(j,it-l+1) = (G(j,it-l+1) - sum_dummy) / G(j,j);
234: }
236: sum_dummy = 0.0;
237: for (k = PetscMax(it-3*l+1,0); k <= it-l; ++k) {
238: sum_dummy = sum_dummy + G(k,it-l+1) * G(k,it-l+1);
239: }
241: tmp = PetscRealPart(G(it-l+1,it-l+1) - sum_dummy);
242: /* Breakdown check */
243: if (tmp < 0) {
244: if (plcg->show_rstrt) {
245: PetscPrintf(comm,"Sqrt breakdown in iteration %D: sqrt argument is %e. Iteration was restarted.\n",ksp->its+1,(double)tmp);
246: }
247: /* End hanging dot-products in the pipeline before exiting for-loop */
248: start = it-l+2;
249: end = PetscMin(it+1,max_it+1); /* !warning! 'it' can actually be greater than 'max_it' */
250: for (i = start; i < end; ++i) {
251: MPI_Wait(&req(i),MPI_STATUS_IGNORE);
252: }
253: break;
254: }
255: G(it-l+1,it-l+1) = PetscSqrtReal(tmp);
257: if (it < 2*l) {
258: if (it == l) {
259: gamma(it-l) = (G(it-l,it-l+1) + sigma(it-l) * G(it-l,it-l)) / G(it-l,it-l);
260: } else {
261: gamma(it-l) = (G(it-l,it-l+1) + sigma(it-l) * G(it-l,it-l)
262: - delta(it-l-1) * G(it-l-1,it-l)) / G(it-l,it-l);
263: }
264: delta(it-l) = G(it-l+1,it-l+1) / G(it-l,it-l);
265: } else {
266: gamma(it-l) = (G(it-l,it-l) * gamma(it-2*l)
267: + G(it-l,it-l+1) * delta(it-2*l)
268: - G(it-l-1,it-l) * delta(it-l-1)) / G(it-l,it-l);
269: delta(it-l) = (G(it-l+1,it-l+1) * delta(it-2*l)) / G(it-l,it-l);
270: }
272: /* -------------------------------------------------- */
273: /* Recursively compute the next V, Q, Z and U vectors */
274: /* -------------------------------------------------- */
275: /* Shift the V vector pointers */
276: temp = V[2];
277: for (i = 2; i>0; i--) {
278: V[i] = V[i-1];
279: }
280: V[0] = temp;
282: /* Recurrence V vectors */
283: if (l == 1) {
284: VecCopy(Z[1],V[0]);
285: } else {
286: VecCopy(Q[0],V[0]);
287: }
288: if (it == l) {
289: VecAXPY(V[0],sigma(0)-gamma(it-l),V[1]);
290: } else {
291: alpha(0) = sigma(0)-gamma(it-l);
292: alpha(1) = -delta(it-l-1);
293: VecMAXPY(V[0],2,&alpha(0),&V[1]);
294: }
295: VecScale(V[0],1.0/delta(it-l));
297: /* Recurrence Q vectors */
298: for (j = 0; j < l-1; ++j) {
299: /* Shift the Q vector pointers */
300: temp = Q[3*j+2];
301: for (i = 2; i>0; i--) {
302: Q[3*j+i] = Q[3*j+i-1];
303: }
304: Q[3*j] = temp;
306: if (j < l-2){
307: VecCopy(Q[3*(j+1)],Q[3*j]);
308: } else {
309: VecCopy(Z[1],Q[3*j]);
310: }
311: if (it == l){
312: VecAXPY(Q[3*j],sigma(j+1)-gamma(it-l),Q[3*j+1]);
313: } else {
314: alpha(0) = sigma(j+1)-gamma(it-l);
315: alpha(1) = -delta(it-l-1);
316: VecMAXPY(Q[3*j],2,&alpha(0),&Q[3*j+1]);
317: }
318: VecScale(Q[3*j],1.0/delta(it-l));
319: }
321: /* Recurrence Z and U vectors */
322: if (it == l) {
323: VecAXPY(Z[0],-gamma(it-l),Z[1]);
324: VecAXPY(U[0],-gamma(it-l),U[1]);
325: } else {
326: alpha(0) = -gamma(it-l);
327: alpha(1) = -delta(it-l-1);
328: VecMAXPY(Z[0],2,&alpha(0),&Z[1]);
329: VecMAXPY(U[0],2,&alpha(0),&U[1]);
330: }
331: VecScale(Z[0],1.0/delta(it-l));
332: VecScale(U[0],1.0/delta(it-l));
333: }
335: /* ---------------------------------------- */
336: /* Compute and communicate the dot products */
337: /* ---------------------------------------- */
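/* Only the local parts of the dot products are computed here; a single non-blocking reduction
   is posted per iteration and is awaited only l iterations later, which lets the global
   communication overlap with the matrix-vector products and preconditioner applications. */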
338: if (it < l) {
339: for (j = 0; j < it+2; ++j) {
340: (*U[0]->ops->dot_local)(U[0],Z[l-j],&G(j,it+1)); /* dot-products (U[0],Z[j]) */
341: }
342: MPIPetsc_Iallreduce(MPI_IN_PLACE,&G(0,it+1),it+2,MPIU_SCALAR,MPIU_SUM,comm,&req(it+1));
343: } else if ((it >= l) && (it < max_it)) {
344: middle = it-l+2;
345: end = it+2;
346: (*U[0]->ops->dot_local)(U[0],V[0],&G(it-l+1,it+1)); /* dot-product (U[0],V[0]) */
347: for (j = middle; j < end; ++j) {
348: (*U[0]->ops->dot_local)(U[0],plcg->Z[it+1-j],&G(j,it+1)); /* dot-products (U[0],Z[j]) */
349: }
350: MPIPetsc_Iallreduce(MPI_IN_PLACE,&G(it-l+1,it+1),l+1,MPIU_SCALAR,MPIU_SUM,comm,&req(it+1));
351: }
353: /* ----------------------------------------- */
354: /* Compute solution vector and residual norm */
355: /* ----------------------------------------- */
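/* The scalars eta and zeta and the vector p implement the usual CG-from-Lanczos recurrence on
   the tridiagonal matrix with diagonal entries gamma(i) and off-diagonal entries delta(i);
   |zeta| serves as the residual norm estimate passed to the monitors and the convergence test. */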
356: if (it >= l) {
357: if (it == l) {
358: if (ksp->its != 0) {
359: ++ ksp->its;
360: }
361: eta = gamma(0);
362: zeta = beta;
363: VecCopy(V[1],p);
364: VecScale(p,1.0/eta);
365: VecAXPY(x,zeta,p);
366: dp = beta;
367: } else if (it > l) {
368: k = it-l;
369: ++ ksp->its;
370: lambda = delta(k-1)/eta;
371: eta = gamma(k) - lambda * delta(k-1);
372: zeta = -lambda * zeta;
373: VecScale(p,-delta(k-1)/eta);
374: VecAXPY(p,1.0/eta,V[1]);
375: VecAXPY(x,zeta,p);
376: dp = PetscAbsScalar(zeta);
377: }
378: ksp->rnorm = dp;
379: KSPLogResidualHistory(ksp,dp);
380: KSPMonitor(ksp,ksp->its,dp);
381: (*ksp->converged)(ksp,ksp->its,dp,&ksp->reason,ksp->cnvP);
383: if (ksp->its >= max_it-1) {
384: ksp->reason = KSP_DIVERGED_ITS;
385: }
386: if (ksp->reason) {
387: /* End hanging dot-products in the pipeline before exiting for-loop */
388: start = it-l+2;
389: end = PetscMin(it+2,max_it+1); /* !warning! 'it' can actually be greater than 'max_it' */
390: for (i = start; i < end; ++i) {
391: MPI_Wait(&req(i),MPI_STATUS_IGNORE);
392: }
393: break;
394: }
395: }
396: } /* End inner for loop */
397: return(0);
398: }
400: static PetscErrorCode KSPSolve_ReInitData_PIPELCG(KSP ksp)
401: {
402: KSP_CG_PIPE_L *plcg = (KSP_CG_PIPE_L*)ksp->data;
403: PetscInt ierr=0,i=0,j=0,l=plcg->l,max_it=ksp->max_it;
406: for (i = 0; i < PetscMax(3,l+1); ++i) {
407: VecSet(plcg->Z[i],0.0);
408: }
409: for (i = 1; i < 3; ++i) {
410: VecSet(plcg->U[i],0.0);
411: }
412: for (i = 0; i < 3; ++i) {
413: VecSet(plcg->V[i],0.0);
414: }
415: for (i = 0; i < 3*(l-1)+1; ++i) {
416: VecSet(plcg->Q[i],0.0);
417: }
418: for (j = 0; j < (max_it+1); ++j) {
419: gamma(j) = 0.0;
420: delta(j) = 0.0;
421: for (i = 0; i < (2*l+1); ++i) {
422: G_noshift(i,j) = 0.0;
423: }
424: }
425: return(0);
426: }
428: /**
429: * KSPSolve_PIPELCG - This routine actually applies the deep pipelined (length l) conjugate gradient method
430: *
431: * Input Parameter:
432: * ksp - the Krylov space object that was set to use the pipelined conjugate gradient method, by, for
433: * example, KSPCreate(MPI_Comm,KSP *ksp); KSPSetType(ksp,KSPPIPELCG);
434: */
435: static PetscErrorCode KSPSolve_PIPELCG(KSP ksp)
436: {
437: PetscErrorCode ierr=0;
438: KSP_CG_PIPE_L *plcg = (KSP_CG_PIPE_L*)ksp->data;
439: Mat A=NULL,Pmat=NULL;
440: Vec b=NULL,x=NULL,p=NULL;
441: PetscInt max_it=ksp->max_it,l=plcg->l;
442: PetscInt i=0,outer_it=0,curr_guess_zero=0;
443: PetscReal lmin=plcg->lmin,lmax=plcg->lmax;
444: PetscBool diagonalscale=PETSC_FALSE;
445: MPI_Comm comm;
448: comm = PetscObjectComm((PetscObject)ksp);
449: PCGetDiagonalScale(ksp->pc,&diagonalscale);
450: if (diagonalscale) {
451: SETERRQ1(comm,PETSC_ERR_SUP,"Krylov method %s does not support diagonal scaling",((PetscObject)ksp)->type_name);
452: }
454: x = ksp->vec_sol;
455: b = ksp->vec_rhs;
456: p = plcg->p;
458: PetscCalloc1((max_it+1)*(2*l+1),&plcg->G);
459: PetscCalloc1(max_it+1,&plcg->gamma);
460: PetscCalloc1(max_it+1,&plcg->delta);
461: PetscCalloc1(max_it+1,&plcg->req);
463: PCGetOperators(ksp->pc,&A,&Pmat);
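/* The l base shifts sigma(i) are the Chebyshev nodes of the interval [lmin,lmax] supplied by the
   user; with the default estimates lmin = lmax = 0.0 all shifts are zero. */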
465: for (i = 0; i < l; ++i) {
466: sigma(i) = (0.5*(lmin+lmax) + (0.5*(lmax-lmin) * PetscCosReal(PETSC_PI*(2.0*i+1.0)/(2.0*l))));
467: }
469: ksp->its = 0;
470: outer_it = 0;
471: curr_guess_zero = !! ksp->guess_zero;
473: while (ksp->its < max_it) { /* OUTER LOOP (gmres-like restart to handle breakdowns) */
474: /* RESTART LOOP */
475: if (!curr_guess_zero) {
476: KSP_MatMult(ksp,A,x,plcg->U[0]); /* u <- b - Ax */
477: VecAYPX(plcg->U[0],-1.0,b);
478: } else {
479: VecCopy(b,plcg->U[0]); /* u <- b (x is 0) */
480: }
481: KSP_PCApply(ksp,plcg->U[0],p); /* p <- Bu */
483: if (outer_it > 0) {
484: /* Re-initialize Z,U,V,Q,gamma,delta,G after restart occurred */
485: KSPSolve_ReInitData_PIPELCG(ksp);
486: }
488: (*plcg->U[0]->ops->dot_local)(plcg->U[0],p,&G(0,0));
489: MPIPetsc_Iallreduce(MPI_IN_PLACE,&G(0,0),1,MPIU_SCALAR,MPIU_SUM,comm,&req(0));
490: VecCopy(p,plcg->Z[l]);
492: KSPSolve_InnerLoop_PIPELCG(ksp);
494: if (ksp->reason) break; /* convergence or divergence */
495: ++ outer_it;
496: curr_guess_zero = 0;
497: }
499: if (ksp->its >= max_it-1) {
500: ksp->reason = KSP_DIVERGED_ITS;
501: }
502: PetscFree(plcg->G);
503: PetscFree(plcg->gamma);
504: PetscFree(plcg->delta);
505: PetscFree(plcg->req);
506: return(0);
507: }
509: /*MC
510: KSPPIPELCG - Deep pipelined (length l) Conjugate Gradient method. This method has only a single non-blocking global
511: reduction per iteration, compared to 2 blocking reductions for standard CG. The reduction is overlapped by the
512: matrix-vector product and preconditioner application of the next l iterations. The pipeline length l is a parameter
513: of the method.
515: Options Database Keys:
516: + -ksp_pipelcg_pipel - pipeline length l (default: 1)
517: . -ksp_pipelcg_lmin - approximation to the smallest eigenvalue of the preconditioned operator (default: 0.0)
518: . -ksp_pipelcg_lmax - approximation to the largest eigenvalue of the preconditioned operator (default: 0.0)
519: . -ksp_pipelcg_monitor - output where/why the method restarts when a sqrt breakdown occurs
520: - see KSPSolve() for additional options
522: Level: advanced
524: Notes:
525: MPI configuration may be necessary for reductions to make asynchronous progress, which is important for
526: performance of pipelined methods. See the FAQ on the PETSc website for details.
528: Contributed by:
529: Siegfried Cools, University of Antwerp, Dept. Mathematics and Computer Science,
530: funded by Flemish Research Foundation (FWO) grant number 12H4617N.
532: Example usage:
533: [*] KSP ex2, no preconditioner, pipel = 2, lmin = 0.0, lmax = 8.0 :
534: $ mpirun -ppn 14 ./ex2 -m 1000 -n 1000 -ksp_type pipelcg -pc_type none -ksp_norm_type UNPRECONDITIONED
535: -ksp_rtol 1e-10 -ksp_max_it 1000 -ksp_pipelcg_pipel 2 -ksp_pipelcg_lmin 0.0 -ksp_pipelcg_lmax 8.0 -log_summary
536: [*] SNES ex48, bjacobi preconditioner, pipel = 3, lmin = 0.0, lmax = 2.0, show restart information :
537: $ mpirun -ppn 14 ./ex48 -M 150 -P 100 -ksp_type pipelcg -pc_type bjacobi -ksp_rtol 1e-10 -ksp_pipelcg_pipel 3
538: -ksp_pipelcg_lmin 0.0 -ksp_pipelcg_lmax 2.0 -ksp_pipelcg_monitor -log_summary
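
   Example code usage:
   A minimal sketch (hypothetical, not taken from the PETSc examples), assuming the operator A, the
   right-hand side b and the solution vector x have been created elsewhere; the pipeline parameters
   are then picked up from the options database by KSPSetFromOptions():
$    KSP ksp;
$    KSPCreate(PETSC_COMM_WORLD,&ksp);
$    KSPSetType(ksp,KSPPIPELCG);
$    KSPSetOperators(ksp,A,A);
$    KSPSetFromOptions(ksp);
$    KSPSolve(ksp,b,x);
$    KSPDestroy(&ksp);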
540: References:
541: [*] J. Cornelis, S. Cools and W. Vanroose,
542: "The Communication-Hiding Conjugate Gradient Method with Deep Pipelines"
543: Submitted to SIAM Journal on Scientific Computing (SISC), 2018.
544: [*] S. Cools, J. Cornelis and W. Vanroose,
545: "Numerically Stable Recurrence Relations for the Communication Hiding Pipelined Conjugate Gradient Method"
546: Submitted to IEEE Transactions on Parallel and Distributed Systems, 2019.
548: .seealso: KSPCreate(), KSPSetType(), KSPType (for list of available types), KSPCG, KSPPIPECG, KSPPIPECGRR, KSPPGMRES,
549: KSPPIPEBCGS, KSPSetPCSide()
550: M*/
551: PETSC_EXTERN
552: PetscErrorCode KSPCreate_PIPELCG(KSP ksp)
553: {
555: KSP_CG_PIPE_L *plcg = NULL;
558: PetscNewLog(ksp,&plcg);
559: ksp->data = (void*)plcg;
561: KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_LEFT,2);
562: KSPSetSupportedNorm(ksp,KSP_NORM_NATURAL,PC_LEFT,2);
564: ksp->ops->setup = KSPSetUp_PIPELCG;
565: ksp->ops->solve = KSPSolve_PIPELCG;
566: ksp->ops->reset = KSPReset_PIPELCG;
567: ksp->ops->destroy = KSPDestroy_PIPELCG;
568: ksp->ops->view = KSPView_PIPELCG;
569: ksp->ops->setfromoptions = KSPSetFromOptions_PIPELCG;
570: ksp->ops->buildsolution = KSPBuildSolutionDefault;
571: ksp->ops->buildresidual = KSPBuildResidualDefault;
572: return(0);
573: }