Actual source code: pipelcg.c
petsc-3.10.5 2019-03-28
1: #include <petsc/private/kspimpl.h>
2: #include <petsc/private/vecimpl.h>
4: #define offset(j) PetscMax(((j) - (2*l)), 0)
5: #define shift(i,j) ((i) - offset((j)))
6: #define G(i,j) (plcg->G[((j)*(2*l+1))+(shift((i),(j))) ])
7: #define G_noshift(i,j) (plcg->G[((j)*(2*l+1))+(i)])
8: #define alpha(i) (plcg->alpha[(i)])
9: #define gamma(i) (plcg->gamma[(i)])
10: #define delta(i) (plcg->delta[(i)])
11: #define sigma(i) (plcg->sigma[(i)])
12: #define req(i) (plcg->req[(i)])
14: typedef struct KSP_CG_PIPE_L_s KSP_CG_PIPE_L;
15: struct KSP_CG_PIPE_L_s {
16: PetscInt l; /* pipeline depth */
17: Vec *Z; /* Z vector (shifted base) */
18: Vec *V; /* V vector (original base) */
19: Vec z_2; /* additional vector needed when l == 1 */
20: Vec p,u,up,upp; /* some work vectors */
21: PetscScalar *G; /* such that Z = VG (band matrix)*/
22: PetscScalar *gamma,*delta,*alpha;
23: PetscReal lmin,lmax; /* min and max eigen values estimates to compute base shifts */
24: PetscReal *sigma; /* base shifts */
25: MPI_Request *req; /* request array for asynchronous global collective */
26: };
28: /**
29: * KSPSetUp_PIPELCG - Sets up the workspace needed by the PIPELCG method.
30: *
31: * This is called once, usually automatically by KSPSolve() or KSPSetUp()
32: * but can be called directly by KSPSetUp()
33: */
34: static PetscErrorCode KSPSetUp_PIPELCG(KSP ksp)
35: {
37: KSP_CG_PIPE_L *plcg = (KSP_CG_PIPE_L*)ksp->data;
38: PetscInt l=plcg->l,max_it=ksp->max_it;
39: MPI_Comm comm;
42: comm = PetscObjectComm((PetscObject)ksp);
43: if (max_it < 1) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"%s: max_it argument must be positive.",((PetscObject)ksp)->type_name);
44: if (l < 1) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"%s: pipel argument must be positive.",((PetscObject)ksp)->type_name);
45: if (l > max_it) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"%s: pipel argument must be less than max_it.",((PetscObject)ksp)->type_name);
47: KSPSetWorkVecs(ksp,4); /* get work vectors needed by PIPELCG */
48: plcg->p = ksp->work[0];
49: plcg->u = ksp->work[1];
50: plcg->up = ksp->work[2];
51: plcg->upp = ksp->work[3];
53: VecDuplicateVecs(plcg->p,l+1,&plcg->Z);
54: VecDuplicateVecs(plcg->p,2*l+1,&plcg->V);
55: PetscCalloc1(2*l,&plcg->alpha);
56: PetscCalloc1(l,&plcg->sigma);
57: if (l == 1) {
58: VecDuplicate(plcg->p,&plcg->z_2);
59: }
61: return(0);
62: }
64: static PetscErrorCode KSPReset_PIPELCG(KSP ksp)
65: {
66: KSP_CG_PIPE_L *plcg = (KSP_CG_PIPE_L*)ksp->data;
67: PetscInt ierr=0,l=plcg->l;
70: PetscFree(plcg->sigma);
71: PetscFree(plcg->alpha);
72: VecDestroyVecs(l+1,&plcg->Z);
73: VecDestroyVecs(2*l+1,&plcg->V);
74: VecDestroy(&plcg->z_2);
75: return(0);
76: }
78: static PetscErrorCode KSPDestroy_PIPELCG(KSP ksp)
79: {
80: PetscInt ierr=0;
83: KSPReset_PIPELCG(ksp);
84: KSPDestroyDefault(ksp);
85: return(0);
86: }
88: static PetscErrorCode KSPSetFromOptions_PIPELCG(PetscOptionItems *PetscOptionsObject,KSP ksp)
89: {
90: PetscInt ierr=0;
91: KSP_CG_PIPE_L *plcg = (KSP_CG_PIPE_L*)ksp->data;
92: PetscBool flag=PETSC_FALSE;
95: PetscOptionsHead(PetscOptionsObject,"KSP PIPELCG options");
96: PetscOptionsInt("-ksp_pipelcg_pipel","Pipeline length","",plcg->l,&plcg->l,&flag);
97: if (!flag) plcg->l = 1;
98: PetscOptionsReal("-ksp_pipelcg_lmin","Estimate for smallest eigenvalue","",plcg->lmin,&plcg->lmin,&flag);
99: if (!flag) plcg->lmin = 0.0;
100: PetscOptionsReal("-ksp_pipelcg_lmax","Estimate for largest eigenvalue","",plcg->lmax,&plcg->lmax,&flag);
101: if (!flag) plcg->lmax = 0.0;
102: PetscOptionsTail();
103: return(0);
104: }
106: static PetscErrorCode MPIPetsc_Iallreduce(void *sendbuf,void *recvbuf,PetscMPIInt count,MPI_Datatype datatype,MPI_Op op,MPI_Comm comm,MPI_Request *request)
107: {
108: PetscErrorCode ierr=0;
111: #if defined(PETSC_HAVE_MPI_IALLREDUCE)
112: MPI_Iallreduce(sendbuf,recvbuf,count,datatype,op,comm,request);
113: #else
114: MPIU_Allreduce(sendbuf,recvbuf,count,datatype,op,comm);
115: *request = MPI_REQUEST_NULL;
116: #endif
117: return(0);
118: }
120: static PetscErrorCode KSPView_PIPELCG(KSP ksp,PetscViewer viewer)
121: {
122: KSP_CG_PIPE_L *plcg = (KSP_CG_PIPE_L*)ksp->data;
123: PetscErrorCode ierr=0;
124: PetscBool iascii=PETSC_FALSE,isstring=PETSC_FALSE;
127: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
128: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);
129: if (iascii) {
130: PetscViewerASCIIPrintf(viewer," Pipeline depth: %D\n", plcg->l);
131: PetscViewerASCIIPrintf(viewer," Minimal eigen value estimate %g\n",plcg->lmin);
132: PetscViewerASCIIPrintf(viewer," Maximal eigen value estimate %g\n",plcg->lmax);
133: } else if (isstring) {
134: PetscViewerStringSPrintf(viewer," Pipeline depth: %D\n", plcg->l);
135: PetscViewerStringSPrintf(viewer," Minimal eigen value estimate %g\n",plcg->lmin);
136: PetscViewerStringSPrintf(viewer," Maximal eigen value estimate %g\n",plcg->lmax);
137: }
138: return(0);
139: }
141: static PetscErrorCode KSPSolve_InnerLoop_PIPELCG(KSP ksp)
142: {
143: KSP_CG_PIPE_L *plcg = (KSP_CG_PIPE_L*)ksp->data;
144: Mat A=NULL,Pmat=NULL;
145: PetscInt it=0,max_it=ksp->max_it,ierr=0,l=plcg->l,i=0,j=0,k=0;
146: PetscInt start=0,middle=0,end=0;
147: Vec *Z = plcg->Z,*V=plcg->V;
148: Vec x=NULL,p=NULL,u=NULL,up=NULL,upp=NULL,temp=NULL,temp2[2],z_2=NULL;
149: PetscScalar sum_dummy=0.0,eta=0.0,zeta=0.0,lambda=0.0;
150: PetscReal dp=0.0,tmp=0.0,beta=0.0,invbeta2=0.0;
151: MPI_Comm comm;
154: x = ksp->vec_sol;
155: p = plcg->p;
156: u = plcg->u;
157: up = plcg->up;
158: upp = plcg->upp;
159: z_2 = plcg->z_2;
161: comm = PetscObjectComm((PetscObject)ksp);
162: PCGetOperators(ksp->pc,&A,&Pmat);
164: for (it = 0; it < max_it+l; ++it) {
165: /* ----------------------------------- */
166: /* Multiplication z_{it+1} = Az_{it} */
167: /* ----------------------------------- */
168: VecCopy(up,upp);
169: VecCopy(u,up);
170: if (it < l) {
171: /* SpMV and Prec */
172: MatMult(A,Z[l-it],u);
173: VecAXPY(u,-sigma(it),up);
174: KSP_PCApply(ksp,u,Z[l-it-1]);
175: /* Apply shift */
176: } else {
177: /* Shift the Z vector pointers */
178: if (l == 1) {
179: VecCopy(Z[l],z_2);
180: }
181: temp = Z[l];
182: for (i = l; i > 0; --i) {
183: Z[i] = Z[i-1];
184: }
185: Z[0] = temp;
186: MatMult(A,Z[1],u);
187: KSP_PCApply(ksp,u,Z[0]);
188: }
190: /* ----------------------------------- */
191: /* Adjust the G matrix */
192: /* ----------------------------------- */
193: if (it >= l) {
194: if (it == l) {
195: /* MPI_Wait for G(0,0),scale V0 and Z and u vectors with 1/beta */
196: MPI_Wait(&req(0),MPI_STATUS_IGNORE);
197: beta = PetscSqrtReal(PetscRealPart(G(0,0)));
198: G(0,0) = 1.0;
199: VecAXPY(V[2*l],1.0/beta,p);
200: for (j = 0; j <= l; ++j) {
201: VecScale(Z[j],1.0/beta);
202: }
203: VecScale(u,1.0/beta);
204: VecScale(up,1.0/beta);
205: VecScale(upp,1.0/beta);
206: }
208: /* MPI_Wait until the dot products,started l iterations ago,are completed */
209: MPI_Wait(&req(it-l+1),MPI_STATUS_IGNORE);
210: if (it <= 2*l-1) {
211: invbeta2 = 1.0 / (beta * beta);
212: /* scale column 1 up to column l of G with 1/beta^2 */
213: for (j = PetscMax(it-3*l+1,0); j <= it-l+1; ++j) {
214: G(j,it-l+1) *= invbeta2;
215: }
216: }
218: for (j = PetscMax(it-2*l+2,0); j <= it-l; ++j) {
219: sum_dummy = 0.0;
220: for (k = PetscMax(it-3*l+1,0); k <= j-1; ++k) {
221: sum_dummy = sum_dummy + G(k,j) * G(k,it-l+1);
222: }
223: G(j,it-l+1) = (G(j,it-l+1) - sum_dummy) / G(j,j);
224: }
226: sum_dummy = 0.0;
227: for (k = PetscMax(it-3*l+1,0); k <= it-l; ++k) {
228: sum_dummy = sum_dummy + G(k,it-l+1) * G(k,it-l+1);
229: }
231: /* Breakdown check */
232: tmp = PetscRealPart(G(it-l+1,it-l+1) - sum_dummy);
233: if (tmp < 0) {
234: PetscPrintf(comm,"sqrt breakdown in iteration %d: value is %e. Iteration was restarted.\n",ksp->its+1,tmp);
235: /* End hanging dot-products in the pipeline before exiting for-loop */
236: start = it-l+2;
237: end = PetscMin(it+1,max_it+1); /* !warning! 'it' can actually be greater than 'max_it' */
238: for (i = start; i < end; ++i) {
239: MPI_Wait(&req(i),MPI_STATUS_IGNORE);
240: }
241: break;
242: }
243: G(it-l+1,it-l+1) = PetscSqrtReal(tmp);
245: if (it < 2*l) {
246: if (it == l) {
247: gamma(it-l) = (G(it-l,it-l+1) + sigma(it-l) * G(it-l,it-l)) / G(it-l,it-l);
248: } else {
249: gamma(it-l) = (G(it-l,it-l+1) + sigma(it-l) * G(it-l,it-l)
250: - delta(it-l-1) * G(it-l-1,it-l)) / G(it-l,it-l);
251: }
252: delta(it-l) = G(it-l+1,it-l+1) / G(it-l,it-l);
253: } else {
254: gamma(it-l) = (G(it-l,it-l) * gamma(it-2*l)
255: + G(it-l,it-l+1) * delta(it-2*l)
256: - G(it-l-1,it-l) * delta(it-l-1)) / G(it-l,it-l);
257: delta(it-l) = (G(it-l+1,it-l+1) * delta(it-2*l)) / G(it-l,it-l);
258: }
260: /* -------------------------------------------- */
261: /* Recursively compute the next V and Z vectors */
262: /* -------------------------------------------- */
263: /* Recurrence V vectors */
264: if (it < 3*l) {
265: VecAXPY(V[3*l-it-1],1.0/G(it-l+1,it-l+1),Z[l]);
266: for (j = 0; j <= it-l; ++j) {
267: alpha(it-l-j) = -G(j,it-l+1)/G(it-l+1,it-l+1);
268: }
269: VecMAXPY(V[3*l-it-1],it-l+1,&alpha(0),&V[3*l-it]);
270: } else {
271: /* Shift the V vector pointers */
272: temp = V[2*l];
273: for (i = 2*l; i>0; i--) {
274: V[i] = V[i-1];
275: }
276: V[0] = temp;
278: VecSet(V[0],0.0);
279: VecAXPY(V[0],1.0/G(it-l+1,it-l+1),Z[l]);
280: for (j = 0; j < 2*l; ++j) {
281: k = (it-3*l+1)+j;
282: alpha(2*l-1-j) = -G(k,it-l+1)/G(it-l+1,it-l+1);
283: }
284: VecMAXPY(V[0],2*l,&alpha(0),&V[1]);
285: }
286: /* Recurrence Z and U vectors */
287: if (it <= l) {
288: VecAXPY(Z[0],-gamma(it-l),Z[1]);
289: VecAXPY(u,-gamma(it-l),up);
290: } else {
291: alpha(0) = -delta(it-l-1);
292: alpha(1) = -gamma(it-l);
294: temp2[0] = (l==1) ? z_2 : Z[2];
295: temp2[1] = Z[1];
296: VecMAXPY(Z[0],2,&alpha(0),temp2);
298: temp2[0] = upp;
299: temp2[1] = up;
300: VecMAXPY(u,2,&alpha(0),temp2);
301: }
302: VecScale(Z[0],1.0/delta(it-l));
303: VecScale(u,1.0/delta(it-l));
304: }
306: /* ---------------------------------------- */
307: /* Compute and communicate the dot products */
308: /* ---------------------------------------- */
309: if (it < l) {
310: /* dot-product (Z_{it+1},z_j) */
311: for (j = 0; j < it+2; ++j) {
312: (*u->ops->dot_local)(u,Z[l-j],&G(j,it+1));
313: }
314: MPIPetsc_Iallreduce(MPI_IN_PLACE,&G(0,it+1),it+2,MPIU_SCALAR,MPIU_SUM,comm,&req(it+1));
315: } else if ((it >= l) && (it < max_it)) {
316: start = PetscMax(0,it-2*l+1);
317: middle = it-l+2;
318: end = it+2;
319: for (j = start; j < middle; ++j) { /* dot-product (Z_{it+1},v_j) */
320: temp = (it < 3*l) ? plcg->V[2*l-j] : plcg->V[it-l+1-j];
321: (*u->ops->dot_local)(u,temp,&G(j,it+1));
322: }
323: for (j = middle; j < end; ++j) { /* dot-product (Z_{it+1},z_j) */
324: (*u->ops->dot_local)(u,plcg->Z[it+1-j],&G(j,it+1));
325: }
326: MPIPetsc_Iallreduce(MPI_IN_PLACE,&G(start,it+1),end-start,MPIU_SCALAR,MPIU_SUM,comm,&req(it+1));
327: }
329: /* ----------------------------------------- */
330: /* Compute solution vector and residual norm */
331: /* ----------------------------------------- */
332: if (it >= l) {
333: if (it == l) {
334: if (ksp->its != 0) {
335: ++ ksp->its;
336: }
337: eta = gamma(0);
338: zeta = beta;
339: VecCopy(V[2*l],p);
340: VecScale(p,1.0/eta);
341: VecAXPY(x,zeta,p);
343: dp = beta;
344: ksp->rnorm = dp;
345: KSPLogResidualHistory(ksp,dp);
346: KSPMonitor(ksp,ksp->its,dp);
347: (*ksp->converged)(ksp,ksp->its,dp,&ksp->reason,ksp->cnvP);
348: } else if (it > l) {
349: k = it-l;
350: ++ ksp->its;
351: lambda = delta(k-1) / eta;
352: eta = gamma(k) - lambda * delta(k-1);
353: zeta = -lambda * zeta;
354: VecScale(p,-delta(k-1)/eta);
355: VecAXPY(p,1.0/eta,(it < 3*l) ? V[3*l-it] : V[1]);
356: VecAXPY(x,zeta,p);
358: dp = PetscAbsScalar(zeta);
359: ksp->rnorm = dp;
360: KSPLogResidualHistory(ksp,dp);
361: KSPMonitor(ksp,ksp->its,dp);
362: (*ksp->converged)(ksp,ksp->its,dp,&ksp->reason,ksp->cnvP);
363: }
364: if (ksp->its >= max_it-1) {
365: ksp->reason = KSP_DIVERGED_ITS;
366: }
367: if (ksp->reason) {
368: /* End hanging dot-products in the pipeline before exiting for-loop */
369: start = it-l+2;
370: end = PetscMin(it+2,max_it+1); /* !warning! 'it' can actually be greater than 'max_it' */
371: for (i = start; i < end; ++i) {
372: MPI_Wait(&req(i),MPI_STATUS_IGNORE);
373: }
374: break;
375: }
376: }
377: } /* End inner for loop */
378: return(0);
379: }
381: static PetscErrorCode KSPSolve_ReInitData_PIPELCG(KSP ksp)
382: {
383: KSP_CG_PIPE_L *plcg = (KSP_CG_PIPE_L*)ksp->data;
384: PetscInt ierr=0,i=0,j=0,l=plcg->l,max_it=ksp->max_it;
387: VecSet(plcg->up,0.0);
388: VecSet(plcg->upp,0.0);
389: for (i = 0; i < l+1; ++i) {
390: VecSet(plcg->Z[i],0.0);
391: }
392: for (i = 0; i < (2*l+1); ++i) {
393: VecSet(plcg->V[i],0.0);
394: }
395: for (j = 0; j < (max_it+1); ++j) {
396: gamma(j) = 0.0;
397: delta(j) = 0.0;
398: for (i = 0; i < (2*l+1); ++i) {
399: G_noshift(i,j) = 0.0;
400: }
401: }
402: return(0);
403: }
405: /**
406: * KSPSolve_PIPELCG - This routine actually applies the pipelined(l) conjugate gradient method
407: *
408: * Input Parameter:
409: * ksp - the Krylov space object that was set to use conjugate gradient,by,for
410: * example,KSPCreate(MPI_Comm,KSP *ksp); KSPSetType(ksp,KSPCG);
411: */
412: static PetscErrorCode KSPSolve_PIPELCG(KSP ksp)
413: {
414: PetscErrorCode ierr=0;
415: KSP_CG_PIPE_L *plcg = (KSP_CG_PIPE_L*)ksp->data;
416: Mat A=NULL,Pmat=NULL;
417: Vec b=NULL,x=NULL,p=NULL,u=NULL;
418: PetscInt max_it=ksp->max_it,l=plcg->l;
419: PetscInt i=0,outer_it=0,curr_guess_zero=0;
420: PetscReal lmin=plcg->lmin,lmax=plcg->lmax;
421: PetscBool diagonalscale=PETSC_FALSE;
422: MPI_Comm comm;
425: comm = PetscObjectComm((PetscObject)ksp);
426: PCGetDiagonalScale(ksp->pc,&diagonalscale);
427: if (diagonalscale) {
428: SETERRQ1(comm,PETSC_ERR_SUP,"Krylov method %s does not support diagonal scaling",((PetscObject)ksp)->type_name);
429: }
431: x = ksp->vec_sol;
432: b = ksp->vec_rhs;
433: p = plcg->p;
434: u = plcg->u;
436: PetscCalloc1((max_it+1)*(2*l+1),&plcg->G);
437: PetscCalloc1(max_it+1,&plcg->gamma);
438: PetscCalloc1(max_it+1,&plcg->delta);
439: PetscCalloc1(max_it+1,&plcg->req);
441: PCGetOperators(ksp->pc,&A,&Pmat);
443: for (i = 0; i < l; ++i) {
444: sigma(i) = (0.5*(lmin+lmax) + (0.5*(lmax-lmin) * PetscCosReal(PETSC_PI*(2.0*i+1.0)/(2.0*l))));
445: }
447: ksp->its = 0;
448: outer_it = 0;
449: curr_guess_zero = !! ksp->guess_zero;
451: while (ksp->its < max_it) { /* OUTER LOOP (gmres-like restart to handle breakdowns) */
452: /* * */
453: /* RESTART LOOP */
454: /* * */
455: if (!curr_guess_zero) {
456: KSP_MatMult(ksp,A,x,u); /* u <- b - Ax */
457: VecAYPX(u,-1.0,b);
458: } else {
459: VecCopy(b,u); /* u <- b (x is 0) */
460: }
461: KSP_PCApply(ksp,u,p); /* p <- Bu */
463: if (outer_it > 0) {
464: /* Re-initialize Z,V,gamma,delta,G,u,up,upp after restart occurred */
465: KSPSolve_ReInitData_PIPELCG(ksp);
466: }
468: (*u->ops->dot_local)(u,p,&G(0,0));
469: MPIPetsc_Iallreduce(MPI_IN_PLACE,&G(0,0),1,MPIU_SCALAR,MPIU_SUM,comm,&req(0));
470: VecCopy(p,plcg->Z[l]);
472: KSPSolve_InnerLoop_PIPELCG(ksp);
474: if (ksp->reason) break; /* convergence or divergence */
475: ++ outer_it;
476: curr_guess_zero = 0;
477: }
479: if (ksp->its >= max_it-1) {
480: ksp->reason = KSP_DIVERGED_ITS;
481: }
482: PetscFree(plcg->G);
483: PetscFree(plcg->gamma);
484: PetscFree(plcg->delta);
485: PetscFree(plcg->req);
486: return(0);
487: }
489: /*MC
490: KSPPIPELCG - Deep pipelined (length l) Conjugate Gradient method. This method has only a single non-blocking global
491: reduction per iteration, compared to 2 blocking reductions for standard CG. The reduction is overlapped by the
492: matrix-vector product and preconditioner application of the next l iterations. The pipeline length l is a parameter
493: of the method.
495: Options Database Keys:
496: . see KSPSolve()
498: Level: intermediate
500: Notes:
501: MPI configuration may be necessary for reductions to make asynchronous progress, which is important for
502: performance of pipelined methods. See the FAQ on the PETSc website for details.
504: Contributed by:
505: Siegfried Cools, University of Antwerp, Dept. Mathematics and Computer Science,
506: funded by Flemish Research Foundation (FWO) grant number 12H4617N.
508: Reference:
509: J. Cornelis, S. Cools and W. Vanroose,
510: "The Communication-Hiding Conjugate Gradient Method with Deep Pipelines", Submitted to SIAM Journal on Scientific
511: Computing (SISC), 2018.
513: .seealso: KSPCreate(), KSPSetType(), KSPType (for list of available types), KSPCG, KSPPIPECG, KSPPIPECGRR, KSPPGMRES,
514: KSPPIPEBCGS, KSPSetPCSide()
515: M*/
516: PETSC_EXTERN
517: PetscErrorCode KSPCreate_PIPELCG(KSP ksp)
518: {
520: KSP_CG_PIPE_L *plcg = NULL;
523: PetscNewLog(ksp,&plcg);
524: ksp->data = (void*)plcg;
526: KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_LEFT,2);
527: KSPSetSupportedNorm(ksp,KSP_NORM_NATURAL,PC_LEFT,2);
529: ksp->ops->setup = KSPSetUp_PIPELCG;
530: ksp->ops->solve = KSPSolve_PIPELCG;
531: ksp->ops->reset = KSPReset_PIPELCG;
532: ksp->ops->destroy = KSPDestroy_PIPELCG;
533: ksp->ops->view = KSPView_PIPELCG;
534: ksp->ops->setfromoptions = KSPSetFromOptions_PIPELCG;
535: ksp->ops->buildsolution = KSPBuildSolutionDefault;
536: ksp->ops->buildresidual = KSPBuildResidualDefault;
537: return(0);
538: }