Actual source code: redistribute.c


/*
  This file defines a preconditioner that redistributes the matrix rows for load balancing,
  drops the rows that contain only a diagonal entry, and solves the remaining system with an inner KSP.
*/
#include <petsc/private/pcimpl.h>
#include <petscksp.h>

typedef struct {
  KSP         ksp;          /* inner solver applied to the redistributed system */
  Vec         x,b;          /* work vectors in the redistributed layout */
  VecScatter  scatter;      /* moves values between the original and redistributed layouts */
  IS          is;           /* rows of the original matrix kept in the redistributed system */
  PetscInt    dcnt,*drows;  /* number of, and local indices of, the rows that have only a diagonal entry */
  PetscScalar *diag;        /* inverses of the diagonal entries of the eliminated rows */
  Vec         work;         /* work vector in the original layout */
} PC_Redistribute;

static PetscErrorCode PCView_Redistribute(PC pc,PetscViewer viewer)
{
  PC_Redistribute *red = (PC_Redistribute*)pc->data;
  PetscBool       iascii,isstring;
  PetscInt        ncnt,N;

  PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
  PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);
  if (iascii) {
    MPIU_Allreduce(&red->dcnt,&ncnt,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
    MatGetSize(pc->pmat,&N,NULL);
    PetscViewerASCIIPrintf(viewer,"    Number of rows eliminated %D, percentage of rows eliminated %g\n",ncnt,100.0*((PetscReal)ncnt)/((PetscReal)N));
    PetscViewerASCIIPrintf(viewer,"  Redistribute preconditioner: \n");
    KSPView(red->ksp,viewer);
  } else if (isstring) {
    PetscViewerStringSPrintf(viewer," Redistribute preconditioner");
    KSPView(red->ksp,viewer);
  }
  return(0);
}

static PetscErrorCode PCSetUp_Redistribute(PC pc)
{
  PC_Redistribute          *red = (PC_Redistribute*)pc->data;
  MPI_Comm                 comm;
  PetscInt                 rstart,rend,i,nz,cnt,*rows,ncnt,dcnt,*drows;
  PetscLayout              map,nmap;
  PetscMPIInt              size,tag,n;
  PETSC_UNUSED PetscMPIInt imdex;
  PetscInt                 *source = NULL;
  PetscMPIInt              *sizes = NULL,nrecvs;
  PetscInt                 j,nsends;
  PetscInt                 *owner = NULL,*starts = NULL,count,slen;
  PetscInt                 *rvalues,*svalues,recvtotal;
  PetscMPIInt              *onodes1,*olengths1;
  MPI_Request              *send_waits = NULL,*recv_waits = NULL;
  MPI_Status               recv_status,*send_status;
  Vec                      tvec,diag;
  Mat                      tmat;
  const PetscScalar        *d,*values;
  const PetscInt           *cols;

  if (pc->setupcalled) {
    KSPGetOperators(red->ksp,NULL,&tmat);
    MatCreateSubMatrix(pc->pmat,red->is,red->is,MAT_REUSE_MATRIX,&tmat);
    KSPSetOperators(red->ksp,tmat,tmat);
  } else {
    PetscInt NN;

    PetscObjectGetComm((PetscObject)pc,&comm);
    MPI_Comm_size(comm,&size);
    PetscObjectGetNewTag((PetscObject)pc,&tag);

    /* count non-diagonal rows on process */
    MatGetOwnershipRange(pc->mat,&rstart,&rend);
    cnt  = 0;
    for (i=rstart; i<rend; i++) {
      MatGetRow(pc->mat,i,&nz,&cols,&values);
      for (PetscInt j=0; j<nz; j++) {
        if (values[j] != 0 && cols[j] != i) {
          cnt++;
          break;
        }
      }
      MatRestoreRow(pc->mat,i,&nz,&cols,&values);
    }
    PetscMalloc1(cnt,&rows);
    PetscMalloc1(rend - rstart - cnt,&drows);

    /* list non-diagonal rows on process */
    cnt = 0; dcnt = 0;
    for (i=rstart; i<rend; i++) {
      PetscBool diagonly = PETSC_TRUE;
      MatGetRow(pc->mat,i,&nz,&cols,&values);
      for (PetscInt j=0; j<nz; j++) {
        if (values[j] != 0 && cols[j] != i) {
          diagonly = PETSC_FALSE;
          break;
        }
      }
      if (!diagonly) rows[cnt++] = i;
      else drows[dcnt++] = i - rstart;
      MatRestoreRow(pc->mat,i,&nz,&cols,&values);
    }

    /* create PetscLayout for non-diagonal rows on each process */
    PetscLayoutCreate(comm,&map);
    PetscLayoutSetLocalSize(map,cnt);
    PetscLayoutSetBlockSize(map,1);
    PetscLayoutSetUp(map);
    rstart = map->rstart;
    rend   = map->rend;

    /* create PetscLayout for load-balanced non-diagonal rows on each process */
    PetscLayoutCreate(comm,&nmap);
    MPIU_Allreduce(&cnt,&ncnt,1,MPIU_INT,MPI_SUM,comm);
    PetscLayoutSetSize(nmap,ncnt);
    PetscLayoutSetBlockSize(nmap,1);
    PetscLayoutSetUp(nmap);

    MatGetSize(pc->pmat,&NN,NULL);
    PetscInfo2(pc,"Number of diagonal rows eliminated %D, fraction of rows eliminated %g\n",NN-ncnt,((PetscReal)(NN-ncnt))/((PetscReal)(NN)));

    if (size > 1) {
      /* The following block assumes MPI can send messages to itself, which is not supported by MPI-Uni,
         hence the size-1 case is handled separately below. */
      /*
       This code is taken from VecScatterCreate_PtoS().
       It determines which rows need to be moved where in order to
       load balance the non-diagonal rows.
       */
      /* count number of contributors to each processor */
      PetscMalloc2(size,&sizes,cnt,&owner);
      PetscArrayzero(sizes,size);
      j      = 0;
      nsends = 0;
      for (i=rstart; i<rend; i++) {
        if (i < nmap->range[j]) j = 0;
        for (; j<size; j++) {
          if (i < nmap->range[j+1]) {
            if (!sizes[j]++) nsends++;
            owner[i-rstart] = j;
            break;
          }
        }
      }
      /* inform other processors of number of messages and max length */
      PetscGatherNumberOfMessages(comm,NULL,sizes,&nrecvs);
      PetscGatherMessageLengths(comm,nsends,nrecvs,sizes,&onodes1,&olengths1);
      PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
      recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];

      /* post receives:  rvalues - rows I will own; count - running offset into rvalues */
      PetscMalloc3(recvtotal,&rvalues,nrecvs,&source,nrecvs,&recv_waits);
      count = 0;
      for (i=0; i<nrecvs; i++) {
        MPI_Irecv((rvalues+count),olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
        count += olengths1[i];
      }

      /* do sends: starts[i] gives the starting index in svalues of the rows going to the ith processor */
      PetscMalloc3(cnt,&svalues,nsends,&send_waits,size,&starts);
      starts[0] = 0;
      for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[i-1];
      for (i=0; i<cnt; i++)  svalues[starts[owner[i]]++] = rows[i];
      for (i=0; i<cnt; i++)  rows[i] = rows[i] - rstart;
      red->drows = drows;
      red->dcnt  = dcnt;
      PetscFree(rows);

      starts[0] = 0;
      for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[i-1];
      count = 0;
      for (i=0; i<size; i++) {
        if (sizes[i]) {
          MPI_Isend(svalues+starts[i],sizes[i],MPIU_INT,i,tag,comm,send_waits+count++);
        }
      }

      /* wait on receives */
      count = nrecvs;
      slen  = 0;
      while (count) {
        MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
        /* unpack receives into our local space */
        MPI_Get_count(&recv_status,MPIU_INT,&n);
        slen += n;
        count--;
      }
      if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not expected %D",slen,recvtotal);
      ISCreateGeneral(comm,slen,rvalues,PETSC_COPY_VALUES,&red->is);

      /* free all work space */
      PetscFree(olengths1);
      PetscFree(onodes1);
      PetscFree3(rvalues,source,recv_waits);
      PetscFree2(sizes,owner);
      if (nsends) {   /* wait on sends */
        PetscMalloc1(nsends,&send_status);
        MPI_Waitall(nsends,send_waits,send_status);
        PetscFree(send_status);
      }
      PetscFree3(svalues,send_waits,starts);
    } else {
      ISCreateGeneral(comm,cnt,rows,PETSC_OWN_POINTER,&red->is);
      red->drows = drows;
      red->dcnt  = dcnt;
      slen = cnt;
    }
    PetscLayoutDestroy(&map);
    PetscLayoutDestroy(&nmap);

    VecCreateMPI(comm,slen,PETSC_DETERMINE,&red->b);
    VecDuplicate(red->b,&red->x);
    MatCreateVecs(pc->pmat,&tvec,NULL);
    VecScatterCreate(tvec,red->is,red->b,NULL,&red->scatter);
    VecDestroy(&tvec);
    MatCreateSubMatrix(pc->pmat,red->is,red->is,MAT_INITIAL_MATRIX,&tmat);
    KSPSetOperators(red->ksp,tmat,tmat);
    MatDestroy(&tmat);
  }

  /* get diagonal portion of matrix */
  PetscFree(red->diag);
  PetscMalloc1(red->dcnt,&red->diag);
  MatCreateVecs(pc->pmat,&diag,NULL);
  MatGetDiagonal(pc->pmat,diag);
  VecGetArrayRead(diag,&d);
  for (i=0; i<red->dcnt; i++) red->diag[i] = 1.0/d[red->drows[i]];
  VecRestoreArrayRead(diag,&d);
  VecDestroy(&diag);
  KSPSetUp(red->ksp);
  return(0);
}

static PetscErrorCode PCApply_Redistribute(PC pc,Vec b,Vec x)
{
  PC_Redistribute   *red = (PC_Redistribute*)pc->data;
  PetscInt          dcnt   = red->dcnt,i;
  const PetscInt    *drows = red->drows;
  PetscScalar       *xwork;
  const PetscScalar *bwork,*diag = red->diag;

  if (!red->work) {
    VecDuplicate(b,&red->work);
  }
  /* compute the rows of the solution that have diagonal entries only */
  VecSet(x,0.0);         /* x = diag(A)^{-1} b */
  VecGetArray(x,&xwork);
  VecGetArrayRead(b,&bwork);
  for (i=0; i<dcnt; i++) xwork[drows[i]] = diag[i]*bwork[drows[i]];
  PetscLogFlops(dcnt);
  VecRestoreArray(x,&xwork);
  VecRestoreArrayRead(b,&bwork);
  /* update the right-hand side for the reduced system with the diagonal rows (and corresponding columns) removed */
  MatMult(pc->pmat,x,red->work);
  VecAYPX(red->work,-1.0,b);   /* red->work = b - A x */

  VecScatterBegin(red->scatter,red->work,red->b,INSERT_VALUES,SCATTER_FORWARD);
  VecScatterEnd(red->scatter,red->work,red->b,INSERT_VALUES,SCATTER_FORWARD);
  KSPSolve(red->ksp,red->b,red->x);
  KSPCheckSolve(red->ksp,pc,red->x);
  VecScatterBegin(red->scatter,red->x,x,INSERT_VALUES,SCATTER_REVERSE);
  VecScatterEnd(red->scatter,red->x,x,INSERT_VALUES,SCATTER_REVERSE);
  return(0);
}

static PetscErrorCode PCDestroy_Redistribute(PC pc)
{
  PC_Redistribute *red = (PC_Redistribute*)pc->data;

  VecScatterDestroy(&red->scatter);
  ISDestroy(&red->is);
  VecDestroy(&red->b);
  VecDestroy(&red->x);
  KSPDestroy(&red->ksp);
  VecDestroy(&red->work);
  PetscFree(red->drows);
  PetscFree(red->diag);
  PetscFree(pc->data);
  return(0);
}

static PetscErrorCode PCSetFromOptions_Redistribute(PetscOptionItems *PetscOptionsObject,PC pc)
{
  PC_Redistribute *red = (PC_Redistribute*)pc->data;

  KSPSetFromOptions(red->ksp);
  return(0);
}

/*@
   PCRedistributeGetKSP - Gets the KSP created by the PCREDISTRIBUTE preconditioner

   Not Collective

   Input Parameter:
.  pc - the preconditioner context

   Output Parameter:
.  innerksp - the inner KSP

   Level: advanced

@*/
PetscErrorCode  PCRedistributeGetKSP(PC pc,KSP *innerksp)
{
  PC_Redistribute *red = (PC_Redistribute*)pc->data;

  *innerksp = red->ksp;
  return(0);
}
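
/*
   Illustrative usage sketch (not part of the original source): once PCREDISTRIBUTE has been set on a PC,
   the inner solver can be customized directly, for example

      KSP inner;
      PCRedistributeGetKSP(pc,&inner);
      KSPSetType(inner,KSPGMRES);
      KSPSetTolerances(inner,1.e-8,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT);

   The same settings are more commonly given on the command line with the redistribute_ prefix,
   e.g. -redistribute_ksp_type gmres -redistribute_ksp_rtol 1.e-8.
*/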

/* -------------------------------------------------------------------------------------*/
/*MC
     PCREDISTRIBUTE - Redistributes a matrix for load balancing, removing the rows that have only a diagonal entry, and then applies a KSP to the reduced matrix

     Options for the inner solver of the redistribute preconditioner can be set with -redistribute_ksp_xxx <values> and -redistribute_pc_xxx <values>

     Notes:
     Usually run this with -ksp_type preonly

     If you have used MatZeroRows() to eliminate (for example, Dirichlet) boundary conditions for a symmetric problem then you can use, for example, -ksp_type preonly
     -pc_type redistribute -redistribute_ksp_type cg -redistribute_pc_type bjacobi -redistribute_sub_pc_type icc to take advantage of the symmetry.

     This does NOT call a partitioner to reorder rows to lower communication; the ordering of the rows in the original matrix and the redistributed matrix is the same.

     Developer Notes:
     Should add an option to this preconditioner to use a partitioner to redistribute the rows to lower communication.

   Level: intermediate

.seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PCRedistributeGetKSP()
M*/
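
/*
   A minimal usage sketch (not part of the original source), assuming a Mat A and Vecs b,x
   have already been assembled on PETSC_COMM_WORLD:

      KSP ksp;
      PC  pc;

      KSPCreate(PETSC_COMM_WORLD,&ksp);
      KSPSetOperators(ksp,A,A);
      KSPSetType(ksp,KSPPREONLY);     // outer solve applies only the preconditioner
      KSPGetPC(ksp,&pc);
      PCSetType(pc,PCREDISTRIBUTE);
      KSPSetFromOptions(ksp);         // picks up -redistribute_ksp_xxx and -redistribute_pc_xxx options
      KSPSolve(ksp,b,x);

   Run, for example, with -redistribute_ksp_type cg -redistribute_pc_type bjacobi.
*/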

PETSC_EXTERN PetscErrorCode PCCreate_Redistribute(PC pc)
{
  PC_Redistribute *red;
  const char      *prefix;

  PetscNewLog(pc,&red);
  pc->data = (void*)red;

  pc->ops->apply          = PCApply_Redistribute;
  pc->ops->applytranspose = NULL;
  pc->ops->setup          = PCSetUp_Redistribute;
  pc->ops->destroy        = PCDestroy_Redistribute;
  pc->ops->setfromoptions = PCSetFromOptions_Redistribute;
  pc->ops->view           = PCView_Redistribute;

  KSPCreate(PetscObjectComm((PetscObject)pc),&red->ksp);
  KSPSetErrorIfNotConverged(red->ksp,pc->erroriffailure);
  PetscObjectIncrementTabLevel((PetscObject)red->ksp,(PetscObject)pc,1);
  PetscLogObjectParent((PetscObject)pc,(PetscObject)red->ksp);
  PCGetOptionsPrefix(pc,&prefix);
  KSPSetOptionsPrefix(red->ksp,prefix);
  KSPAppendOptionsPrefix(red->ksp,"redistribute_");
  return(0);
}