Actual source code: redistribute.c
petsc-3.9.4 2018-09-11
2: /*
3: This file defines a "solve the problem redistributely on each subgroup of processor" preconditioner.
4: */
5: #include <petsc/private/pcimpl.h>
6: #include <petscksp.h>
8: typedef struct {
9: KSP ksp;
10: Vec x,b;
11: VecScatter scatter;
12: IS is;
13: PetscInt dcnt,*drows; /* these are the local rows that have only diagonal entry */
14: PetscScalar *diag;
15: Vec work;
16: } PC_Redistribute;
18: static PetscErrorCode PCView_Redistribute(PC pc,PetscViewer viewer)
19: {
20: PC_Redistribute *red = (PC_Redistribute*)pc->data;
21: PetscErrorCode ierr;
22: PetscBool iascii,isstring;
23: PetscInt ncnt,N;
26: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
27: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);
28: if (iascii) {
29: MPIU_Allreduce(&red->dcnt,&ncnt,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
30: MatGetSize(pc->pmat,&N,NULL);
31: PetscViewerASCIIPrintf(viewer," Number rows eliminated %D Percentage rows eliminated %g\n",ncnt,100.0*((PetscReal)ncnt)/((PetscReal)N));
32: PetscViewerASCIIPrintf(viewer," Redistribute preconditioner: \n");
33: KSPView(red->ksp,viewer);
34: } else if (isstring) {
35: PetscViewerStringSPrintf(viewer," Redistribute preconditioner");
36: KSPView(red->ksp,viewer);
37: }
38: return(0);
39: }
41: static PetscErrorCode PCSetUp_Redistribute(PC pc)
42: {
43: PC_Redistribute *red = (PC_Redistribute*)pc->data;
44: PetscErrorCode ierr;
45: MPI_Comm comm;
46: PetscInt rstart,rend,i,nz,cnt,*rows,ncnt,dcnt,*drows;
47: PetscLayout map,nmap;
48: PetscMPIInt size,imdex,tag,n;
49: PetscInt *source = NULL;
50: PetscMPIInt *sizes = NULL,nrecvs;
51: PetscInt j,nsends;
52: PetscInt *owner = NULL,*starts = NULL,count,slen;
53: PetscInt *rvalues,*svalues,recvtotal;
54: PetscMPIInt *onodes1,*olengths1;
55: MPI_Request *send_waits = NULL,*recv_waits = NULL;
56: MPI_Status recv_status,*send_status;
57: Vec tvec,diag;
58: Mat tmat;
59: const PetscScalar *d;
62: if (pc->setupcalled) {
63: KSPGetOperators(red->ksp,NULL,&tmat);
64: MatCreateSubMatrix(pc->pmat,red->is,red->is,MAT_REUSE_MATRIX,&tmat);
65: KSPSetOperators(red->ksp,tmat,tmat);
66: } else {
67: PetscInt NN;
69: PetscObjectGetComm((PetscObject)pc,&comm);
70: MPI_Comm_size(comm,&size);
71: PetscObjectGetNewTag((PetscObject)pc,&tag);
73: /* count non-diagonal rows on process */
74: MatGetOwnershipRange(pc->mat,&rstart,&rend);
75: cnt = 0;
76: for (i=rstart; i<rend; i++) {
77: MatGetRow(pc->mat,i,&nz,NULL,NULL);
78: if (nz > 1) cnt++;
79: MatRestoreRow(pc->mat,i,&nz,NULL,NULL);
80: }
81: PetscMalloc1(cnt,&rows);
82: PetscMalloc1(rend - rstart - cnt,&drows);
84: /* list non-diagonal rows on process */
85: cnt = 0; dcnt = 0;
86: for (i=rstart; i<rend; i++) {
87: MatGetRow(pc->mat,i,&nz,NULL,NULL);
88: if (nz > 1) rows[cnt++] = i;
89: else drows[dcnt++] = i - rstart;
90: MatRestoreRow(pc->mat,i,&nz,NULL,NULL);
91: }
93: /* create PetscLayout for non-diagonal rows on each process */
94: PetscLayoutCreate(comm,&map);
95: PetscLayoutSetLocalSize(map,cnt);
96: PetscLayoutSetBlockSize(map,1);
97: PetscLayoutSetUp(map);
98: rstart = map->rstart;
99: rend = map->rend;
101: /* create PetscLayout for load-balanced non-diagonal rows on each process */
102: PetscLayoutCreate(comm,&nmap);
103: MPIU_Allreduce(&cnt,&ncnt,1,MPIU_INT,MPI_SUM,comm);
104: PetscLayoutSetSize(nmap,ncnt);
105: PetscLayoutSetBlockSize(nmap,1);
106: PetscLayoutSetUp(nmap);
108: MatGetSize(pc->pmat,&NN,NULL);
109: PetscInfo2(pc,"Number of diagonal rows eliminated %d, percentage eliminated %g\n",NN-ncnt,((PetscReal)(NN-ncnt))/((PetscReal)(NN)));
110: /*
111: this code is taken from VecScatterCreate_PtoS()
112: Determines what rows need to be moved where to
113: load balance the non-diagonal rows
114: */
115: /* count number of contributors to each processor */
116: PetscMalloc2(size,&sizes,cnt,&owner);
117: PetscMemzero(sizes,size*sizeof(PetscMPIInt));
118: j = 0;
119: nsends = 0;
120: for (i=rstart; i<rend; i++) {
121: if (i < nmap->range[j]) j = 0;
122: for (; j<size; j++) {
123: if (i < nmap->range[j+1]) {
124: if (!sizes[j]++) nsends++;
125: owner[i-rstart] = j;
126: break;
127: }
128: }
129: }
130: /* inform other processors of number of messages and max length*/
131: PetscGatherNumberOfMessages(comm,NULL,sizes,&nrecvs);
132: PetscGatherMessageLengths(comm,nsends,nrecvs,sizes,&onodes1,&olengths1);
133: PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
134: recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];
136: /* post receives: rvalues - rows I will own; count - nu */
137: PetscMalloc3(recvtotal,&rvalues,nrecvs,&source,nrecvs,&recv_waits);
138: count = 0;
139: for (i=0; i<nrecvs; i++) {
140: MPI_Irecv((rvalues+count),olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
141: count += olengths1[i];
142: }
144: /* do sends:
145: 1) starts[i] gives the starting index in svalues for stuff going to
146: the ith processor
147: */
148: PetscMalloc3(cnt,&svalues,nsends,&send_waits,size,&starts);
149: starts[0] = 0;
150: for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[i-1];
151: for (i=0; i<cnt; i++) svalues[starts[owner[i]]++] = rows[i];
152: for (i=0; i<cnt; i++) rows[i] = rows[i] - rstart;
153: red->drows = drows;
154: red->dcnt = dcnt;
155: PetscFree(rows);
157: starts[0] = 0;
158: for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[i-1];
159: count = 0;
160: for (i=0; i<size; i++) {
161: if (sizes[i]) {
162: MPI_Isend(svalues+starts[i],sizes[i],MPIU_INT,i,tag,comm,send_waits+count++);
163: }
164: }
166: /* wait on receives */
167: count = nrecvs;
168: slen = 0;
169: while (count) {
170: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
171: /* unpack receives into our local space */
172: MPI_Get_count(&recv_status,MPIU_INT,&n);
173: slen += n;
174: count--;
175: }
176: if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not expected %D",slen,recvtotal);
178: ISCreateGeneral(comm,slen,rvalues,PETSC_COPY_VALUES,&red->is);
180: /* free up all work space */
181: PetscFree(olengths1);
182: PetscFree(onodes1);
183: PetscFree3(rvalues,source,recv_waits);
184: PetscFree2(sizes,owner);
185: if (nsends) { /* wait on sends */
186: PetscMalloc1(nsends,&send_status);
187: MPI_Waitall(nsends,send_waits,send_status);
188: PetscFree(send_status);
189: }
190: PetscFree3(svalues,send_waits,starts);
191: PetscLayoutDestroy(&map);
192: PetscLayoutDestroy(&nmap);
194: VecCreateMPI(comm,slen,PETSC_DETERMINE,&red->b);
195: VecDuplicate(red->b,&red->x);
196: MatCreateVecs(pc->pmat,&tvec,NULL);
197: VecScatterCreate(tvec,red->is,red->b,NULL,&red->scatter);
198: VecDestroy(&tvec);
199: MatCreateSubMatrix(pc->pmat,red->is,red->is,MAT_INITIAL_MATRIX,&tmat);
200: KSPSetOperators(red->ksp,tmat,tmat);
201: MatDestroy(&tmat);
202: }
204: /* get diagonal portion of matrix */
205: PetscFree(red->diag);
206: PetscMalloc1(red->dcnt,&red->diag);
207: MatCreateVecs(pc->pmat,&diag,NULL);
208: MatGetDiagonal(pc->pmat,diag);
209: VecGetArrayRead(diag,&d);
210: for (i=0; i<red->dcnt; i++) red->diag[i] = 1.0/d[red->drows[i]];
211: VecRestoreArrayRead(diag,&d);
212: VecDestroy(&diag);
213: KSPSetUp(red->ksp);
214: return(0);
215: }
217: static PetscErrorCode PCApply_Redistribute(PC pc,Vec b,Vec x)
218: {
219: PC_Redistribute *red = (PC_Redistribute*)pc->data;
220: PetscErrorCode ierr;
221: PetscInt dcnt = red->dcnt,i;
222: const PetscInt *drows = red->drows;
223: PetscScalar *xwork;
224: const PetscScalar *bwork,*diag = red->diag;
227: if (!red->work) {
228: VecDuplicate(b,&red->work);
229: }
230: /* compute the rows of solution that have diagonal entries only */
231: VecSet(x,0.0); /* x = diag(A)^{-1} b */
232: VecGetArray(x,&xwork);
233: VecGetArrayRead(b,&bwork);
234: for (i=0; i<dcnt; i++) xwork[drows[i]] = diag[i]*bwork[drows[i]];
235: PetscLogFlops(dcnt);
236: VecRestoreArray(red->work,&xwork);
237: VecRestoreArrayRead(b,&bwork);
238: /* update the right hand side for the reduced system with diagonal rows (and corresponding columns) removed */
239: MatMult(pc->pmat,x,red->work);
240: VecAYPX(red->work,-1.0,b); /* red->work = b - A x */
242: VecScatterBegin(red->scatter,red->work,red->b,INSERT_VALUES,SCATTER_FORWARD);
243: VecScatterEnd(red->scatter,red->work,red->b,INSERT_VALUES,SCATTER_FORWARD);
244: KSPSolve(red->ksp,red->b,red->x);
245: VecScatterBegin(red->scatter,red->x,x,INSERT_VALUES,SCATTER_REVERSE);
246: VecScatterEnd(red->scatter,red->x,x,INSERT_VALUES,SCATTER_REVERSE);
247: return(0);
248: }
250: static PetscErrorCode PCDestroy_Redistribute(PC pc)
251: {
252: PC_Redistribute *red = (PC_Redistribute*)pc->data;
253: PetscErrorCode ierr;
256: VecScatterDestroy(&red->scatter);
257: ISDestroy(&red->is);
258: VecDestroy(&red->b);
259: VecDestroy(&red->x);
260: KSPDestroy(&red->ksp);
261: VecDestroy(&red->work);
262: PetscFree(red->drows);
263: PetscFree(red->diag);
264: PetscFree(pc->data);
265: return(0);
266: }
268: static PetscErrorCode PCSetFromOptions_Redistribute(PetscOptionItems *PetscOptionsObject,PC pc)
269: {
270: PetscErrorCode ierr;
271: PC_Redistribute *red = (PC_Redistribute*)pc->data;
274: KSPSetFromOptions(red->ksp);
275: return(0);
276: }
278: /*@
279: PCRedistributeGetKSP - Gets the KSP created by the PCREDISTRIBUTE
281: Not Collective
283: Input Parameter:
284: . pc - the preconditioner context
286: Output Parameter:
287: . innerksp - the inner KSP
289: Level: advanced
291: .keywords: PC, redistribute solve
292: @*/
293: PetscErrorCode PCRedistributeGetKSP(PC pc,KSP *innerksp)
294: {
295: PC_Redistribute *red = (PC_Redistribute*)pc->data;
300: *innerksp = red->ksp;
301: return(0);
302: }
304: /* -------------------------------------------------------------------------------------*/
305: /*MC
306: PCREDISTRIBUTE - Redistributes a matrix for load balancing, removing the rows that only have a diagonal entry and then applys a KSP to that new matrix
308: Options for the redistribute preconditioners can be set with -redistribute_ksp_xxx <values> and -redistribute_pc_xxx <values>
310: Notes: Usually run this with -ksp_type preonly
312: If you have used MatZeroRows() to eliminate (for example, Dirichlet) boundary conditions for a symmetric problem then you can use, for example, -ksp_type preonly
313: -pc_type redistribute -redistribute_ksp_type cg -redistribute_pc_type bjacobi -redistribute_sub_pc_type icc to take advantage of the symmetry.
315: This does NOT call a partitioner to reorder rows to lower communication; the ordering of the rows in the original matrix and redistributed matrix is the same.
317: Developer Notes: Should add an option to this preconditioner to use a partitioner to redistribute the rows to lower communication.
319: Level: intermediate
321: .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PCRedistributeGetKSP()
322: M*/
324: PETSC_EXTERN PetscErrorCode PCCreate_Redistribute(PC pc)
325: {
326: PetscErrorCode ierr;
327: PC_Redistribute *red;
328: const char *prefix;
331: PetscNewLog(pc,&red);
332: pc->data = (void*)red;
334: pc->ops->apply = PCApply_Redistribute;
335: pc->ops->applytranspose = 0;
336: pc->ops->setup = PCSetUp_Redistribute;
337: pc->ops->destroy = PCDestroy_Redistribute;
338: pc->ops->setfromoptions = PCSetFromOptions_Redistribute;
339: pc->ops->view = PCView_Redistribute;
341: KSPCreate(PetscObjectComm((PetscObject)pc),&red->ksp);
342: KSPSetErrorIfNotConverged(red->ksp,pc->erroriffailure);
343: PetscObjectIncrementTabLevel((PetscObject)red->ksp,(PetscObject)pc,1);
344: PetscLogObjectParent((PetscObject)pc,(PetscObject)red->ksp);
345: PCGetOptionsPrefix(pc,&prefix);
346: KSPSetOptionsPrefix(red->ksp,prefix);
347: KSPAppendOptionsPrefix(red->ksp,"redistribute_");
348: return(0);
349: }