Actual source code: mpiov.c

petsc-3.5.4 2015-05-23
  2: /*
  3:    Routines to compute overlapping regions of a parallel MPI matrix
  4:   and to find submatrices that are shared across processors.
  5: */
  6: #include <../src/mat/impls/aij/mpi/mpiaij.h>
  7: #include <petscbt.h>

  9: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Once(Mat,PetscInt,IS*);
 10: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Local(Mat,PetscInt,char**,PetscInt*,PetscInt**);
 11: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Receive(Mat,PetscInt,PetscInt**,PetscInt**,PetscInt*);
 12: extern PetscErrorCode MatGetRow_MPIAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
 13: extern PetscErrorCode MatRestoreRow_MPIAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);

 17: PetscErrorCode MatIncreaseOverlap_MPIAIJ(Mat C,PetscInt imax,IS is[],PetscInt ov)
 18: {
 20:   PetscInt       i;

 23:   if (ov < 0) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
 24:   for (i=0; i<ov; ++i) {
 25:     MatIncreaseOverlap_MPIAIJ_Once(C,imax,is);
 26:   }
 27:   return(0);
 28: }
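/*
   Illustrative usage sketch (not part of this file): callers reach the routine above through the
   public MatIncreaseOverlap() interface.  Here 'nlocal' and 'rows' are hypothetical names for a
   list of locally selected rows; error checking is omitted.

     IS is;
     ISCreateGeneral(PETSC_COMM_SELF,nlocal,rows,PETSC_COPY_VALUES,&is);
     MatIncreaseOverlap(C,1,&is,2);    <-- for an MPIAIJ matrix this applies
                                           MatIncreaseOverlap_MPIAIJ_Once() twice
     ISDestroy(&is);
*/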

 30: /*
 31:   Sample message format:
 32:   If a processor A wants processor B to process some elements corresponding
 33:   to index sets is[1],is[5]
 34:   mesg [0] = 2   (no of index sets in the mesg)
 35:   -----------
 36:   mesg [1] = 1 => is[1]
 37:   mesg [2] = size of is[1];
 38:   -----------
 39:   mesg [3] = 5  => is[5]
 40:   mesg [4] = size of is[5];
 41:   -----------
 42:   mesg [5]
 43:   mesg [n]  data(is[1])
 44:   -----------
 45:   mesg [n+1]
 46:   mesg [m]  data(is[5])
 47:   -----------

 49:   Notes:
 50:   nrqs - no of requests sent (or to be sent out)
 51:   nrqr - no of requests received (which have to be, or which have been, processed)
 52: */
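/*
   Illustrative sketch (not in the original source) of how a receiver can walk a message laid out
   as above; 'mesg', 'nis', 'body', 'is_id' and 'len' are hypothetical local names:

     nis  = mesg[0];                  <-- number of index sets in the message
     body = mesg + 1 + 2*nis;         <-- row data starts right after the header
     for (p=0; p<nis; p++) {
       is_id = mesg[1+2*p];           <-- which IS the following rows belong to
       len   = mesg[2+2*p];           <-- how many row indices were sent for that IS
       ... process body[0] .. body[len-1] ...
       body += len;
     }
*/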
 55: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Once(Mat C,PetscInt imax,IS is[])
 56: {
 57:   Mat_MPIAIJ     *c = (Mat_MPIAIJ*)C->data;
 58:   PetscMPIInt    *w1,*w2,nrqr,*w3,*w4,*onodes1,*olengths1,*onodes2,*olengths2;
 59:   const PetscInt **idx,*idx_i;
 60:   PetscInt       *n,**data,len;
 62:   PetscMPIInt    size,rank,tag1,tag2;
 63:   PetscInt       M,i,j,k,**rbuf,row,proc = 0,nrqs,msz,**outdat,**ptr;
 64:   PetscInt       *ctr,*pa,*tmp,*isz,*isz1,**xdata,**rbuf2,*d_p;
 65:   PetscBT        *table;
 66:   MPI_Comm       comm;
 67:   MPI_Request    *s_waits1,*r_waits1,*s_waits2,*r_waits2;
 68:   MPI_Status     *s_status,*recv_status;
 69:   char           *t_p;

 72:   PetscObjectGetComm((PetscObject)C,&comm);
 73:   size = c->size;
 74:   rank = c->rank;
 75:   M    = C->rmap->N;

 77:   PetscObjectGetNewTag((PetscObject)C,&tag1);
 78:   PetscObjectGetNewTag((PetscObject)C,&tag2);

 80:   PetscMalloc2(imax,&idx,imax,&n);

 82:   for (i=0; i<imax; i++) {
 83:     ISGetIndices(is[i],&idx[i]);
 84:     ISGetLocalSize(is[i],&n[i]);
 85:   }

 87:   /* evaluate communication - mesg to whom, length of mesg, and buffer space
 88:      required. Based on this, buffers are allocated, and data copied into them*/
 89:   PetscMalloc4(size,&w1,size,&w2,size,&w3,size,&w4);
 90:   PetscMemzero(w1,size*sizeof(PetscMPIInt)); /* initialise work vector*/
 91:   PetscMemzero(w2,size*sizeof(PetscMPIInt)); /* initialise work vector*/
 92:   PetscMemzero(w3,size*sizeof(PetscMPIInt)); /* initialise work vector*/
 93:   for (i=0; i<imax; i++) {
 94:     PetscMemzero(w4,size*sizeof(PetscMPIInt)); /* initialise work vector*/
 95:     idx_i = idx[i];
 96:     len   = n[i];
 97:     for (j=0; j<len; j++) {
 98:       row = idx_i[j];
 99:       if (row < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index set cannot have negative entries");
100:       PetscLayoutFindOwner(C->rmap,row,&proc);
101:       w4[proc]++;
102:     }
103:     for (j=0; j<size; j++) {
104:       if (w4[j]) { w1[j] += w4[j]; w3[j]++;}
105:     }
106:   }

108:   nrqs     = 0;              /* no of outgoing messages */
109:   msz      = 0;              /* total mesg length (for all procs) */
110:   w1[rank] = 0;              /* no mesg sent to itself */
111:   w3[rank] = 0;
112:   for (i=0; i<size; i++) {
113:     if (w1[i])  {w2[i] = 1; nrqs++;} /* there exists a message to proc i */
114:   }
115:   /* pa - is list of processors to communicate with */
116:   PetscMalloc1((nrqs+1),&pa);
117:   for (i=0,j=0; i<size; i++) {
118:     if (w1[i]) {pa[j] = i; j++;}
119:   }

121:   /* Each message would have a header = 1 + 2*(no of IS) + data */
122:   for (i=0; i<nrqs; i++) {
123:     j      = pa[i];
124:     w1[j] += w2[j] + 2*w3[j];
125:     msz   += w1[j];
126:   }

128:   /* Determine the number of messages to expect, their lengths, and their from-ids */
129:   PetscGatherNumberOfMessages(comm,w2,w1,&nrqr);
130:   PetscGatherMessageLengths(comm,nrqs,nrqr,w1,&onodes1,&olengths1);

132:   /* Now post the Irecvs corresponding to these messages */
133:   PetscPostIrecvInt(comm,tag1,nrqr,onodes1,olengths1,&rbuf,&r_waits1);

135:   /* Allocate Memory for outgoing messages */
136:   PetscMalloc4(size,&outdat,size,&ptr,msz,&tmp,size,&ctr);
137:   PetscMemzero(outdat,size*sizeof(PetscInt*));
138:   PetscMemzero(ptr,size*sizeof(PetscInt*));

140:   {
141:     PetscInt *iptr = tmp,ict  = 0;
142:     for (i=0; i<nrqs; i++) {
143:       j         = pa[i];
144:       iptr     +=  ict;
145:       outdat[j] = iptr;
146:       ict       = w1[j];
147:     }
148:   }

150:   /* Form the outgoing messages */
151:   /*plug in the headers*/
152:   for (i=0; i<nrqs; i++) {
153:     j            = pa[i];
154:     outdat[j][0] = 0;
155:     PetscMemzero(outdat[j]+1,2*w3[j]*sizeof(PetscInt));
156:     ptr[j]       = outdat[j] + 2*w3[j] + 1;
157:   }

159:   /* Memory for doing local proc's work*/
160:   {
161:     PetscCalloc5(imax,&table, imax,&data, imax,&isz, M*imax,&d_p, (M/PETSC_BITS_PER_BYTE+1)*imax,&t_p);

163:     for (i=0; i<imax; i++) {
164:       table[i] = t_p + (M/PETSC_BITS_PER_BYTE+1)*i;
165:       data[i]  = d_p + M*i;
166:     }
167:   }

169:   /* Parse the IS and update local tables and the outgoing buf with the data*/
170:   {
171:     PetscInt n_i,*data_i,isz_i,*outdat_j,ctr_j;
172:     PetscBT  table_i;

174:     for (i=0; i<imax; i++) {
175:       PetscMemzero(ctr,size*sizeof(PetscInt));
176:       n_i     = n[i];
177:       table_i = table[i];
178:       idx_i   = idx[i];
179:       data_i  = data[i];
180:       isz_i   = isz[i];
181:       for (j=0; j<n_i; j++) {   /* parse the indices of each IS */
182:         row  = idx_i[j];
183:         PetscLayoutFindOwner(C->rmap,row,&proc);
184:         if (proc != rank) { /* copy to the outgoing buffer */
185:           ctr[proc]++;
186:           *ptr[proc] = row;
187:           ptr[proc]++;
188:         } else if (!PetscBTLookupSet(table_i,row)) data_i[isz_i++] = row; /* Update the local table */
189:       }
190:       /* Update the headers for the current IS */
191:       for (j=0; j<size; j++) { /* Can Optimise this loop by using pa[] */
192:         if ((ctr_j = ctr[j])) {
193:           outdat_j        = outdat[j];
194:           k               = ++outdat_j[0];
195:           outdat_j[2*k]   = ctr_j;
196:           outdat_j[2*k-1] = i;
197:         }
198:       }
199:       isz[i] = isz_i;
200:     }
201:   }

203:   /*  Now  post the sends */
204:   PetscMalloc1((nrqs+1),&s_waits1);
205:   for (i=0; i<nrqs; ++i) {
206:     j    = pa[i];
207:     MPI_Isend(outdat[j],w1[j],MPIU_INT,j,tag1,comm,s_waits1+i);
208:   }

210:   /* No longer need the original indices*/
211:   for (i=0; i<imax; ++i) {
212:     ISRestoreIndices(is[i],idx+i);
213:   }
214:   PetscFree2(idx,n);

216:   for (i=0; i<imax; ++i) {
217:     ISDestroy(&is[i]);
218:   }

220:   /* Do Local work*/
221:   MatIncreaseOverlap_MPIAIJ_Local(C,imax,table,isz,data);

223:   /* Receive messages*/
224:   PetscMalloc1((nrqr+1),&recv_status);
225:   if (nrqr) {MPI_Waitall(nrqr,r_waits1,recv_status);}

227:   PetscMalloc1((nrqs+1),&s_status);
228:   if (nrqs) {MPI_Waitall(nrqs,s_waits1,s_status);}

230:   /* Phase 1 sends are complete - deallocate buffers */
231:   PetscFree4(outdat,ptr,tmp,ctr);
232:   PetscFree4(w1,w2,w3,w4);

234:   PetscMalloc1((nrqr+1),&xdata);
235:   PetscMalloc1((nrqr+1),&isz1);
236:   MatIncreaseOverlap_MPIAIJ_Receive(C,nrqr,rbuf,xdata,isz1);
237:   PetscFree(rbuf[0]);
238:   PetscFree(rbuf);


241:   /* Send the data back*/
242:   /* Do a global reduction to know the buffer space req for incoming messages*/
243:   {
244:     PetscMPIInt *rw1;

246:     PetscCalloc1(size,&rw1);

248:     for (i=0; i<nrqr; ++i) {
249:       proc = recv_status[i].MPI_SOURCE;

251:       if (proc != onodes1[i]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPI_SOURCE mismatch");
252:       rw1[proc] = isz1[i];
253:     }
254:     PetscFree(onodes1);
255:     PetscFree(olengths1);

257:     /* Determine the number of messages to expect, their lengths, and their from-ids */
258:     PetscGatherMessageLengths(comm,nrqr,nrqs,rw1,&onodes2,&olengths2);
259:     PetscFree(rw1);
260:   }
261:   /* Now post the Irecvs corresponding to these messages */
262:   PetscPostIrecvInt(comm,tag2,nrqs,onodes2,olengths2,&rbuf2,&r_waits2);

264:   /*  Now  post the sends */
265:   PetscMalloc1((nrqr+1),&s_waits2);
266:   for (i=0; i<nrqr; ++i) {
267:     j    = recv_status[i].MPI_SOURCE;
268:     MPI_Isend(xdata[i],isz1[i],MPIU_INT,j,tag2,comm,s_waits2+i);
269:   }

271:   /* receive work done on other processors*/
272:   {
273:     PetscInt    is_no,ct1,max,*rbuf2_i,isz_i,*data_i,jmax;
274:     PetscMPIInt idex;
275:     PetscBT     table_i;
276:     MPI_Status  *status2;

278:     PetscMalloc((PetscMax(nrqr,nrqs)+1)*sizeof(MPI_Status),&status2);
279:     for (i=0; i<nrqs; ++i) {
280:       MPI_Waitany(nrqs,r_waits2,&idex,status2+i);
281:       /* Process the message*/
282:       rbuf2_i = rbuf2[idex];
283:       ct1     = 2*rbuf2_i[0]+1;
284:       jmax    = rbuf2[idex][0];
285:       for (j=1; j<=jmax; j++) {
286:         max     = rbuf2_i[2*j];
287:         is_no   = rbuf2_i[2*j-1];
288:         isz_i   = isz[is_no];
289:         data_i  = data[is_no];
290:         table_i = table[is_no];
291:         for (k=0; k<max; k++,ct1++) {
292:           row = rbuf2_i[ct1];
293:           if (!PetscBTLookupSet(table_i,row)) data_i[isz_i++] = row;
294:         }
295:         isz[is_no] = isz_i;
296:       }
297:     }

299:     if (nrqr) {MPI_Waitall(nrqr,s_waits2,status2);}
300:     PetscFree(status2);
301:   }

303:   for (i=0; i<imax; ++i) {
304:     ISCreateGeneral(PETSC_COMM_SELF,isz[i],data[i],PETSC_COPY_VALUES,is+i);
305:   }

307:   PetscFree(onodes2);
308:   PetscFree(olengths2);

310:   PetscFree(pa);
311:   PetscFree(rbuf2[0]);
312:   PetscFree(rbuf2);
313:   PetscFree(s_waits1);
314:   PetscFree(r_waits1);
315:   PetscFree(s_waits2);
316:   PetscFree(r_waits2);
317:   PetscFree5(table,data,isz,d_p,t_p);
318:   PetscFree(s_status);
319:   PetscFree(recv_status);
320:   PetscFree(xdata[0]);
321:   PetscFree(xdata);
322:   PetscFree(isz1);
323:   return(0);
324: }

328: /*
329:    MatIncreaseOverlap_MPIAIJ_Local - Called by MatIncreaseOverlap_MPIAIJ_Once(), to do
330:        the work on the local processor.

332:      Inputs:
333:       C      - MAT_MPIAIJ;
334:       imax - total no of index sets processed at a time;
335:       table  - an array of char - size = m bits.

337:      Output:
338:       isz    - array containing the count of the solution elements corresponding
339:                to each index set;
340:       data   - pointer to the solutions
341: */
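/*
   Worked example (illustrative): suppose this process owns global columns starting at
   cstart = 100 and its off-diagonal map is garray = {3, 250, 901}.  Then a diagonal-block
   entry aj[k] = 7 contributes global column 7 + cstart = 107, while an off-diagonal entry
   bj[k] = 1 contributes global column garray[1] = 250.  This is exactly the mapping applied
   in the two inner loops below.
*/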
342: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Local(Mat C,PetscInt imax,PetscBT *table,PetscInt *isz,PetscInt **data)
343: {
344:   Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
345:   Mat        A  = c->A,B = c->B;
346:   Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ*)B->data;
347:   PetscInt   start,end,val,max,rstart,cstart,*ai,*aj;
348:   PetscInt   *bi,*bj,*garray,i,j,k,row,*data_i,isz_i;
349:   PetscBT    table_i;

352:   rstart = C->rmap->rstart;
353:   cstart = C->cmap->rstart;
354:   ai     = a->i;
355:   aj     = a->j;
356:   bi     = b->i;
357:   bj     = b->j;
358:   garray = c->garray;


361:   for (i=0; i<imax; i++) {
362:     data_i  = data[i];
363:     table_i = table[i];
364:     isz_i   = isz[i];
365:     for (j=0,max=isz[i]; j<max; j++) {
366:       row   = data_i[j] - rstart;
367:       start = ai[row];
368:       end   = ai[row+1];
369:       for (k=start; k<end; k++) { /* Amat */
370:         val = aj[k] + cstart;
371:         if (!PetscBTLookupSet(table_i,val)) data_i[isz_i++] = val;
372:       }
373:       start = bi[row];
374:       end   = bi[row+1];
375:       for (k=start; k<end; k++) { /* Bmat */
376:         val = garray[bj[k]];
377:         if (!PetscBTLookupSet(table_i,val)) data_i[isz_i++] = val;
378:       }
379:     }
380:     isz[i] = isz_i;
381:   }
382:   return(0);
383: }

387: /*
388:       MatIncreaseOverlap_MPIAIJ_Receive - Process the received messages,
389:          and return the output

391:          Input:
392:            C    - the matrix
393:            nrqr - no of messages being processed.
394:            rbuf - an array of pointers to the received requests

396:          Output:
397:            xdata - array of messages to be sent back
398:            isz1  - size of each message

400:   For better efficiency perhaps we should malloc separately each xdata[i];
401: then if a reallocation is required we need only copy the data for that one row
402: rather than all previous rows, as happens now where a single large chunk of
403: memory is used.

405: */
406: static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Receive(Mat C,PetscInt nrqr,PetscInt **rbuf,PetscInt **xdata,PetscInt * isz1)
407: {
408:   Mat_MPIAIJ     *c = (Mat_MPIAIJ*)C->data;
409:   Mat            A  = c->A,B = c->B;
410:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ*)B->data;
412:   PetscInt       rstart,cstart,*ai,*aj,*bi,*bj,*garray,i,j,k;
413:   PetscInt       row,total_sz,ct,ct1,ct2,ct3,mem_estimate,oct2,l,start,end;
414:   PetscInt       val,max1,max2,m,no_malloc =0,*tmp,new_estimate,ctr;
415:   PetscInt       *rbuf_i,kmax,rbuf_0;
416:   PetscBT        xtable;

419:   m      = C->rmap->N;
420:   rstart = C->rmap->rstart;
421:   cstart = C->cmap->rstart;
422:   ai     = a->i;
423:   aj     = a->j;
424:   bi     = b->i;
425:   bj     = b->j;
426:   garray = c->garray;


429:   for (i=0,ct=0,total_sz=0; i<nrqr; ++i) {
430:     rbuf_i =  rbuf[i];
431:     rbuf_0 =  rbuf_i[0];
432:     ct    += rbuf_0;
433:     for (j=1; j<=rbuf_0; j++) total_sz += rbuf_i[2*j];
434:   }

436:   if (C->rmap->n) max1 = ct*(a->nz + b->nz)/C->rmap->n;
437:   else max1 = 1;
438:   mem_estimate = 3*((total_sz > max1 ? total_sz : max1)+1);
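  /*
     Worked example of the estimate above (illustrative): with ct = 2 index-set blocks requested
     in total, a->nz + b->nz = 700 local nonzeros over C->rmap->n = 100 local rows, max1 is
     2*700/100 = 14; if total_sz (the total number of requested row indices) is 20 > 14, then
     mem_estimate = 3*(20+1) = 63 PetscInts.  The buffer is grown by a factor of 1.5 below
     whenever this guess proves too small.
  */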
439:   PetscMalloc1(mem_estimate,&xdata[0]);
440:   ++no_malloc;
441:   PetscBTCreate(m,&xtable);
442:   PetscMemzero(isz1,nrqr*sizeof(PetscInt));

444:   ct3 = 0;
445:   for (i=0; i<nrqr; i++) { /* for each mesg from proc i */
446:     rbuf_i =  rbuf[i];
447:     rbuf_0 =  rbuf_i[0];
448:     ct1    =  2*rbuf_0+1;
449:     ct2    =  ct1;
450:     ct3   += ct1;
451:     for (j=1; j<=rbuf_0; j++) { /* for each IS from proc i*/
452:       PetscBTMemzero(m,xtable);
453:       oct2 = ct2;
454:       kmax = rbuf_i[2*j];
455:       for (k=0; k<kmax; k++,ct1++) {
456:         row = rbuf_i[ct1];
457:         if (!PetscBTLookupSet(xtable,row)) {
458:           if (!(ct3 < mem_estimate)) {
459:             new_estimate = (PetscInt)(1.5*mem_estimate)+1;
460:             PetscMalloc1(new_estimate,&tmp);
461:             PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(PetscInt));
462:             PetscFree(xdata[0]);
463:             xdata[0]     = tmp;
464:             mem_estimate = new_estimate; ++no_malloc;
465:             for (ctr=1; ctr<=i; ctr++) xdata[ctr] = xdata[ctr-1] + isz1[ctr-1];
466:           }
467:           xdata[i][ct2++] = row;
468:           ct3++;
469:         }
470:       }
471:       for (k=oct2,max2=ct2; k<max2; k++) {
472:         row   = xdata[i][k] - rstart;
473:         start = ai[row];
474:         end   = ai[row+1];
475:         for (l=start; l<end; l++) {
476:           val = aj[l] + cstart;
477:           if (!PetscBTLookupSet(xtable,val)) {
478:             if (!(ct3 < mem_estimate)) {
479:               new_estimate = (PetscInt)(1.5*mem_estimate)+1;
480:               PetscMalloc1(new_estimate,&tmp);
481:               PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(PetscInt));
482:               PetscFree(xdata[0]);
483:               xdata[0]     = tmp;
484:               mem_estimate = new_estimate; ++no_malloc;
485:               for (ctr=1; ctr<=i; ctr++) xdata[ctr] = xdata[ctr-1] + isz1[ctr-1];
486:             }
487:             xdata[i][ct2++] = val;
488:             ct3++;
489:           }
490:         }
491:         start = bi[row];
492:         end   = bi[row+1];
493:         for (l=start; l<end; l++) {
494:           val = garray[bj[l]];
495:           if (!PetscBTLookupSet(xtable,val)) {
496:             if (!(ct3 < mem_estimate)) {
497:               new_estimate = (PetscInt)(1.5*mem_estimate)+1;
498:               PetscMalloc1(new_estimate,&tmp);
499:               PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(PetscInt));
500:               PetscFree(xdata[0]);
501:               xdata[0]     = tmp;
502:               mem_estimate = new_estimate; ++no_malloc;
503:               for (ctr =1; ctr <=i; ctr++) xdata[ctr] = xdata[ctr-1] + isz1[ctr-1];
504:             }
505:             xdata[i][ct2++] = val;
506:             ct3++;
507:           }
508:         }
509:       }
510:       /* Update the header*/
511:       xdata[i][2*j]   = ct2 - oct2; /* Undo the vector isz1 and use only a var*/
512:       xdata[i][2*j-1] = rbuf_i[2*j-1];
513:     }
514:     xdata[i][0] = rbuf_0;
515:     xdata[i+1]  = xdata[i] + ct2;
516:     isz1[i]     = ct2; /* size of each message */
517:   }
518:   PetscBTDestroy(&xtable);
519:   PetscInfo3(C,"Allocated %D bytes, required %D bytes, no of mallocs = %D\n",mem_estimate,ct3,no_malloc);
520:   return(0);
521: }
522: /* -------------------------------------------------------------------------*/
523: extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
524: extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat,MatAssemblyType);
525: /*
526:     Every processor gets the entire matrix
527: */
530: PetscErrorCode MatGetSubMatrix_MPIAIJ_All(Mat A,MatGetSubMatrixOption flag,MatReuse scall,Mat *Bin[])
531: {
532:   Mat            B;
533:   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
534:   Mat_SeqAIJ     *b,*ad = (Mat_SeqAIJ*)a->A->data,*bd = (Mat_SeqAIJ*)a->B->data;
536:   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
537:   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j;
538:   PetscInt       m,*b_sendj,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
539:   MatScalar      *sendbuf,*recvbuf,*a_sendbuf,*b_sendbuf;

542:   MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
543:   MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);

545:   if (scall == MAT_INITIAL_MATRIX) {
546:     /* ----------------------------------------------------------------
547:          Tell every processor the number of nonzeros per row
548:     */
549:     PetscMalloc1(A->rmap->N,&lens);
550:     for (i=A->rmap->rstart; i<A->rmap->rend; i++) {
551:       lens[i] = ad->i[i-A->rmap->rstart+1] - ad->i[i-A->rmap->rstart] + bd->i[i-A->rmap->rstart+1] - bd->i[i-A->rmap->rstart];
552:     }
553:     sendcount = A->rmap->rend - A->rmap->rstart;
554:     PetscMalloc2(size,&recvcounts,size,&displs);
555:     for (i=0; i<size; i++) {
556:       recvcounts[i] = A->rmap->range[i+1] - A->rmap->range[i];
557:       displs[i]     = A->rmap->range[i];
558:     }
559: #if defined(PETSC_HAVE_MPI_IN_PLACE)
560:     MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));
561: #else
562:     MPI_Allgatherv(lens+A->rmap->rstart,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));
563: #endif
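    /*
       Illustrative example (2 ranks): if rank 0 owns rows 0..2 and rank 1 owns rows 3..4, then
       recvcounts = {3,2} and displs = {0,3}, so after the MPI_Allgatherv above every rank holds
       the complete lens[0..4].  With MPI_IN_PLACE each rank's own segment of lens is already
       stored at the right offset and only the other segments are received.
    */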
564:     /* ---------------------------------------------------------------
565:          Create the sequential matrix of the same type as the local block diagonal
566:     */
567:     MatCreate(PETSC_COMM_SELF,&B);
568:     MatSetSizes(B,A->rmap->N,A->cmap->N,PETSC_DETERMINE,PETSC_DETERMINE);
569:     MatSetBlockSizesFromMats(B,A,A);
570:     MatSetType(B,((PetscObject)a->A)->type_name);
571:     MatSeqAIJSetPreallocation(B,0,lens);
572:     PetscMalloc(sizeof(Mat),Bin);
573:     **Bin = B;
574:     b     = (Mat_SeqAIJ*)B->data;

576:     /*--------------------------------------------------------------------
577:        Copy my part of matrix column indices over
578:     */
579:     sendcount  = ad->nz + bd->nz;
580:     jsendbuf   = b->j + b->i[rstarts[rank]];
581:     a_jsendbuf = ad->j;
582:     b_jsendbuf = bd->j;
583:     n          = A->rmap->rend - A->rmap->rstart;
584:     cnt        = 0;
585:     for (i=0; i<n; i++) {

587:       /* put in lower diagonal portion */
588:       m = bd->i[i+1] - bd->i[i];
589:       while (m > 0) {
590:         /* is it above diagonal (in bd (compressed) numbering) */
591:         if (garray[*b_jsendbuf] > A->rmap->rstart + i) break;
592:         jsendbuf[cnt++] = garray[*b_jsendbuf++];
593:         m--;
594:       }

596:       /* put in diagonal portion */
597:       for (j=ad->i[i]; j<ad->i[i+1]; j++) {
598:         jsendbuf[cnt++] = A->rmap->rstart + *a_jsendbuf++;
599:       }

601:       /* put in upper diagonal portion */
602:       while (m-- > 0) {
603:         jsendbuf[cnt++] = garray[*b_jsendbuf++];
604:       }
605:     }
606:     if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);

608:     /*--------------------------------------------------------------------
609:        Gather all column indices to all processors
610:     */
611:     for (i=0; i<size; i++) {
612:       recvcounts[i] = 0;
613:       for (j=A->rmap->range[i]; j<A->rmap->range[i+1]; j++) {
614:         recvcounts[i] += lens[j];
615:       }
616:     }
617:     displs[0] = 0;
618:     for (i=1; i<size; i++) {
619:       displs[i] = displs[i-1] + recvcounts[i-1];
620:     }
621: #if defined(PETSC_HAVE_MPI_IN_PLACE)
622:     MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));
623: #else
624:     MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));
625: #endif
626:     /*--------------------------------------------------------------------
627:         Assemble the matrix into usable form (note numerical values not yet set)
628:     */
629:     /* set the b->ilen (length of each row) values */
630:     PetscMemcpy(b->ilen,lens,A->rmap->N*sizeof(PetscInt));
631:     /* set the b->i indices */
632:     b->i[0] = 0;
633:     for (i=1; i<=A->rmap->N; i++) {
634:       b->i[i] = b->i[i-1] + lens[i-1];
635:     }
636:     PetscFree(lens);
637:     MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
638:     MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

640:   } else {
641:     B = **Bin;
642:     b = (Mat_SeqAIJ*)B->data;
643:   }

645:   /*--------------------------------------------------------------------
646:        Copy my part of matrix numerical values into the values location
647:   */
648:   if (flag == MAT_GET_VALUES) {
649:     sendcount = ad->nz + bd->nz;
650:     sendbuf   = b->a + b->i[rstarts[rank]];
651:     a_sendbuf = ad->a;
652:     b_sendbuf = bd->a;
653:     b_sendj   = bd->j;
654:     n         = A->rmap->rend - A->rmap->rstart;
655:     cnt       = 0;
656:     for (i=0; i<n; i++) {

658:       /* put in lower diagonal portion */
659:       m = bd->i[i+1] - bd->i[i];
660:       while (m > 0) {
661:         /* is it above diagonal (in bd (compressed) numbering) */
662:         if (garray[*b_sendj] > A->rmap->rstart + i) break;
663:         sendbuf[cnt++] = *b_sendbuf++;
664:         m--;
665:         b_sendj++;
666:       }

668:       /* put in diagonal portion */
669:       for (j=ad->i[i]; j<ad->i[i+1]; j++) {
670:         sendbuf[cnt++] = *a_sendbuf++;
671:       }

673:       /* put in upper diagonal portion */
674:       while (m-- > 0) {
675:         sendbuf[cnt++] = *b_sendbuf++;
676:         b_sendj++;
677:       }
678:     }
679:     if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);

681:     /* -----------------------------------------------------------------
682:        Gather all numerical values to all processors
683:     */
684:     if (!recvcounts) {
685:       PetscMalloc2(size,&recvcounts,size,&displs);
686:     }
687:     for (i=0; i<size; i++) {
688:       recvcounts[i] = b->i[rstarts[i+1]] - b->i[rstarts[i]];
689:     }
690:     displs[0] = 0;
691:     for (i=1; i<size; i++) {
692:       displs[i] = displs[i-1] + recvcounts[i-1];
693:     }
694:     recvbuf = b->a;
695: #if defined(PETSC_HAVE_MPI_IN_PLACE)
696:     MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,recvbuf,recvcounts,displs,MPIU_SCALAR,PetscObjectComm((PetscObject)A));
697: #else
698:     MPI_Allgatherv(sendbuf,sendcount,MPIU_SCALAR,recvbuf,recvcounts,displs,MPIU_SCALAR,PetscObjectComm((PetscObject)A));
699: #endif
700:   }  /* end of (flag == MAT_GET_VALUES) */
701:   PetscFree2(recvcounts,displs);

703:   if (A->symmetric) {
704:     MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);
705:   } else if (A->hermitian) {
706:     MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);
707:   } else if (A->structurally_symmetric) {
708:     MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);
709:   }
710:   return(0);
711: }



717: PetscErrorCode MatGetSubMatrices_MPIAIJ(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[])
718: {
720:   PetscInt       nmax,nstages_local,nstages,i,pos,max_no,nrow,ncol;
721:   PetscBool      rowflag,colflag,wantallmatrix=PETSC_FALSE,twantallmatrix,*allcolumns;

724:   /* Currently, unsorted column indices will result in inverted column indices in the resulting submatrices. */
725:   /* It would make sense to error out in MatGetSubMatrices_MPIAIJ_Local(), the most impl-specific level.
726:      However, there are more careful users of MatGetSubMatrices_MPIAIJ_Local() -- MatPermute_MPIAIJ() -- that
727:      take care to order the result correctly by assembling it with MatSetValues() (after preallocating).
728:    */
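  /* A caller that does not depend on the column ordering of the result can satisfy this
     requirement by sorting each column index set beforehand, e.g. (illustrative sketch):

       ISSort(iscol[i]);

     before calling MatGetSubMatrices(). */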
729:   for (i = 0; i < ismax; ++i) {
730:     PetscBool sorted;
731:     ISSorted(iscol[i], &sorted);
732:     if (!sorted) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_SUP, "Column index set %D not sorted", i);
733:   }

735:   /*
736:        Check for special case: each processor gets entire matrix
737:   */
738:   if (ismax == 1 && C->rmap->N == C->cmap->N) {
739:     ISIdentity(*isrow,&rowflag);
740:     ISIdentity(*iscol,&colflag);
741:     ISGetLocalSize(*isrow,&nrow);
742:     ISGetLocalSize(*iscol,&ncol);
743:     if (rowflag && colflag && nrow == C->rmap->N && ncol == C->cmap->N) {
744:       wantallmatrix = PETSC_TRUE;

746:       PetscOptionsGetBool(((PetscObject)C)->prefix,"-use_fast_submatrix",&wantallmatrix,NULL);
747:     }
748:   }
749:   MPI_Allreduce(&wantallmatrix,&twantallmatrix,1,MPIU_BOOL,MPI_MIN,PetscObjectComm((PetscObject)C));
750:   if (twantallmatrix) {
751:     MatGetSubMatrix_MPIAIJ_All(C,MAT_GET_VALUES,scall,submat);
752:     return(0);
753:   }

755:   /* Allocate memory to hold all the submatrices */
756:   if (scall != MAT_REUSE_MATRIX) {
757:     PetscMalloc1((ismax+1),submat);
758:   }

760:   /* Check for special case: each processor gets entire matrix columns */
761:   PetscMalloc1((ismax+1),&allcolumns);
762:   for (i=0; i<ismax; i++) {
763:     ISIdentity(iscol[i],&colflag);
764:     ISGetLocalSize(iscol[i],&ncol);
765:     if (colflag && ncol == C->cmap->N) {
766:       allcolumns[i] = PETSC_TRUE;
767:     } else {
768:       allcolumns[i] = PETSC_FALSE;
769:     }
770:   }

772:   /* Determine the number of stages through which submatrices are done */
773:   nmax = 20*1000000 / (C->cmap->N * sizeof(PetscInt));

775:   /*
776:      Each stage will extract nmax submatrices.
777:      nmax is determined by the matrix column dimension.
778:      If the original matrix has 20M columns, only one submatrix per stage is allowed, etc.
779:   */
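  /*
     For example (illustrative, assuming 4-byte PetscInt): with C->cmap->N = 1,000,000 columns,
     nmax = 20,000,000/(1,000,000*4) = 5, so up to 5 submatrices are extracted per stage; with
     20M columns the integer division gives 0, which is bumped to 1 just below.
  */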
780:   if (!nmax) nmax = 1;
781:   nstages_local = ismax/nmax + ((ismax % nmax) ? 1 : 0);

783:   /* Make sure every processor loops through the nstages */
784:   MPI_Allreduce(&nstages_local,&nstages,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)C));

786:   for (i=0,pos=0; i<nstages; i++) {
787:     if (pos+nmax <= ismax) max_no = nmax;
788:     else if (pos == ismax) max_no = 0;
789:     else                   max_no = ismax-pos;
790:     MatGetSubMatrices_MPIAIJ_Local(C,max_no,isrow+pos,iscol+pos,scall,allcolumns+pos,*submat+pos);
791:     pos += max_no;
792:   }

794:   PetscFree(allcolumns);
795:   return(0);
796: }

798: /* -------------------------------------------------------------------------*/
801: PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,PetscBool *allcolumns,Mat *submats)
802: {
803:   Mat_MPIAIJ     *c = (Mat_MPIAIJ*)C->data;
804:   Mat            A  = c->A;
805:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ*)c->B->data,*mat;
806:   const PetscInt **icol,**irow;
807:   PetscInt       *nrow,*ncol,start;
809:   PetscMPIInt    rank,size,tag0,tag1,tag2,tag3,*w1,*w2,*w3,*w4,nrqr;
810:   PetscInt       **sbuf1,**sbuf2,i,j,k,l,ct1,ct2,**rbuf1,row,proc;
811:   PetscInt       nrqs,msz,**ptr,*req_size,*ctr,*pa,*tmp,tcol;
812:   PetscInt       **rbuf3,*req_source,**sbuf_aj,**rbuf2,max1,max2;
813:   PetscInt       **lens,is_no,ncols,*cols,mat_i,*mat_j,tmp2,jmax;
814: #if defined(PETSC_USE_CTABLE)
815:   PetscTable *cmap,cmap_i=NULL,*rmap,rmap_i;
816: #else
817:   PetscInt **cmap,*cmap_i=NULL,**rmap,*rmap_i;
818: #endif
819:   const PetscInt *irow_i;
820:   PetscInt       ctr_j,*sbuf1_j,*sbuf_aj_i,*rbuf1_i,kmax,*lens_i;
821:   MPI_Request    *s_waits1,*r_waits1,*s_waits2,*r_waits2,*r_waits3;
822:   MPI_Request    *r_waits4,*s_waits3,*s_waits4;
823:   MPI_Status     *r_status1,*r_status2,*s_status1,*s_status3,*s_status2;
824:   MPI_Status     *r_status3,*r_status4,*s_status4;
825:   MPI_Comm       comm;
826:   PetscScalar    **rbuf4,**sbuf_aa,*vals,*mat_a,*sbuf_aa_i;
827:   PetscMPIInt    *onodes1,*olengths1;
828:   PetscMPIInt    idex,idex2,end;

831:   PetscObjectGetComm((PetscObject)C,&comm);
832:   tag0 = ((PetscObject)C)->tag;
833:   size = c->size;
834:   rank = c->rank;

836:   /* Get some new tags to keep the communication clean */
837:   PetscObjectGetNewTag((PetscObject)C,&tag1);
838:   PetscObjectGetNewTag((PetscObject)C,&tag2);
839:   PetscObjectGetNewTag((PetscObject)C,&tag3);

841:   PetscMalloc4(ismax,&irow,ismax,&icol,ismax,&nrow,ismax,&ncol);

843:   for (i=0; i<ismax; i++) {
844:     ISGetIndices(isrow[i],&irow[i]);
845:     ISGetLocalSize(isrow[i],&nrow[i]);
846:     if (allcolumns[i]) {
847:       icol[i] = NULL;
848:       ncol[i] = C->cmap->N;
849:     } else {
850:       ISGetIndices(iscol[i],&icol[i]);
851:       ISGetLocalSize(iscol[i],&ncol[i]);
852:     }
853:   }

855:   /* evaluate communication - mesg to whom, length of mesg, and buffer space
856:      required. Based on this, buffers are allocated, and data copied into them*/
857:   PetscMalloc4(size,&w1,size,&w2,size,&w3,size,&w4);   /* mesg size */
858:   PetscMemzero(w1,size*sizeof(PetscMPIInt));   /* initialize work vector*/
859:   PetscMemzero(w2,size*sizeof(PetscMPIInt));   /* initialize work vector*/
860:   PetscMemzero(w3,size*sizeof(PetscMPIInt));   /* initialize work vector*/
861:   for (i=0; i<ismax; i++) {
862:     PetscMemzero(w4,size*sizeof(PetscMPIInt)); /* initialize work vector*/
863:     jmax   = nrow[i];
864:     irow_i = irow[i];
865:     for (j=0; j<jmax; j++) {
866:       l   = 0;
867:       row = irow_i[j];
868:       while (row >= C->rmap->range[l+1]) l++;
869:       proc = l;
870:       w4[proc]++;
871:     }
872:     for (j=0; j<size; j++) {
873:       if (w4[j]) { w1[j] += w4[j];  w3[j]++;}
874:     }
875:   }

877:   nrqs     = 0;              /* no of outgoing messages */
878:   msz      = 0;              /* total mesg length (for all procs) */
879:   w1[rank] = 0;              /* no mesg sent to self */
880:   w3[rank] = 0;
881:   for (i=0; i<size; i++) {
882:     if (w1[i])  { w2[i] = 1; nrqs++;} /* there exists a message to proc i */
883:   }
884:   PetscMalloc1((nrqs+1),&pa); /*(proc -array)*/
885:   for (i=0,j=0; i<size; i++) {
886:     if (w1[i]) { pa[j] = i; j++; }
887:   }

889:   /* Each message would have a header = 1 + 2*(no of IS) + data */
890:   for (i=0; i<nrqs; i++) {
891:     j      = pa[i];
892:     w1[j] += w2[j] + 2* w3[j];
893:     msz   += w1[j];
894:   }
895:   PetscInfo2(0,"Number of outgoing messages %D Total message length %D\n",nrqs,msz);

897:   /* Determine the number of messages to expect, their lengths, and their from-ids */
898:   PetscGatherNumberOfMessages(comm,w2,w1,&nrqr);
899:   PetscGatherMessageLengths(comm,nrqs,nrqr,w1,&onodes1,&olengths1);

901:   /* Now post the Irecvs corresponding to these messages */
902:   PetscPostIrecvInt(comm,tag0,nrqr,onodes1,olengths1,&rbuf1,&r_waits1);

904:   PetscFree(onodes1);
905:   PetscFree(olengths1);

907:   /* Allocate Memory for outgoing messages */
908:   PetscMalloc4(size,&sbuf1,size,&ptr,2*msz,&tmp,size,&ctr);
909:   PetscMemzero(sbuf1,size*sizeof(PetscInt*));
910:   PetscMemzero(ptr,size*sizeof(PetscInt*));

912:   {
913:     PetscInt *iptr = tmp,ict = 0;
914:     for (i=0; i<nrqs; i++) {
915:       j        = pa[i];
916:       iptr    += ict;
917:       sbuf1[j] = iptr;
918:       ict      = w1[j];
919:     }
920:   }

922:   /* Form the outgoing messages */
923:   /* Initialize the header space */
924:   for (i=0; i<nrqs; i++) {
925:     j           = pa[i];
926:     sbuf1[j][0] = 0;
927:     PetscMemzero(sbuf1[j]+1,2*w3[j]*sizeof(PetscInt));
928:     ptr[j]      = sbuf1[j] + 2*w3[j] + 1;
929:   }

931:   /* Parse the isrow and copy data into outbuf */
932:   for (i=0; i<ismax; i++) {
933:     PetscMemzero(ctr,size*sizeof(PetscInt));
934:     irow_i = irow[i];
935:     jmax   = nrow[i];
936:     for (j=0; j<jmax; j++) {  /* parse the indices of each IS */
937:       l   = 0;
938:       row = irow_i[j];
939:       while (row >= C->rmap->range[l+1]) l++;
940:       proc = l;
941:       if (proc != rank) { /* copy to the outgoing buf*/
942:         ctr[proc]++;
943:         *ptr[proc] = row;
944:         ptr[proc]++;
945:       }
946:     }
947:     /* Update the headers for the current IS */
948:     for (j=0; j<size; j++) { /* Can Optimise this loop too */
949:       if ((ctr_j = ctr[j])) {
950:         sbuf1_j        = sbuf1[j];
951:         k              = ++sbuf1_j[0];
952:         sbuf1_j[2*k]   = ctr_j;
953:         sbuf1_j[2*k-1] = i;
954:       }
955:     }
956:   }

958:   /*  Now  post the sends */
959:   PetscMalloc1((nrqs+1),&s_waits1);
960:   for (i=0; i<nrqs; ++i) {
961:     j    = pa[i];
962:     MPI_Isend(sbuf1[j],w1[j],MPIU_INT,j,tag0,comm,s_waits1+i);
963:   }

965:   /* Post Receives to capture the buffer size */
966:   PetscMalloc1((nrqs+1),&r_waits2);
967:   PetscMalloc1((nrqs+1),&rbuf2);
968:   rbuf2[0] = tmp + msz;
969:   for (i=1; i<nrqs; ++i) {
970:     rbuf2[i] = rbuf2[i-1]+w1[pa[i-1]];
971:   }
972:   for (i=0; i<nrqs; ++i) {
973:     j    = pa[i];
974:     MPI_Irecv(rbuf2[i],w1[j],MPIU_INT,j,tag1,comm,r_waits2+i);
975:   }

977:   /* Send to other procs the buf size they should allocate */


980:   /* Receive messages*/
981:   PetscMalloc1((nrqr+1),&s_waits2);
982:   PetscMalloc1((nrqr+1),&r_status1);
983:   PetscMalloc3(nrqr,&sbuf2,nrqr,&req_size,nrqr,&req_source);
984:   {
985:     Mat_SeqAIJ *sA  = (Mat_SeqAIJ*)c->A->data,*sB = (Mat_SeqAIJ*)c->B->data;
986:     PetscInt   *sAi = sA->i,*sBi = sB->i,id,rstart = C->rmap->rstart;
987:     PetscInt   *sbuf2_i;

989:     for (i=0; i<nrqr; ++i) {
990:       MPI_Waitany(nrqr,r_waits1,&idex,r_status1+i);

992:       req_size[idex] = 0;
993:       rbuf1_i        = rbuf1[idex];
994:       start          = 2*rbuf1_i[0] + 1;
995:       MPI_Get_count(r_status1+i,MPIU_INT,&end);
996:       PetscMalloc1((end+1),&sbuf2[idex]);
997:       sbuf2_i        = sbuf2[idex];
998:       for (j=start; j<end; j++) {
999:         id              = rbuf1_i[j] - rstart;
1000:         ncols           = sAi[id+1] - sAi[id] + sBi[id+1] - sBi[id];
1001:         sbuf2_i[j]      = ncols;
1002:         req_size[idex] += ncols;
1003:       }
1004:       req_source[idex] = r_status1[i].MPI_SOURCE;
1005:       /* form the header */
1006:       sbuf2_i[0] = req_size[idex];
1007:       for (j=1; j<start; j++) sbuf2_i[j] = rbuf1_i[j];

1009:       MPI_Isend(sbuf2_i,end,MPIU_INT,req_source[idex],tag1,comm,s_waits2+i);
1010:     }
1011:   }
1012:   PetscFree(r_status1);
1013:   PetscFree(r_waits1);

1015:   /*  recv buffer sizes */
1016:   /* Receive messages*/

1018:   PetscMalloc1((nrqs+1),&rbuf3);
1019:   PetscMalloc1((nrqs+1),&rbuf4);
1020:   PetscMalloc1((nrqs+1),&r_waits3);
1021:   PetscMalloc1((nrqs+1),&r_waits4);
1022:   PetscMalloc1((nrqs+1),&r_status2);

1024:   for (i=0; i<nrqs; ++i) {
1025:     MPI_Waitany(nrqs,r_waits2,&idex,r_status2+i);
1026:     PetscMalloc1((rbuf2[idex][0]+1),&rbuf3[idex]);
1027:     PetscMalloc1((rbuf2[idex][0]+1),&rbuf4[idex]);
1028:     MPI_Irecv(rbuf3[idex],rbuf2[idex][0],MPIU_INT,r_status2[i].MPI_SOURCE,tag2,comm,r_waits3+idex);
1029:     MPI_Irecv(rbuf4[idex],rbuf2[idex][0],MPIU_SCALAR,r_status2[i].MPI_SOURCE,tag3,comm,r_waits4+idex);
1030:   }
1031:   PetscFree(r_status2);
1032:   PetscFree(r_waits2);

1034:   /* Wait on sends1 and sends2 */
1035:   PetscMalloc1((nrqs+1),&s_status1);
1036:   PetscMalloc1((nrqr+1),&s_status2);

1038:   if (nrqs) {MPI_Waitall(nrqs,s_waits1,s_status1);}
1039:   if (nrqr) {MPI_Waitall(nrqr,s_waits2,s_status2);}
1040:   PetscFree(s_status1);
1041:   PetscFree(s_status2);
1042:   PetscFree(s_waits1);
1043:   PetscFree(s_waits2);

1045:   /* Now allocate buffers for a->j, and send them off */
1046:   PetscMalloc1((nrqr+1),&sbuf_aj);
1047:   for (i=0,j=0; i<nrqr; i++) j += req_size[i];
1048:   PetscMalloc1((j+1),&sbuf_aj[0]);
1049:   for (i=1; i<nrqr; i++) sbuf_aj[i] = sbuf_aj[i-1] + req_size[i-1];

1051:   PetscMalloc1((nrqr+1),&s_waits3);
1052:   {
1053:     PetscInt nzA,nzB,*a_i = a->i,*b_i = b->i,lwrite;
1054:     PetscInt *cworkA,*cworkB,cstart = C->cmap->rstart,rstart = C->rmap->rstart,*bmap = c->garray;
1055:     PetscInt cend = C->cmap->rend;
1056:     PetscInt *a_j = a->j,*b_j = b->j,ctmp;

1058:     for (i=0; i<nrqr; i++) {
1059:       rbuf1_i   = rbuf1[i];
1060:       sbuf_aj_i = sbuf_aj[i];
1061:       ct1       = 2*rbuf1_i[0] + 1;
1062:       ct2       = 0;
1063:       for (j=1,max1=rbuf1_i[0]; j<=max1; j++) {
1064:         kmax = rbuf1[i][2*j];
1065:         for (k=0; k<kmax; k++,ct1++) {
1066:           row    = rbuf1_i[ct1] - rstart;
1067:           nzA    = a_i[row+1] - a_i[row];     nzB = b_i[row+1] - b_i[row];
1068:           ncols  = nzA + nzB;
1069:           cworkA = a_j + a_i[row]; cworkB = b_j + b_i[row];

1071:           /* load the column indices for this row into cols*/
1072:           cols = sbuf_aj_i + ct2;

1074:           lwrite = 0;
1075:           for (l=0; l<nzB; l++) {
1076:             if ((ctmp = bmap[cworkB[l]]) < cstart) cols[lwrite++] = ctmp;
1077:           }
1078:           for (l=0; l<nzA; l++) cols[lwrite++] = cstart + cworkA[l];
1079:           for (l=0; l<nzB; l++) {
1080:             if ((ctmp = bmap[cworkB[l]]) >= cend) cols[lwrite++] = ctmp;
1081:           }

1083:           ct2 += ncols;
1084:         }
1085:       }
1086:       MPI_Isend(sbuf_aj_i,req_size[i],MPIU_INT,req_source[i],tag2,comm,s_waits3+i);
1087:     }
1088:   }
1089:   PetscMalloc1((nrqs+1),&r_status3);
1090:   PetscMalloc1((nrqr+1),&s_status3);

1092:   /* Allocate buffers for a->a, and send them off */
1093:   PetscMalloc1((nrqr+1),&sbuf_aa);
1094:   for (i=0,j=0; i<nrqr; i++) j += req_size[i];
1095:   PetscMalloc1((j+1),&sbuf_aa[0]);
1096:   for (i=1; i<nrqr; i++) sbuf_aa[i] = sbuf_aa[i-1] + req_size[i-1];

1098:   PetscMalloc1((nrqr+1),&s_waits4);
1099:   {
1100:     PetscInt    nzA,nzB,*a_i = a->i,*b_i = b->i, *cworkB,lwrite;
1101:     PetscInt    cstart = C->cmap->rstart,rstart = C->rmap->rstart,*bmap = c->garray;
1102:     PetscInt    cend   = C->cmap->rend;
1103:     PetscInt    *b_j   = b->j;
1104:     PetscScalar *vworkA,*vworkB,*a_a = a->a,*b_a = b->a;

1106:     for (i=0; i<nrqr; i++) {
1107:       rbuf1_i   = rbuf1[i];
1108:       sbuf_aa_i = sbuf_aa[i];
1109:       ct1       = 2*rbuf1_i[0]+1;
1110:       ct2       = 0;
1111:       for (j=1,max1=rbuf1_i[0]; j<=max1; j++) {
1112:         kmax = rbuf1_i[2*j];
1113:         for (k=0; k<kmax; k++,ct1++) {
1114:           row    = rbuf1_i[ct1] - rstart;
1115:           nzA    = a_i[row+1] - a_i[row];     nzB = b_i[row+1] - b_i[row];
1116:           ncols  = nzA + nzB;
1117:           cworkB = b_j + b_i[row];
1118:           vworkA = a_a + a_i[row];
1119:           vworkB = b_a + b_i[row];

1121:           /* load the column values for this row into vals*/
1122:           vals = sbuf_aa_i+ct2;

1124:           lwrite = 0;
1125:           for (l=0; l<nzB; l++) {
1126:             if ((bmap[cworkB[l]]) < cstart) vals[lwrite++] = vworkB[l];
1127:           }
1128:           for (l=0; l<nzA; l++) vals[lwrite++] = vworkA[l];
1129:           for (l=0; l<nzB; l++) {
1130:             if ((bmap[cworkB[l]]) >= cend) vals[lwrite++] = vworkB[l];
1131:           }

1133:           ct2 += ncols;
1134:         }
1135:       }
1136:       MPI_Isend(sbuf_aa_i,req_size[i],MPIU_SCALAR,req_source[i],tag3,comm,s_waits4+i);
1137:     }
1138:   }
1139:   PetscFree(rbuf1[0]);
1140:   PetscFree(rbuf1);
1141:   PetscMalloc1((nrqs+1),&r_status4);
1142:   PetscMalloc1((nrqr+1),&s_status4);

1144:   /* Form the matrix */
1145:   /* create col map: global col of C -> local col of submatrices */
1146:   {
1147:     const PetscInt *icol_i;
1148: #if defined(PETSC_USE_CTABLE)
1149:     PetscMalloc1((1+ismax),&cmap);
1150:     for (i=0; i<ismax; i++) {
1151:       if (!allcolumns[i]) {
1152:         PetscTableCreate(ncol[i]+1,C->cmap->N+1,&cmap[i]);

1154:         jmax   = ncol[i];
1155:         icol_i = icol[i];
1156:         cmap_i = cmap[i];
1157:         for (j=0; j<jmax; j++) {
1158:           PetscTableAdd(cmap[i],icol_i[j]+1,j+1,INSERT_VALUES);
1159:         }
1160:       } else {
1161:         cmap[i] = NULL;
1162:       }
1163:     }
1164: #else
1165:     PetscMalloc1(ismax,&cmap);
1166:     for (i=0; i<ismax; i++) {
1167:       if (!allcolumns[i]) {
1168:         PetscMalloc1(C->cmap->N,&cmap[i]);
1169:         PetscMemzero(cmap[i],C->cmap->N*sizeof(PetscInt));
1170:         jmax   = ncol[i];
1171:         icol_i = icol[i];
1172:         cmap_i = cmap[i];
1173:         for (j=0; j<jmax; j++) {
1174:           cmap_i[icol_i[j]] = j+1;
1175:         }
1176:       } else {
1177:         cmap[i] = NULL;
1178:       }
1179:     }
1180: #endif
1181:   }

1183:   /* Create lens which is required for MatCreate... */
1184:   for (i=0,j=0; i<ismax; i++) j += nrow[i];
1185:   PetscMalloc1(ismax,&lens);
1186:   if (ismax) {
1187:     PetscMalloc1(j,&lens[0]);
1188:     PetscMemzero(lens[0],j*sizeof(PetscInt));
1189:   }
1190:   for (i=1; i<ismax; i++) lens[i] = lens[i-1] + nrow[i-1];

1192:   /* Update lens from local data */
1193:   for (i=0; i<ismax; i++) {
1194:     jmax = nrow[i];
1195:     if (!allcolumns[i]) cmap_i = cmap[i];
1196:     irow_i = irow[i];
1197:     lens_i = lens[i];
1198:     for (j=0; j<jmax; j++) {
1199:       l   = 0;
1200:       row = irow_i[j];
1201:       while (row >= C->rmap->range[l+1]) l++;
1202:       proc = l;
1203:       if (proc == rank) {
1204:         MatGetRow_MPIAIJ(C,row,&ncols,&cols,0);
1205:         if (!allcolumns[i]) {
1206:           for (k=0; k<ncols; k++) {
1207: #if defined(PETSC_USE_CTABLE)
1208:             PetscTableFind(cmap_i,cols[k]+1,&tcol);
1209: #else
1210:             tcol = cmap_i[cols[k]];
1211: #endif
1212:             if (tcol) lens_i[j]++;
1213:           }
1214:         } else { /* allcolumns */
1215:           lens_i[j] = ncols;
1216:         }
1217:         MatRestoreRow_MPIAIJ(C,row,&ncols,&cols,0);
1218:       }
1219:     }
1220:   }

1222:   /* Create row map: global row of C -> local row of submatrices */
1223: #if defined(PETSC_USE_CTABLE)
1224:   PetscMalloc1((1+ismax),&rmap);
1225:   for (i=0; i<ismax; i++) {
1226:     PetscTableCreate(nrow[i]+1,C->rmap->N+1,&rmap[i]);
1227:     rmap_i = rmap[i];
1228:     irow_i = irow[i];
1229:     jmax   = nrow[i];
1230:     for (j=0; j<jmax; j++) {
1231:       PetscTableAdd(rmap[i],irow_i[j]+1,j+1,INSERT_VALUES);
1232:     }
1233:   }
1234: #else
1235:   PetscMalloc1(ismax,&rmap);
1236:   if (ismax) {
1237:     PetscMalloc1(ismax*C->rmap->N,&rmap[0]);
1238:     PetscMemzero(rmap[0],ismax*C->rmap->N*sizeof(PetscInt));
1239:   }
1240:   for (i=1; i<ismax; i++) rmap[i] = rmap[i-1] + C->rmap->N;
1241:   for (i=0; i<ismax; i++) {
1242:     rmap_i = rmap[i];
1243:     irow_i = irow[i];
1244:     jmax   = nrow[i];
1245:     for (j=0; j<jmax; j++) {
1246:       rmap_i[irow_i[j]] = j;
1247:     }
1248:   }
1249: #endif

1251:   /* Update lens from offproc data */
1252:   {
1253:     PetscInt *rbuf2_i,*rbuf3_i,*sbuf1_i;

1255:     for (tmp2=0; tmp2<nrqs; tmp2++) {
1256:       MPI_Waitany(nrqs,r_waits3,&idex2,r_status3+tmp2);
1257:       idex    = pa[idex2];
1258:       sbuf1_i = sbuf1[idex];
1259:       jmax    = sbuf1_i[0];
1260:       ct1     = 2*jmax+1;
1261:       ct2     = 0;
1262:       rbuf2_i = rbuf2[idex2];
1263:       rbuf3_i = rbuf3[idex2];
1264:       for (j=1; j<=jmax; j++) {
1265:         is_no  = sbuf1_i[2*j-1];
1266:         max1   = sbuf1_i[2*j];
1267:         lens_i = lens[is_no];
1268:         if (!allcolumns[is_no]) cmap_i = cmap[is_no];
1269:         rmap_i = rmap[is_no];
1270:         for (k=0; k<max1; k++,ct1++) {
1271: #if defined(PETSC_USE_CTABLE)
1272:           PetscTableFind(rmap_i,sbuf1_i[ct1]+1,&row);
1273:           row--;
1274:           if (row < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"row not found in table");
1275: #else
1276:           row = rmap_i[sbuf1_i[ct1]]; /* the val in the new matrix to be */
1277: #endif
1278:           max2 = rbuf2_i[ct1];
1279:           for (l=0; l<max2; l++,ct2++) {
1280:             if (!allcolumns[is_no]) {
1281: #if defined(PETSC_USE_CTABLE)
1282:               PetscTableFind(cmap_i,rbuf3_i[ct2]+1,&tcol);
1283: #else
1284:               tcol = cmap_i[rbuf3_i[ct2]];
1285: #endif
1286:               if (tcol) lens_i[row]++;
1287:             } else { /* allcolumns */
1288:               lens_i[row]++; /* lens_i[row] += max2 ? */
1289:             }
1290:           }
1291:         }
1292:       }
1293:     }
1294:   }
1295:   PetscFree(r_status3);
1296:   PetscFree(r_waits3);
1297:   if (nrqr) {MPI_Waitall(nrqr,s_waits3,s_status3);}
1298:   PetscFree(s_status3);
1299:   PetscFree(s_waits3);

1301:   /* Create the submatrices */
1302:   if (scall == MAT_REUSE_MATRIX) {
1303:     PetscBool flag;

1305:     /*
1306:         Assumes new rows are the same length as the old rows, hence a bug!
1307:     */
1308:     for (i=0; i<ismax; i++) {
1309:       mat = (Mat_SeqAIJ*)(submats[i]->data);
1310:       if ((submats[i]->rmap->n != nrow[i]) || (submats[i]->cmap->n != ncol[i])) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size");
1311:       PetscMemcmp(mat->ilen,lens[i],submats[i]->rmap->n*sizeof(PetscInt),&flag);
1312:       if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
1313:       /* Initial matrix as if empty */
1314:       PetscMemzero(mat->ilen,submats[i]->rmap->n*sizeof(PetscInt));

1316:       submats[i]->factortype = C->factortype;
1317:     }
1318:   } else {
1319:     for (i=0; i<ismax; i++) {
1320:       PetscInt rbs,cbs;
1321:       ISGetBlockSize(isrow[i],&rbs);
1322:       ISGetBlockSize(iscol[i],&cbs);

1324:       MatCreate(PETSC_COMM_SELF,submats+i);
1325:       MatSetSizes(submats[i],nrow[i],ncol[i],PETSC_DETERMINE,PETSC_DETERMINE);

1327:       MatSetBlockSizes(submats[i],rbs,cbs);
1328:       MatSetType(submats[i],((PetscObject)A)->type_name);
1329:       MatSeqAIJSetPreallocation(submats[i],0,lens[i]);
1330:     }
1331:   }

1333:   /* Assemble the matrices */
1334:   /* First assemble the local rows */
1335:   {
1336:     PetscInt    ilen_row,*imat_ilen,*imat_j,*imat_i,old_row;
1337:     PetscScalar *imat_a;

1339:     for (i=0; i<ismax; i++) {
1340:       mat       = (Mat_SeqAIJ*)submats[i]->data;
1341:       imat_ilen = mat->ilen;
1342:       imat_j    = mat->j;
1343:       imat_i    = mat->i;
1344:       imat_a    = mat->a;

1346:       if (!allcolumns[i]) cmap_i = cmap[i];
1347:       rmap_i = rmap[i];
1348:       irow_i = irow[i];
1349:       jmax   = nrow[i];
1350:       for (j=0; j<jmax; j++) {
1351:         l   = 0;
1352:         row = irow_i[j];
1353:         while (row >= C->rmap->range[l+1]) l++;
1354:         proc = l;
1355:         if (proc == rank) {
1356:           old_row = row;
1357: #if defined(PETSC_USE_CTABLE)
1358:           PetscTableFind(rmap_i,row+1,&row);
1359:           row--;
1360: #else
1361:           row = rmap_i[row];
1362: #endif
1363:           ilen_row = imat_ilen[row];
1364:           MatGetRow_MPIAIJ(C,old_row,&ncols,&cols,&vals);
1365:           mat_i    = imat_i[row];
1366:           mat_a    = imat_a + mat_i;
1367:           mat_j    = imat_j + mat_i;
1368:           if (!allcolumns[i]) {
1369:             for (k=0; k<ncols; k++) {
1370: #if defined(PETSC_USE_CTABLE)
1371:               PetscTableFind(cmap_i,cols[k]+1,&tcol);
1372: #else
1373:               tcol = cmap_i[cols[k]];
1374: #endif
1375:               if (tcol) {
1376:                 *mat_j++ = tcol - 1;
1377:                 *mat_a++ = vals[k];
1378:                 ilen_row++;
1379:               }
1380:             }
1381:           } else { /* allcolumns */
1382:             for (k=0; k<ncols; k++) {
1383:               *mat_j++ = cols[k];  /* global col index! */
1384:               *mat_a++ = vals[k];
1385:               ilen_row++;
1386:             }
1387:           }
1388:           MatRestoreRow_MPIAIJ(C,old_row,&ncols,&cols,&vals);

1390:           imat_ilen[row] = ilen_row;
1391:         }
1392:       }
1393:     }
1394:   }

1396:   /*   Now assemble the off proc rows*/
1397:   {
1398:     PetscInt    *sbuf1_i,*rbuf2_i,*rbuf3_i,*imat_ilen,ilen;
1399:     PetscInt    *imat_j,*imat_i;
1400:     PetscScalar *imat_a,*rbuf4_i;

1402:     for (tmp2=0; tmp2<nrqs; tmp2++) {
1403:       MPI_Waitany(nrqs,r_waits4,&idex2,r_status4+tmp2);
1404:       idex    = pa[idex2];
1405:       sbuf1_i = sbuf1[idex];
1406:       jmax    = sbuf1_i[0];
1407:       ct1     = 2*jmax + 1;
1408:       ct2     = 0;
1409:       rbuf2_i = rbuf2[idex2];
1410:       rbuf3_i = rbuf3[idex2];
1411:       rbuf4_i = rbuf4[idex2];
1412:       for (j=1; j<=jmax; j++) {
1413:         is_no     = sbuf1_i[2*j-1];
1414:         rmap_i    = rmap[is_no];
1415:         if (!allcolumns[is_no]) cmap_i = cmap[is_no];
1416:         mat       = (Mat_SeqAIJ*)submats[is_no]->data;
1417:         imat_ilen = mat->ilen;
1418:         imat_j    = mat->j;
1419:         imat_i    = mat->i;
1420:         imat_a    = mat->a;
1421:         max1      = sbuf1_i[2*j];
1422:         for (k=0; k<max1; k++,ct1++) {
1423:           row = sbuf1_i[ct1];
1424: #if defined(PETSC_USE_CTABLE)
1425:           PetscTableFind(rmap_i,row+1,&row);
1426:           row--;
1427: #else
1428:           row = rmap_i[row];
1429: #endif
1430:           ilen  = imat_ilen[row];
1431:           mat_i = imat_i[row];
1432:           mat_a = imat_a + mat_i;
1433:           mat_j = imat_j + mat_i;
1434:           max2  = rbuf2_i[ct1];
1435:           if (!allcolumns[is_no]) {
1436:             for (l=0; l<max2; l++,ct2++) {

1438: #if defined(PETSC_USE_CTABLE)
1439:               PetscTableFind(cmap_i,rbuf3_i[ct2]+1,&tcol);
1440: #else
1441:               tcol = cmap_i[rbuf3_i[ct2]];
1442: #endif
1443:               if (tcol) {
1444:                 *mat_j++ = tcol - 1;
1445:                 *mat_a++ = rbuf4_i[ct2];
1446:                 ilen++;
1447:               }
1448:             }
1449:           } else { /* allcolumns */
1450:             for (l=0; l<max2; l++,ct2++) {
1451:               *mat_j++ = rbuf3_i[ct2]; /* same global column index of C */
1452:               *mat_a++ = rbuf4_i[ct2];
1453:               ilen++;
1454:             }
1455:           }
1456:           imat_ilen[row] = ilen;
1457:         }
1458:       }
1459:     }
1460:   }

1462:   PetscFree(r_status4);
1463:   PetscFree(r_waits4);
1464:   if (nrqr) {MPI_Waitall(nrqr,s_waits4,s_status4);}
1465:   PetscFree(s_waits4);
1466:   PetscFree(s_status4);

1468:   /* Restore the indices */
1469:   for (i=0; i<ismax; i++) {
1470:     ISRestoreIndices(isrow[i],irow+i);
1471:     if (!allcolumns[i]) {
1472:       ISRestoreIndices(iscol[i],icol+i);
1473:     }
1474:   }

1476:   /* Destroy allocated memory */
1477:   PetscFree4(irow,icol,nrow,ncol);
1478:   PetscFree4(w1,w2,w3,w4);
1479:   PetscFree(pa);

1481:   PetscFree4(sbuf1,ptr,tmp,ctr);
1482:   PetscFree(rbuf2);
1483:   for (i=0; i<nrqr; ++i) {
1484:     PetscFree(sbuf2[i]);
1485:   }
1486:   for (i=0; i<nrqs; ++i) {
1487:     PetscFree(rbuf3[i]);
1488:     PetscFree(rbuf4[i]);
1489:   }

1491:   PetscFree3(sbuf2,req_size,req_source);
1492:   PetscFree(rbuf3);
1493:   PetscFree(rbuf4);
1494:   PetscFree(sbuf_aj[0]);
1495:   PetscFree(sbuf_aj);
1496:   PetscFree(sbuf_aa[0]);
1497:   PetscFree(sbuf_aa);

1499: #if defined(PETSC_USE_CTABLE)
1500:   for (i=0; i<ismax; i++) {PetscTableDestroy((PetscTable*)&rmap[i]);}
1501: #else
1502:   if (ismax) {PetscFree(rmap[0]);}
1503: #endif
1504:   PetscFree(rmap);

1506:   for (i=0; i<ismax; i++) {
1507:     if (!allcolumns[i]) {
1508: #if defined(PETSC_USE_CTABLE)
1509:       PetscTableDestroy((PetscTable*)&cmap[i]);
1510: #else
1511:       PetscFree(cmap[i]);
1512: #endif
1513:     }
1514:   }
1515:   PetscFree(cmap);
1516:   if (ismax) {PetscFree(lens[0]);}
1517:   PetscFree(lens);

1519:   for (i=0; i<ismax; i++) {
1520:     MatAssemblyBegin(submats[i],MAT_FINAL_ASSEMBLY);
1521:     MatAssemblyEnd(submats[i],MAT_FINAL_ASSEMBLY);
1522:   }
1523:   return(0);
1524: }

1526: /*
1527:  Observe that the Seq matrices used to construct this MPI matrix are not increfed (their reference counts are not increased).
1528:  Be careful not to destroy them elsewhere.
1529:  */
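/*
   Illustrative sketch (not in the source): a caller that needs A or B to outlive C must take its
   own reference, e.g.

     PetscObjectReference((PetscObject)A);

   before handing the matrix to this routine.
*/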
1532: PetscErrorCode MatCreateMPIAIJFromSeqMatrices_Private(MPI_Comm comm, Mat A, Mat B, Mat *C)
1533: {
1534:   /* If making this function public, change the error returned in this function away from _PLIB. */
1536:   Mat_MPIAIJ     *aij;
1537:   PetscBool      seqaij;

1540:   /* Check to make sure the component matrices are compatible with C. */
1541:   PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &seqaij);
1542:   if (!seqaij) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Diagonal matrix is of wrong type");
1543:   PetscObjectTypeCompare((PetscObject)B, MATSEQAIJ, &seqaij);
1544:   if (!seqaij) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Off-diagonal matrix is of wrong type");
1545:   if (PetscAbs(A->rmap->n) != PetscAbs(B->rmap->n) || PetscAbs(A->rmap->bs) != PetscAbs(B->rmap->bs) || PetscAbs(A->cmap->bs) != PetscAbs(B->cmap->bs)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incompatible component matrices of an MPIAIJ matrix");

1547:   MatCreate(comm, C);
1548:   MatSetSizes(*C,A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE);
1549:   MatSetBlockSizesFromMats(*C,A,A);
1550:   PetscLayoutSetUp((*C)->rmap);
1551:   PetscLayoutSetUp((*C)->cmap);
1552:   if ((*C)->cmap->N != A->cmap->n + B->cmap->n) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incompatible component matrices of an MPIAIJ matrix");
1553:   MatSetType(*C, MATMPIAIJ);
1554:   aij    = (Mat_MPIAIJ*)((*C)->data);
1555:   aij->A = A;
1556:   aij->B = B;
1557:   PetscLogObjectParent((PetscObject)*C,(PetscObject)A);
1558:   PetscLogObjectParent((PetscObject)*C,(PetscObject)B);

1560:   (*C)->preallocated = (PetscBool)(A->preallocated && B->preallocated);
1561:   (*C)->assembled    = (PetscBool)(A->assembled && B->assembled);
1562:   return(0);
1563: }

1567: PetscErrorCode MatMPIAIJExtractSeqMatrices_Private(Mat C, Mat *A, Mat *B)
1568: {
1569:   Mat_MPIAIJ *aij = (Mat_MPIAIJ*) (C->data);

1574:   *A = aij->A;
1575:   *B = aij->B;
1576:   /* Note that we don't incref *A and *B, so be careful! */
1577:   return(0);
1578: }

1582: PetscErrorCode MatGetSubMatricesParallel_MPIXAIJ(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[],
1583:                                                  PetscErrorCode(*getsubmats_seq)(Mat, PetscInt, const IS[], const IS[], MatReuse, Mat**),
1584:                                                  PetscErrorCode(*makefromseq)(MPI_Comm, Mat, Mat,Mat*),
1585:                                                  PetscErrorCode(*extractseq)(Mat, Mat*, Mat*))
1586: {
1588:   PetscMPIInt    size, flag;
1589:   PetscInt       i,ii;
1590:   PetscInt       ismax_c;

1593:   if (!ismax) return(0);

1595:   for (i = 0, ismax_c = 0; i < ismax; ++i) {
1596:     MPI_Comm_compare(((PetscObject)isrow[i])->comm,((PetscObject)iscol[i])->comm, &flag);
1597:     if (flag != MPI_IDENT) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column index sets must have the same communicator");
1598:     MPI_Comm_size(((PetscObject)isrow[i])->comm, &size);
1599:     if (size > 1) ++ismax_c;
1600:   }
1601:   if (!ismax_c) { /* Sequential ISs only, so can call the sequential matrix extraction subroutine. */
1602:     (*getsubmats_seq)(C,ismax,isrow,iscol,scall,submat);
1603:   } else { /* if (ismax_c) */
1604:     Mat         *A,*B;
1605:     IS          *isrow_c, *iscol_c;
1606:     PetscMPIInt size;
1607:     /*
1608:      Allocate the necessary arrays to hold the resulting parallel matrices as well as the intermediate
1609:      array of sequential matrices underlying the resulting parallel matrices.
1610:      Which arrays to allocate is based on the value of MatReuse scall.
1611:      There are as many diag matrices as there are original index sets.
1612:      There are only as many parallel and off-diag matrices as there are parallel (comm size > 1) index sets.

1614:      Sequential matrix arrays are allocated in any event: even if the array of parallel matrices already exists,
1615:      we need to consolidate the underlying seq matrices into a single array to serve as placeholders into which
1616:      getsubmats_seq will deposit the extracted diag and off-diag parts.
1617:      However, if reuse is taking place, we have to allocate the seq matrix arrays here.
1618:      If reuse is NOT taking place, then the seq matrix arrays are allocated by getsubmats_seq.
1619:     */

1621:     /* Parallel matrix array is allocated only if no reuse is taking place. */
1622:     if (scall != MAT_REUSE_MATRIX) {
1623:       PetscMalloc1((ismax),submat);
1624:     } else {
1625:       PetscMalloc1(ismax, &A);
1626:       PetscMalloc1(ismax_c, &B);
1627:       /* If parallel matrices are being reused, then simply reuse the underlying seq matrices as well. */
1628:       for (i = 0, ii = 0; i < ismax; ++i) {
1629:         MPI_Comm_size(((PetscObject)isrow[i])->comm, &size);
1630:         if (size > 1) {
1631:           (*extractseq)((*submat)[i],A+i,B+ii);
1632:           ++ii;
1633:         } else A[i] = (*submat)[i];
1634:       }
1635:     }
1636:     /*
1637:      Construct the complements of the iscol ISs for parallel ISs only.
1638:      These are used to extract the off-diag portion of the resulting parallel matrix.
1639:      The row IS for the off-diag portion is the same as for the diag portion,
1640:      so we merely alias the row IS, while skipping those that are sequential.
1641:     */
1642:     PetscMalloc2(ismax_c,&isrow_c, ismax_c, &iscol_c);
1643:     for (i = 0, ii = 0; i < ismax; ++i) {
1644:       MPI_Comm_size(((PetscObject)isrow[i])->comm, &size);
1645:       if (size > 1) {
1646:         isrow_c[ii] = isrow[i];

1648:         ISGetNonlocalIS(iscol[i], &(iscol_c[ii]));
1649:         ++ii;
1650:       }
1651:     }
1652:     /* Now obtain the sequential A and B submatrices separately. */
1653:     (*getsubmats_seq)(C,ismax,isrow, iscol,scall, &A);
1654:     (*getsubmats_seq)(C,ismax_c,isrow_c, iscol_c,scall, &B);
1655:     for (ii = 0; ii < ismax_c; ++ii) {
1656:       ISDestroy(&iscol_c[ii]);
1657:     }
1658:     PetscFree2(isrow_c, iscol_c);
1659:     /*
1660:      If scall == MAT_REUSE_MATRIX, we are done, since the sequential matrices A & B
1661:      have been extracted directly into the parallel matrices containing them, or
1662:      simply into the sequential matrix identical with the corresponding A (if size == 1).
1663:      Otherwise, make sure that parallel matrices are constructed from A & B, or the
1664:      A is put into the correct submat slot (if size == 1).
1665:      */
1666:     if (scall != MAT_REUSE_MATRIX) {
1667:       for (i = 0, ii = 0; i < ismax; ++i) {
1668:         MPI_Comm_size(((PetscObject)isrow[i])->comm, &size);
1669:         if (size > 1) {
1670:           /*
1671:            For each parallel isrow[i], create parallel matrices from the extracted sequential matrices.
1672:            */
1673:           /* Construct submat[i] from the Seq pieces A and B. */
1674:           (*makefromseq)(((PetscObject)isrow[i])->comm, A[i], B[ii], (*submat)+i);

1676:           ++ii;
1677:         } else (*submat)[i] = A[i];
1678:       }
1679:     }
1680:     PetscFree(A);
1681:     PetscFree(B);
1682:   }
1683:   return(0);
1684: } /* MatGetSubMatricesParallel_MPIXAIJ() */



1690: PetscErrorCode MatGetSubMatricesParallel_MPIAIJ(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[])
1691: {

1695:   MatGetSubMatricesParallel_MPIXAIJ(C,ismax,isrow,iscol,scall,submat,MatGetSubMatrices_MPIAIJ,MatCreateMPIAIJFromSeqMatrices_Private,MatMPIAIJExtractSeqMatrices_Private);
1696:   return(0);
1697: }