Actual source code: mpiaij.c

  1: #include <../src/mat/impls/aij/mpi/mpiaij.h>
  2: #include <petsc/private/vecimpl.h>
  3: #include <petsc/private/sfimpl.h>
  4: #include <petsc/private/isimpl.h>
  5: #include <petscblaslapack.h>
  6: #include <petscsf.h>
  7: #include <petsc/private/hashmapi.h>

  9: /*MC
 10:    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

 12:    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
 13:    and MATMPIAIJ otherwise.  As a result, for single process communicators,
 14:    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
 15:    for communicators controlling multiple processes.  It is recommended that you call both of
 16:    the above preallocation routines for simplicity.

 18:    Options Database Keys:
 19: . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

 21:   Developer Notes:
 22:     Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL. The AIJ type also automatically
 23:    switches over to use inodes when enough of them exist.

 25:   Level: beginner

 27: .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
 28: M*/
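/*
   A minimal usage sketch of the advice above: call both preallocation routines so the same code
   works whether the communicator has one process or many. The global size of 100 and the per-row
   estimates (5 diagonal and 2 off-diagonal nonzeros per row) are placeholder assumptions, not
   values taken from this file.

      Mat A;
      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);
      MatSetType(A,MATAIJ);
      MatSetFromOptions(A);                          <- honors -mat_type aij
      MatSeqAIJSetPreallocation(A,5,NULL);           <- used on a single-process communicator
      MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);    <- used on a multi-process communicator
*/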

 30: /*MC
 31:    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

 33:    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
 34:    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
 35:    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
 36:   for communicators controlling multiple processes.  It is recommended that you call both of
 37:   the above preallocation routines for simplicity.

 39:    Options Database Keys:
 40: . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

 42:   Level: beginner

 44: .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
 45: M*/
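/*
   A minimal sketch: the AIJCRL format can be requested at options time with -mat_type aijcrl, or an
   already assembled AIJ matrix can be converted in place; A below is an assumed, assembled MATAIJ matrix.

      MatConvert(A,MATAIJCRL,MAT_INPLACE_MATRIX,&A);
*/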

 47: static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
 48: {
 49:   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

 51: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
 52:   A->boundtocpu = flg;
 53: #endif
 54:   if (a->A) {
 55:     MatBindToCPU(a->A,flg);
 56:   }
 57:   if (a->B) {
 58:     MatBindToCPU(a->B,flg);
 59:   }

 61:   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
 62:    * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
 63:    * to differ from the parent matrix. */
 64:   if (a->lvec) {
 65:     VecBindToCPU(a->lvec,flg);
 66:   }
 67:   if (a->diag) {
 68:     VecBindToCPU(a->diag,flg);
 69:   }

 71:   return 0;
 72: }

 74: PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
 75: {
 76:   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

 78:   if (mat->A) {
 79:     MatSetBlockSizes(mat->A,rbs,cbs);
 80:     MatSetBlockSizes(mat->B,rbs,1);
 81:   }
 82:   return 0;
 83: }

 85: PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
 86: {
 87:   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
 88:   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
 89:   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
 90:   const PetscInt  *ia,*ib;
 91:   const MatScalar *aa,*bb,*aav,*bav;
 92:   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
 93:   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

 95:   *keptrows = NULL;

 97:   ia   = a->i;
 98:   ib   = b->i;
 99:   MatSeqAIJGetArrayRead(mat->A,&aav);
100:   MatSeqAIJGetArrayRead(mat->B,&bav);
101:   for (i=0; i<m; i++) {
102:     na = ia[i+1] - ia[i];
103:     nb = ib[i+1] - ib[i];
104:     if (!na && !nb) {
105:       cnt++;
106:       goto ok1;
107:     }
108:     aa = aav + ia[i];
109:     for (j=0; j<na; j++) {
110:       if (aa[j] != 0.0) goto ok1;
111:     }
112:     bb = bav + ib[i];
113:     for (j=0; j <nb; j++) {
114:       if (bb[j] != 0.0) goto ok1;
115:     }
116:     cnt++;
117: ok1:;
118:   }
119:   MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));
120:   if (!n0rows) {
121:     MatSeqAIJRestoreArrayRead(mat->A,&aav);
122:     MatSeqAIJRestoreArrayRead(mat->B,&bav);
123:     return 0;
124:   }
125:   PetscMalloc1(M->rmap->n-cnt,&rows);
126:   cnt  = 0;
127:   for (i=0; i<m; i++) {
128:     na = ia[i+1] - ia[i];
129:     nb = ib[i+1] - ib[i];
130:     if (!na && !nb) continue;
131:     aa = aav + ia[i];
132:     for (j=0; j<na;j++) {
133:       if (aa[j] != 0.0) {
134:         rows[cnt++] = rstart + i;
135:         goto ok2;
136:       }
137:     }
138:     bb = bav + ib[i];
139:     for (j=0; j<nb; j++) {
140:       if (bb[j] != 0.0) {
141:         rows[cnt++] = rstart + i;
142:         goto ok2;
143:       }
144:     }
145: ok2:;
146:   }
147:   ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);
148:   MatSeqAIJRestoreArrayRead(mat->A,&aav);
149:   MatSeqAIJRestoreArrayRead(mat->B,&bav);
150:   return 0;
151: }

153: PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
154: {
155:   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
156:   PetscBool         cong;

158:   MatHasCongruentLayouts(Y,&cong);
159:   if (Y->assembled && cong) {
160:     MatDiagonalSet(aij->A,D,is);
161:   } else {
162:     MatDiagonalSet_Default(Y,D,is);
163:   }
164:   return 0;
165: }

167: PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
168: {
169:   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
170:   PetscInt       i,rstart,nrows,*rows;

172:   *zrows = NULL;
173:   MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);
174:   MatGetOwnershipRange(M,&rstart,NULL);
175:   for (i=0; i<nrows; i++) rows[i] += rstart;
176:   ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);
177:   return 0;
178: }

180: PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions)
181: {
182:   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)A->data;
183:   PetscInt          i,m,n,*garray = aij->garray;
184:   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ*) aij->A->data;
185:   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ*) aij->B->data;
186:   PetscReal         *work;
187:   const PetscScalar *dummy;

189:   MatGetSize(A,&m,&n);
190:   PetscCalloc1(n,&work);
191:   MatSeqAIJGetArrayRead(aij->A,&dummy);
192:   MatSeqAIJRestoreArrayRead(aij->A,&dummy);
193:   MatSeqAIJGetArrayRead(aij->B,&dummy);
194:   MatSeqAIJRestoreArrayRead(aij->B,&dummy);
195:   if (type == NORM_2) {
196:     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
197:       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
198:     }
199:     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
200:       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
201:     }
202:   } else if (type == NORM_1) {
203:     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
204:       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
205:     }
206:     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
207:       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
208:     }
209:   } else if (type == NORM_INFINITY) {
210:     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
211:       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
212:     }
213:     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
214:       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
215:     }
216:   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
217:     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
218:       work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
219:     }
220:     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
221:       work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
222:     }
223:   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
224:     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
225:       work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
226:     }
227:     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
228:       work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
229:     }
230:   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
231:   if (type == NORM_INFINITY) {
232:     MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));
233:   } else {
234:     MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
235:   }
236:   PetscFree(work);
237:   if (type == NORM_2) {
238:     for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
239:   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
240:     for (i=0; i<n; i++) reductions[i] /= m;
241:   }
242:   return 0;
243: }

245: PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
246: {
247:   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
248:   IS              sis,gis;
249:   const PetscInt  *isis,*igis;
250:   PetscInt        n,*iis,nsis,ngis,rstart,i;

252:   MatFindOffBlockDiagonalEntries(a->A,&sis);
253:   MatFindNonzeroRows(a->B,&gis);
254:   ISGetSize(gis,&ngis);
255:   ISGetSize(sis,&nsis);
256:   ISGetIndices(sis,&isis);
257:   ISGetIndices(gis,&igis);

259:   PetscMalloc1(ngis+nsis,&iis);
260:   PetscArraycpy(iis,igis,ngis);
261:   PetscArraycpy(iis+ngis,isis,nsis);
262:   n    = ngis + nsis;
263:   PetscSortRemoveDupsInt(&n,iis);
264:   MatGetOwnershipRange(A,&rstart,NULL);
265:   for (i=0; i<n; i++) iis[i] += rstart;
266:   ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);

268:   ISRestoreIndices(sis,&isis);
269:   ISRestoreIndices(gis,&igis);
270:   ISDestroy(&sis);
271:   ISDestroy(&gis);
272:   return 0;
273: }

275: /*
276:   Local utility routine that creates a mapping from the global column
277:   number to the local number in the off-diagonal part of the local
278:   storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable, at
279:   a slightly higher hash table cost; without it, it is not scalable (each process
280:   holds an order-N integer array) but is fast to access.
281: */
282: PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
283: {
284:   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
285:   PetscInt       n = aij->B->cmap->n,i;

288: #if defined(PETSC_USE_CTABLE)
289:   PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);
290:   for (i=0; i<n; i++) {
291:     PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);
292:   }
293: #else
294:   PetscCalloc1(mat->cmap->N+1,&aij->colmap);
295:   PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));
296:   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
297: #endif
298:   return 0;
299: }
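/*
   A small sketch of how the colmap built above is consulted later (see MatSetValues_MPIAIJ() and
   MatGetValues_MPIAIJ() below): translate a global column index gcol into the local column index
   lcol of the off-diagonal block; lcol ends up as -1 when gcol is not present.

    #if defined(PETSC_USE_CTABLE)
      PetscTableFind(aij->colmap,gcol+1,&lcol);
      lcol--;
    #else
      lcol = aij->colmap[gcol] - 1;
    #endif
*/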

301: #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
302: { \
303:     if (col <= lastcol1)  low1 = 0;     \
304:     else                 high1 = nrow1; \
305:     lastcol1 = col;\
306:     while (high1-low1 > 5) { \
307:       t = (low1+high1)/2; \
308:       if (rp1[t] > col) high1 = t; \
309:       else              low1  = t; \
310:     } \
311:       for (_i=low1; _i<high1; _i++) { \
312:         if (rp1[_i] > col) break; \
313:         if (rp1[_i] == col) { \
314:           if (addv == ADD_VALUES) { \
315:             ap1[_i] += value;   \
316:             /* Not sure whether PetscLogFlops will slow down the code or not */ \
317:             (void)PetscLogFlops(1.0);   \
318:            } \
319:           else                    ap1[_i] = value; \
320:           goto a_noinsert; \
321:         } \
322:       }  \
323:       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
324:       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
326:       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
327:       N = nrow1++ - 1; a->nz++; high1++; \
328:       /* shift up all the later entries in this row */ \
329:       PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);\
330:       PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);\
331:       rp1[_i] = col;  \
332:       ap1[_i] = value;  \
333:       A->nonzerostate++;\
334:       a_noinsert: ; \
335:       ailen[row] = nrow1; \
336: }

338: #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
339:   { \
340:     if (col <= lastcol2) low2 = 0;                        \
341:     else high2 = nrow2;                                   \
342:     lastcol2 = col;                                       \
343:     while (high2-low2 > 5) {                              \
344:       t = (low2+high2)/2;                                 \
345:       if (rp2[t] > col) high2 = t;                        \
346:       else             low2  = t;                         \
347:     }                                                     \
348:     for (_i=low2; _i<high2; _i++) {                       \
349:       if (rp2[_i] > col) break;                           \
350:       if (rp2[_i] == col) {                               \
351:         if (addv == ADD_VALUES) {                         \
352:           ap2[_i] += value;                               \
353:           (void)PetscLogFlops(1.0);                       \
354:         }                                                 \
355:         else                    ap2[_i] = value;          \
356:         goto b_noinsert;                                  \
357:       }                                                   \
358:     }                                                     \
359:     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
360:     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
362:     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
363:     N = nrow2++ - 1; b->nz++; high2++;                    \
364:     /* shift up all the later entries in this row */      \
365:     PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);\
366:     PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);\
367:     rp2[_i] = col;                                        \
368:     ap2[_i] = value;                                      \
369:     B->nonzerostate++;                                    \
370:     b_noinsert: ;                                         \
371:     bilen[row] = nrow2;                                   \
372:   }

374: PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
375: {
376:   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
377:   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
378:   PetscInt       l,*garray = mat->garray,diag;
379:   PetscScalar    *aa,*ba;

381:   /* code only works for square matrices A */

383:   /* find size of row to the left of the diagonal part */
384:   MatGetOwnershipRange(A,&diag,NULL);
385:   row  = row - diag;
386:   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
387:     if (garray[b->j[b->i[row]+l]] > diag) break;
388:   }
389:   if (l) {
390:     MatSeqAIJGetArray(mat->B,&ba);
391:     PetscArraycpy(ba+b->i[row],v,l);
392:     MatSeqAIJRestoreArray(mat->B,&ba);
393:   }

395:   /* diagonal part */
396:   if (a->i[row+1]-a->i[row]) {
397:     MatSeqAIJGetArray(mat->A,&aa);
398:     PetscArraycpy(aa+a->i[row],v+l,(a->i[row+1]-a->i[row]));
399:     MatSeqAIJRestoreArray(mat->A,&aa);
400:   }

402:   /* right of diagonal part */
403:   if (b->i[row+1]-b->i[row]-l) {
404:     MatSeqAIJGetArray(mat->B,&ba);
405:     PetscArraycpy(ba+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);
406:     MatSeqAIJRestoreArray(mat->B,&ba);
407:   }
408:   return 0;
409: }

411: PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
412: {
413:   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
414:   PetscScalar    value = 0.0;
415:   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
416:   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
417:   PetscBool      roworiented = aij->roworiented;

419:   /* Some Variables required in the macro */
420:   Mat        A                    = aij->A;
421:   Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
422:   PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
423:   PetscBool  ignorezeroentries    = a->ignorezeroentries;
424:   Mat        B                    = aij->B;
425:   Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
426:   PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
427:   MatScalar  *aa,*ba;
428:   PetscInt   *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
429:   PetscInt   nonew;
430:   MatScalar  *ap1,*ap2;

432:   MatSeqAIJGetArray(A,&aa);
433:   MatSeqAIJGetArray(B,&ba);
434:   for (i=0; i<m; i++) {
435:     if (im[i] < 0) continue;
437:     if (im[i] >= rstart && im[i] < rend) {
438:       row      = im[i] - rstart;
439:       lastcol1 = -1;
440:       rp1      = aj + ai[row];
441:       ap1      = aa + ai[row];
442:       rmax1    = aimax[row];
443:       nrow1    = ailen[row];
444:       low1     = 0;
445:       high1    = nrow1;
446:       lastcol2 = -1;
447:       rp2      = bj + bi[row];
448:       ap2      = ba + bi[row];
449:       rmax2    = bimax[row];
450:       nrow2    = bilen[row];
451:       low2     = 0;
452:       high2    = nrow2;

454:       for (j=0; j<n; j++) {
455:         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
456:         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
457:         if (in[j] >= cstart && in[j] < cend) {
458:           col   = in[j] - cstart;
459:           nonew = a->nonew;
460:           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
461:         } else if (in[j] < 0) continue;
463:         else {
464:           if (mat->was_assembled) {
465:             if (!aij->colmap) {
466:               MatCreateColmap_MPIAIJ_Private(mat);
467:             }
468: #if defined(PETSC_USE_CTABLE)
469:             PetscTableFind(aij->colmap,in[j]+1,&col); /* map global col ids to local ones */
470:             col--;
471: #else
472:             col = aij->colmap[in[j]] - 1;
473: #endif
474:             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
475:               MatDisAssemble_MPIAIJ(mat); /* Change aij->B from reduced/local format to expanded/global format */
476:               col  =  in[j];
477:               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
478:               B        = aij->B;
479:               b        = (Mat_SeqAIJ*)B->data;
480:               bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
481:               rp2      = bj + bi[row];
482:               ap2      = ba + bi[row];
483:               rmax2    = bimax[row];
484:               nrow2    = bilen[row];
485:               low2     = 0;
486:               high2    = nrow2;
487:               bm       = aij->B->rmap->n;
488:               ba       = b->a;
489:             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
490:               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
491:                 PetscInfo(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n",(double)PetscRealPart(value),im[i],in[j]);
492:               } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
493:             }
494:           } else col = in[j];
495:           nonew = b->nonew;
496:           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
497:         }
498:       }
499:     } else {
501:       if (!aij->donotstash) {
502:         mat->assembled = PETSC_FALSE;
503:         if (roworiented) {
504:           MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
505:         } else {
506:           MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
507:         }
508:       }
509:     }
510:   }
511:   MatSeqAIJRestoreArray(A,&aa);
512:   MatSeqAIJRestoreArray(B,&ba);
513:   return 0;
514: }

516: /*
517:     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
518:     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
519:     No off-processor parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
520: */
521: PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
522: {
523:   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
524:   Mat            A           = aij->A; /* diagonal part of the matrix */
525:   Mat            B           = aij->B; /* offdiagonal part of the matrix */
526:   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
527:   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
528:   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
529:   PetscInt       *ailen      = a->ilen,*aj = a->j;
530:   PetscInt       *bilen      = b->ilen,*bj = b->j;
531:   PetscInt       am          = aij->A->rmap->n,j;
532:   PetscInt       diag_so_far = 0,dnz;
533:   PetscInt       offd_so_far = 0,onz;

535:   /* Iterate over all rows of the matrix */
536:   for (j=0; j<am; j++) {
537:     dnz = onz = 0;
538:     /*  Iterate over all non-zero columns of the current row */
539:     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
540:       /* If column is in the diagonal */
541:       if (mat_j[col] >= cstart && mat_j[col] < cend) {
542:         aj[diag_so_far++] = mat_j[col] - cstart;
543:         dnz++;
544:       } else { /* off-diagonal entries */
545:         bj[offd_so_far++] = mat_j[col];
546:         onz++;
547:       }
548:     }
549:     ailen[j] = dnz;
550:     bilen[j] = onz;
551:   }
552:   return 0;
553: }

555: /*
556:     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
557:     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
558:     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
559:     Also, mat->was_assembled has to be false, otherwise the assignment aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
560:     would not hold and the more complex MatSetValues_MPIAIJ() has to be used.
561: */
562: PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
563: {
564:   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
565:   Mat            A      = aij->A; /* diagonal part of the matrix */
566:   Mat            B      = aij->B; /* offdiagonal part of the matrix */
567:   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
568:   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
569:   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
570:   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
571:   PetscInt       *ailen = a->ilen,*aj = a->j;
572:   PetscInt       *bilen = b->ilen,*bj = b->j;
573:   PetscInt       am     = aij->A->rmap->n,j;
574:   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
575:   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
576:   PetscScalar    *aa = a->a,*ba = b->a;

578:   /* Iterate over all rows of the matrix */
579:   for (j=0; j<am; j++) {
580:     dnz_row = onz_row = 0;
581:     rowstart_offd = full_offd_i[j];
582:     rowstart_diag = full_diag_i[j];
583:     /*  Iterate over all non-zero columns of the current row */
584:     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
585:       /* If column is in the diagonal */
586:       if (mat_j[col] >= cstart && mat_j[col] < cend) {
587:         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
588:         aa[rowstart_diag+dnz_row] = mat_a[col];
589:         dnz_row++;
590:       } else { /* off-diagonal entries */
591:         bj[rowstart_offd+onz_row] = mat_j[col];
592:         ba[rowstart_offd+onz_row] = mat_a[col];
593:         onz_row++;
594:       }
595:     }
596:     ailen[j] = dnz_row;
597:     bilen[j] = onz_row;
598:   }
599:   return 0;
600: }

602: PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
603: {
604:   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
605:   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
606:   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

608:   for (i=0; i<m; i++) {
609:     if (idxm[i] < 0) continue; /* negative row */
611:     if (idxm[i] >= rstart && idxm[i] < rend) {
612:       row = idxm[i] - rstart;
613:       for (j=0; j<n; j++) {
614:         if (idxn[j] < 0) continue; /* negative column */
616:         if (idxn[j] >= cstart && idxn[j] < cend) {
617:           col  = idxn[j] - cstart;
618:           MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);
619:         } else {
620:           if (!aij->colmap) {
621:             MatCreateColmap_MPIAIJ_Private(mat);
622:           }
623: #if defined(PETSC_USE_CTABLE)
624:           PetscTableFind(aij->colmap,idxn[j]+1,&col);
625:           col--;
626: #else
627:           col = aij->colmap[idxn[j]] - 1;
628: #endif
629:           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
630:           else {
631:             MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);
632:           }
633:         }
634:       }
635:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
636:   }
637:   return 0;
638: }

640: PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
641: {
642:   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
643:   PetscInt       nstash,reallocs;

645:   if (aij->donotstash || mat->nooffprocentries) return 0;

647:   MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
648:   MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
649:   PetscInfo(aij->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs);
650:   return 0;
651: }

653: PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
654: {
655:   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
656:   PetscMPIInt    n;
657:   PetscInt       i,j,rstart,ncols,flg;
658:   PetscInt       *row,*col;
659:   PetscBool      other_disassembled;
660:   PetscScalar    *val;

662:   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

664:   if (!aij->donotstash && !mat->nooffprocentries) {
665:     while (1) {
666:       MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
667:       if (!flg) break;

669:       for (i=0; i<n;) {
670:         /* Now identify the consecutive vals belonging to the same row */
671:         for (j=i,rstart=row[j]; j<n; j++) {
672:           if (row[j] != rstart) break;
673:         }
674:         if (j < n) ncols = j-i;
675:         else       ncols = n-i;
676:         /* Now assemble all these values with a single function call */
677:         MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);
678:         i    = j;
679:       }
680:     }
681:     MatStashScatterEnd_Private(&mat->stash);
682:   }
683: #if defined(PETSC_HAVE_DEVICE)
684:   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
685:   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
686:   if (mat->boundtocpu) {
687:     MatBindToCPU(aij->A,PETSC_TRUE);
688:     MatBindToCPU(aij->B,PETSC_TRUE);
689:   }
690: #endif
691:   MatAssemblyBegin(aij->A,mode);
692:   MatAssemblyEnd(aij->A,mode);

694:   /* determine if any process has disassembled; if so, we must
695:      also disassemble ourselves so that we may reassemble. */
696:   /*
697:      if the nonzero structure of the submatrix B cannot change then we know that
698:      no process disassembled, and thus we can skip this step
699:   */
700:   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
701:     MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));
702:     if (mat->was_assembled && !other_disassembled) { /* mat on this rank has a reduced off-diag B with local col ids, but globally it does not */
703:       MatDisAssemble_MPIAIJ(mat);
704:     }
705:   }
706:   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
707:     MatSetUpMultiply_MPIAIJ(mat);
708:   }
709:   MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);
710: #if defined(PETSC_HAVE_DEVICE)
711:   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
712: #endif
713:   MatAssemblyBegin(aij->B,mode);
714:   MatAssemblyEnd(aij->B,mode);

716:   PetscFree2(aij->rowvalues,aij->rowindices);

718:   aij->rowvalues = NULL;

720:   VecDestroy(&aij->diag);

722:   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
723:   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
724:     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
725:     MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));
726:   }
727: #if defined(PETSC_HAVE_DEVICE)
728:   mat->offloadmask = PETSC_OFFLOAD_BOTH;
729: #endif
730:   return 0;
731: }

733: PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
734: {
735:   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;

737:   MatZeroEntries(l->A);
738:   MatZeroEntries(l->B);
739:   return 0;
740: }

742: PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
743: {
744:   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
745:   PetscObjectState sA, sB;
746:   PetscInt        *lrows;
747:   PetscInt         r, len;
748:   PetscBool        cong, lch, gch;

750:   /* get locally owned rows */
751:   MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);
752:   MatHasCongruentLayouts(A,&cong);
753:   /* fix right hand side if needed */
754:   if (x && b) {
755:     const PetscScalar *xx;
756:     PetscScalar       *bb;

759:     VecGetArrayRead(x, &xx);
760:     VecGetArray(b, &bb);
761:     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
762:     VecRestoreArrayRead(x, &xx);
763:     VecRestoreArray(b, &bb);
764:   }

766:   sA = mat->A->nonzerostate;
767:   sB = mat->B->nonzerostate;

769:   if (diag != 0.0 && cong) {
770:     MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);
771:     MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);
772:   } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertions */
773:     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
774:     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
775:     PetscInt   nnwA, nnwB;
776:     PetscBool  nnzA, nnzB;

778:     nnwA = aijA->nonew;
779:     nnwB = aijB->nonew;
780:     nnzA = aijA->keepnonzeropattern;
781:     nnzB = aijB->keepnonzeropattern;
782:     if (!nnzA) {
783:       PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");
784:       aijA->nonew = 0;
785:     }
786:     if (!nnzB) {
787:       PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");
788:       aijB->nonew = 0;
789:     }
790:     /* Must zero here before the next loop */
791:     MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);
792:     MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);
793:     for (r = 0; r < len; ++r) {
794:       const PetscInt row = lrows[r] + A->rmap->rstart;
795:       if (row >= A->cmap->N) continue;
796:       MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);
797:     }
798:     aijA->nonew = nnwA;
799:     aijB->nonew = nnwB;
800:   } else {
801:     MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);
802:     MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);
803:   }
804:   PetscFree(lrows);
805:   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
806:   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);

808:   /* reduce nonzerostate */
809:   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
810:   MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));
811:   if (gch) A->nonzerostate++;
812:   return 0;
813: }

815: PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
816: {
817:   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
818:   PetscMPIInt       n = A->rmap->n;
819:   PetscInt          i,j,r,m,len = 0;
820:   PetscInt          *lrows,*owners = A->rmap->range;
821:   PetscMPIInt       p = 0;
822:   PetscSFNode       *rrows;
823:   PetscSF           sf;
824:   const PetscScalar *xx;
825:   PetscScalar       *bb,*mask,*aij_a;
826:   Vec               xmask,lmask;
827:   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
828:   const PetscInt    *aj, *ii,*ridx;
829:   PetscScalar       *aa;

831:   /* Create SF where leaves are input rows and roots are owned rows */
832:   PetscMalloc1(n, &lrows);
833:   for (r = 0; r < n; ++r) lrows[r] = -1;
834:   PetscMalloc1(N, &rrows);
835:   for (r = 0; r < N; ++r) {
836:     const PetscInt idx   = rows[r];
838:     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
839:       PetscLayoutFindOwner(A->rmap,idx,&p);
840:     }
841:     rrows[r].rank  = p;
842:     rrows[r].index = rows[r] - owners[p];
843:   }
844:   PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);
845:   PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);
846:   /* Collect flags for rows to be zeroed */
847:   PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);
848:   PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);
849:   PetscSFDestroy(&sf);
850:   /* Compress and put in row numbers */
851:   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
852:   /* zero diagonal part of matrix */
853:   MatZeroRowsColumns(l->A,len,lrows,diag,x,b);
854:   /* handle off diagonal part of matrix */
855:   MatCreateVecs(A,&xmask,NULL);
856:   VecDuplicate(l->lvec,&lmask);
857:   VecGetArray(xmask,&bb);
858:   for (i=0; i<len; i++) bb[lrows[i]] = 1;
859:   VecRestoreArray(xmask,&bb);
860:   VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);
861:   VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);
862:   VecDestroy(&xmask);
863:   if (x && b) { /* this code is buggy when the row and column layouts don't match */
864:     PetscBool cong;

866:     MatHasCongruentLayouts(A,&cong);
868:     VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);
869:     VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);
870:     VecGetArrayRead(l->lvec,&xx);
871:     VecGetArray(b,&bb);
872:   }
873:   VecGetArray(lmask,&mask);
874:   /* remove zeroed rows of off diagonal matrix */
875:   MatSeqAIJGetArray(l->B,&aij_a);
876:   ii = aij->i;
877:   for (i=0; i<len; i++) {
878:     PetscArrayzero(aij_a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);
879:   }
880:   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
881:   if (aij->compressedrow.use) {
882:     m    = aij->compressedrow.nrows;
883:     ii   = aij->compressedrow.i;
884:     ridx = aij->compressedrow.rindex;
885:     for (i=0; i<m; i++) {
886:       n  = ii[i+1] - ii[i];
887:       aj = aij->j + ii[i];
888:       aa = aij_a + ii[i];

890:       for (j=0; j<n; j++) {
891:         if (PetscAbsScalar(mask[*aj])) {
892:           if (b) bb[*ridx] -= *aa*xx[*aj];
893:           *aa = 0.0;
894:         }
895:         aa++;
896:         aj++;
897:       }
898:       ridx++;
899:     }
900:   } else { /* do not use compressed row format */
901:     m = l->B->rmap->n;
902:     for (i=0; i<m; i++) {
903:       n  = ii[i+1] - ii[i];
904:       aj = aij->j + ii[i];
905:       aa = aij_a + ii[i];
906:       for (j=0; j<n; j++) {
907:         if (PetscAbsScalar(mask[*aj])) {
908:           if (b) bb[i] -= *aa*xx[*aj];
909:           *aa = 0.0;
910:         }
911:         aa++;
912:         aj++;
913:       }
914:     }
915:   }
916:   if (x && b) {
917:     VecRestoreArray(b,&bb);
918:     VecRestoreArrayRead(l->lvec,&xx);
919:   }
920:   MatSeqAIJRestoreArray(l->B,&aij_a);
921:   VecRestoreArray(lmask,&mask);
922:   VecDestroy(&lmask);
923:   PetscFree(lrows);

925:   /* only change matrix nonzero state if pattern was allowed to be changed */
926:   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
927:     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
928:     MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));
929:   }
930:   return 0;
931: }

933: PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
934: {
935:   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
936:   PetscInt       nt;
937:   VecScatter     Mvctx = a->Mvctx;

939:   VecGetLocalSize(xx,&nt);
941:   VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
942:   (*a->A->ops->mult)(a->A,xx,yy);
943:   VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
944:   (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
945:   return 0;
946: }

948: PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
949: {
950:   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

952:   MatMultDiagonalBlock(a->A,bb,xx);
953:   return 0;
954: }

956: PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
957: {
958:   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
959:   VecScatter     Mvctx = a->Mvctx;

961:   VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
962:   (*a->A->ops->multadd)(a->A,xx,yy,zz);
963:   VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
964:   (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);
965:   return 0;
966: }

968: PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
969: {
970:   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

972:   /* do nondiagonal part */
973:   (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
974:   /* do local part */
975:   (*a->A->ops->multtranspose)(a->A,xx,yy);
976:   /* add partial results together */
977:   VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
978:   VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
979:   return 0;
980: }

982: PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
983: {
984:   MPI_Comm       comm;
985:   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
986:   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
987:   IS             Me,Notme;
988:   PetscInt       M,N,first,last,*notme,i;
989:   PetscBool      lf;
990:   PetscMPIInt    size;

992:   /* Easy test: symmetric diagonal block */
993:   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
994:   MatIsTranspose(Adia,Bdia,tol,&lf);
995:   MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));
996:   if (!*f) return 0;
997:   PetscObjectGetComm((PetscObject)Amat,&comm);
998:   MPI_Comm_size(comm,&size);
999:   if (size == 1) return 0;

1001:   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1002:   MatGetSize(Amat,&M,&N);
1003:   MatGetOwnershipRange(Amat,&first,&last);
1004:   PetscMalloc1(N-last+first,&notme);
1005:   for (i=0; i<first; i++) notme[i] = i;
1006:   for (i=last; i<M; i++) notme[i-last+first] = i;
1007:   ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);
1008:   ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);
1009:   MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);
1010:   Aoff = Aoffs[0];
1011:   MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);
1012:   Boff = Boffs[0];
1013:   MatIsTranspose(Aoff,Boff,tol,f);
1014:   MatDestroyMatrices(1,&Aoffs);
1015:   MatDestroyMatrices(1,&Boffs);
1016:   ISDestroy(&Me);
1017:   ISDestroy(&Notme);
1018:   PetscFree(notme);
1019:   return 0;
1020: }

1022: PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1023: {
1024:   MatIsTranspose_MPIAIJ(A,A,tol,f);
1025:   return 0;
1026: }

1028: PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1029: {
1030:   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

1032:   /* do nondiagonal part */
1033:   (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
1034:   /* do local part */
1035:   (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);
1036:   /* add partial results together */
1037:   VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
1038:   VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
1039:   return 0;
1040: }

1042: /*
1043:   This only works correctly for square matrices where the subblock A->A is the
1044:    diagonal block
1045: */
1046: PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1047: {
1048:   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

1052:   MatGetDiagonal(a->A,v);
1053:   return 0;
1054: }

1056: PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1057: {
1058:   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

1060:   MatScale(a->A,aa);
1061:   MatScale(a->B,aa);
1062:   return 0;
1063: }

1065: /* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
1066: PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
1067: {
1068:   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

1070:   PetscSFDestroy(&aij->coo_sf);
1071:   PetscFree4(aij->Aperm1,aij->Bperm1,aij->Ajmap1,aij->Bjmap1);
1072:   PetscFree4(aij->Aperm2,aij->Bperm2,aij->Ajmap2,aij->Bjmap2);
1073:   PetscFree4(aij->Aimap1,aij->Bimap1,aij->Aimap2,aij->Bimap2);
1074:   PetscFree2(aij->sendbuf,aij->recvbuf);
1075:   PetscFree(aij->Cperm1);
1076:   return 0;
1077: }

1079: PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1080: {
1081:   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

1083: #if defined(PETSC_USE_LOG)
1084:   PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
1085: #endif
1086:   MatStashDestroy_Private(&mat->stash);
1087:   VecDestroy(&aij->diag);
1088:   MatDestroy(&aij->A);
1089:   MatDestroy(&aij->B);
1090: #if defined(PETSC_USE_CTABLE)
1091:   PetscTableDestroy(&aij->colmap);
1092: #else
1093:   PetscFree(aij->colmap);
1094: #endif
1095:   PetscFree(aij->garray);
1096:   VecDestroy(&aij->lvec);
1097:   VecScatterDestroy(&aij->Mvctx);
1098:   PetscFree2(aij->rowvalues,aij->rowindices);
1099:   PetscFree(aij->ld);

1101:   /* Free COO */
1102:   MatResetPreallocationCOO_MPIAIJ(mat);

1104:   PetscFree(mat->data);

1106:   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1107:   PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);

1109:   PetscObjectChangeTypeName((PetscObject)mat,NULL);
1110:   PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);
1111:   PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);
1112:   PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);
1113:   PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);
1114:   PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);
1115:   PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);
1116:   PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);
1117:   PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);
1118:   PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);
1119: #if defined(PETSC_HAVE_CUDA)
1120:   PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL);
1121: #endif
1122: #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1123:   PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL);
1124: #endif
1125:   PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL);
1126: #if defined(PETSC_HAVE_ELEMENTAL)
1127:   PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);
1128: #endif
1129: #if defined(PETSC_HAVE_SCALAPACK)
1130:   PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);
1131: #endif
1132: #if defined(PETSC_HAVE_HYPRE)
1133:   PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);
1134:   PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);
1135: #endif
1136:   PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);
1137:   PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);
1138:   PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);
1139:   PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL);
1140:   PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL);
1141:   PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL);
1142: #if defined(PETSC_HAVE_MKL_SPARSE)
1143:   PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);
1144: #endif
1145:   PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);
1146:   PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);
1147:   PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);
1148:   PetscObjectComposeFunction((PetscObject)mat,"MatSetPreallocationCOO_C",NULL);
1149:   PetscObjectComposeFunction((PetscObject)mat,"MatSetValuesCOO_C",NULL);
1150:   return 0;
1151: }

1153: PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1154: {
1155:   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1156:   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1157:   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1158:   const PetscInt    *garray = aij->garray;
1159:   const PetscScalar *aa,*ba;
1160:   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1161:   PetscInt          *rowlens;
1162:   PetscInt          *colidxs;
1163:   PetscScalar       *matvals;

1165:   PetscViewerSetUp(viewer);

1167:   M  = mat->rmap->N;
1168:   N  = mat->cmap->N;
1169:   m  = mat->rmap->n;
1170:   rs = mat->rmap->rstart;
1171:   cs = mat->cmap->rstart;
1172:   nz = A->nz + B->nz;

1174:   /* write matrix header */
1175:   header[0] = MAT_FILE_CLASSID;
1176:   header[1] = M; header[2] = N; header[3] = nz;
1177:   MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));
1178:   PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);

1180:   /* fill in and store row lengths  */
1181:   PetscMalloc1(m,&rowlens);
1182:   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1183:   PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);
1184:   PetscFree(rowlens);

1186:   /* fill in and store column indices */
1187:   PetscMalloc1(nz,&colidxs);
1188:   for (cnt=0, i=0; i<m; i++) {
1189:     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1190:       if (garray[B->j[jb]] > cs) break;
1191:       colidxs[cnt++] = garray[B->j[jb]];
1192:     }
1193:     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1194:       colidxs[cnt++] = A->j[ja] + cs;
1195:     for (; jb<B->i[i+1]; jb++)
1196:       colidxs[cnt++] = garray[B->j[jb]];
1197:   }
1199:   PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);
1200:   PetscFree(colidxs);

1202:   /* fill in and store nonzero values */
1203:   MatSeqAIJGetArrayRead(aij->A,&aa);
1204:   MatSeqAIJGetArrayRead(aij->B,&ba);
1205:   PetscMalloc1(nz,&matvals);
1206:   for (cnt=0, i=0; i<m; i++) {
1207:     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1208:       if (garray[B->j[jb]] > cs) break;
1209:       matvals[cnt++] = ba[jb];
1210:     }
1211:     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1212:       matvals[cnt++] = aa[ja];
1213:     for (; jb<B->i[i+1]; jb++)
1214:       matvals[cnt++] = ba[jb];
1215:   }
1216:   MatSeqAIJRestoreArrayRead(aij->A,&aa);
1217:   MatSeqAIJRestoreArrayRead(aij->B,&ba);
1219:   PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);
1220:   PetscFree(matvals);

1222:   /* write block size option to the viewer's .info file */
1223:   MatView_Binary_BlockSizes(mat,viewer);
1224:   return 0;
1225: }

1227: #include <petscdraw.h>
1228: PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1229: {
1230:   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1231:   PetscMPIInt       rank = aij->rank,size = aij->size;
1232:   PetscBool         isdraw,iascii,isbinary;
1233:   PetscViewer       sviewer;
1234:   PetscViewerFormat format;

1236:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1237:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1238:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1239:   if (iascii) {
1240:     PetscViewerGetFormat(viewer,&format);
1241:     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1242:       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1243:       PetscMalloc1(size,&nz);
1244:       MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));
1245:       for (i=0; i<(PetscInt)size; i++) {
1246:         nmax = PetscMax(nmax,nz[i]);
1247:         nmin = PetscMin(nmin,nz[i]);
1248:         navg += nz[i];
1249:       }
1250:       PetscFree(nz);
1251:       navg = navg/size;
1252:       PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n",nmin,navg,nmax);
1253:       return 0;
1254:     }
1255:     PetscViewerGetFormat(viewer,&format);
1256:     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1257:       MatInfo   info;
1258:       PetscInt *inodes=NULL;

1260:       MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
1261:       MatGetInfo(mat,MAT_LOCAL,&info);
1262:       MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);
1263:       PetscViewerASCIIPushSynchronized(viewer);
1264:       if (!inodes) {
1265:         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n",
1266:                                                    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
1267:       } else {
1268:         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n",
1269:                                                    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
1270:       }
1271:       MatGetInfo(aij->A,MAT_LOCAL,&info);
1272:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used);
1273:       MatGetInfo(aij->B,MAT_LOCAL,&info);
1274:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used);
1275:       PetscViewerFlush(viewer);
1276:       PetscViewerASCIIPopSynchronized(viewer);
1277:       PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");
1278:       VecScatterView(aij->Mvctx,viewer);
1279:       return 0;
1280:     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1281:       PetscInt inodecount,inodelimit,*inodes;
1282:       MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);
1283:       if (inodes) {
1284:         PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n",inodecount,inodelimit);
1285:       } else {
1286:         PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");
1287:       }
1288:       return 0;
1289:     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1290:       return 0;
1291:     }
1292:   } else if (isbinary) {
1293:     if (size == 1) {
1294:       PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);
1295:       MatView(aij->A,viewer);
1296:     } else {
1297:       MatView_MPIAIJ_Binary(mat,viewer);
1298:     }
1299:     return 0;
1300:   } else if (iascii && size == 1) {
1301:     PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);
1302:     MatView(aij->A,viewer);
1303:     return 0;
1304:   } else if (isdraw) {
1305:     PetscDraw draw;
1306:     PetscBool isnull;
1307:     PetscViewerDrawGetDraw(viewer,0,&draw);
1308:     PetscDrawIsNull(draw,&isnull);
1309:     if (isnull) return 0;
1310:   }

1312:   { /* assemble the entire matrix onto first processor */
1313:     Mat A = NULL, Av;
1314:     IS  isrow,iscol;

1316:     ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow);
1317:     ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol);
1318:     MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);
1319:     MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);
1320: /*  The commented-out code below uses MatCreateSubMatrices() instead */
1321: /*
1322:     Mat *AA, A = NULL, Av;
1323:     IS  isrow,iscol;

1325:     ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow);
1326:     ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol);
1327:     MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);
1328:     if (rank == 0) {
1329:        PetscObjectReference((PetscObject)AA[0]);
1330:        A    = AA[0];
1331:        Av   = AA[0];
1332:     }
1333:     MatDestroySubMatrices(1,&AA);
1334: */
1335:     ISDestroy(&iscol);
1336:     ISDestroy(&isrow);
1337:     /*
1338:        Everyone has to participate in drawing the matrix since the graphics waits are
1339:        synchronized across all processes that share the PetscDraw object
1340:     */
1341:     PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
1342:     if (rank == 0) {
1343:       if (((PetscObject)mat)->name) {
1344:         PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);
1345:       }
1346:       MatView_SeqAIJ(Av,sviewer);
1347:     }
1348:     PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
1349:     PetscViewerFlush(viewer);
1350:     MatDestroy(&A);
1351:   }
1352:   return 0;
1353: }

1355: PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1356: {
1357:   PetscBool      iascii,isdraw,issocket,isbinary;

1359:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1360:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
1361:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
1362:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);
1363:   if (iascii || isdraw || isbinary || issocket) {
1364:     MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);
1365:   }
1366:   return 0;
1367: }

1369: PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1370: {
1371:   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1372:   Vec            bb1 = NULL;
1373:   PetscBool      hasop;

1375:   if (flag == SOR_APPLY_UPPER) {
1376:     (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1377:     return 0;
1378:   }

1380:   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1381:     VecDuplicate(bb,&bb1);
1382:   }

1384:   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1385:     if (flag & SOR_ZERO_INITIAL_GUESS) {
1386:       (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1387:       its--;
1388:     }

1390:     while (its--) {
1391:       VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1392:       VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);

1394:       /* update rhs: bb1 = bb - B*x */
1395:       VecScale(mat->lvec,-1.0);
1396:       (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);

1398:       /* local sweep */
1399:       (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);
1400:     }
1401:   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1402:     if (flag & SOR_ZERO_INITIAL_GUESS) {
1403:       (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1404:       its--;
1405:     }
1406:     while (its--) {
1407:       VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1408:       VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);

1410:       /* update rhs: bb1 = bb - B*x */
1411:       VecScale(mat->lvec,-1.0);
1412:       (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);

1414:       /* local sweep */
1415:       (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);
1416:     }
1417:   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1418:     if (flag & SOR_ZERO_INITIAL_GUESS) {
1419:       (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
1420:       its--;
1421:     }
1422:     while (its--) {
1423:       VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1424:       VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);

1426:       /* update rhs: bb1 = bb - B*x */
1427:       VecScale(mat->lvec,-1.0);
1428:       (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);

1430:       /* local sweep */
1431:       (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);
1432:     }
1433:   } else if (flag & SOR_EISENSTAT) {
1434:     Vec xx1;

1436:     VecDuplicate(bb,&xx1);
1437:     (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);

1439:     VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1440:     VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1441:     if (!mat->diag) {
1442:       MatCreateVecs(matin,&mat->diag,NULL);
1443:       MatGetDiagonal(matin,mat->diag);
1444:     }
1445:     MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);
1446:     if (hasop) {
1447:       MatMultDiagonalBlock(matin,xx,bb1);
1448:     } else {
1449:       VecPointwiseMult(bb1,mat->diag,xx);
1450:     }
1451:     VecAYPX(bb1,(omega-2.0)/omega,bb);

1453:     MatMultAdd(mat->B,mat->lvec,bb1,bb1);

1455:     /* local sweep */
1456:     (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);
1457:     VecAXPY(xx,1.0,xx1);
1458:     VecDestroy(&xx1);
1459:   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");

1461:   VecDestroy(&bb1);

1463:   matin->factorerrortype = mat->A->factorerrortype;
1464:   return 0;
1465: }
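
/*
   A minimal usage sketch (assumes an assembled MATMPIAIJ matrix A and conforming vectors
   b and x); the local sweeps above are normally reached through PCSOR:

     KSP ksp;
     PC  pc;
     KSPCreate(PETSC_COMM_WORLD,&ksp);
     KSPSetOperators(ksp,A,A);
     KSPGetPC(ksp,&pc);
     PCSetType(pc,PCSOR);
     PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);
     KSPSolve(ksp,b,x);

   A direct call with omega = 1.0 and one local symmetric sweep would read
     MatSOR(A,b,1.0,SOR_LOCAL_SYMMETRIC_SWEEP,0.0,1,1,x);
*/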

1467: PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1468: {
1469:   Mat            aA,aB,Aperm;
1470:   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1471:   PetscScalar    *aa,*ba;
1472:   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1473:   PetscSF        rowsf,sf;
1474:   IS             parcolp = NULL;
1475:   PetscBool      done;

1477:   MatGetLocalSize(A,&m,&n);
1478:   ISGetIndices(rowp,&rwant);
1479:   ISGetIndices(colp,&cwant);
1480:   PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);

1482:   /* Invert row permutation to find out where my rows should go */
1483:   PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);
1484:   PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);
1485:   PetscSFSetFromOptions(rowsf);
1486:   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1487:   PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);
1488:   PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);

1490:   /* Invert column permutation to find out where my columns should go */
1491:   PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);
1492:   PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);
1493:   PetscSFSetFromOptions(sf);
1494:   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1495:   PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);
1496:   PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);
1497:   PetscSFDestroy(&sf);

1499:   ISRestoreIndices(rowp,&rwant);
1500:   ISRestoreIndices(colp,&cwant);
1501:   MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);

1503:   /* Find out where my gcols should go */
1504:   MatGetSize(aB,NULL,&ng);
1505:   PetscMalloc1(ng,&gcdest);
1506:   PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);
1507:   PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);
1508:   PetscSFSetFromOptions(sf);
1509:   PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);
1510:   PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);
1511:   PetscSFDestroy(&sf);

1513:   PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);
1514:   MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);
1515:   MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);
1516:   for (i=0; i<m; i++) {
1517:     PetscInt    row = rdest[i];
1518:     PetscMPIInt rowner;
1519:     PetscLayoutFindOwner(A->rmap,row,&rowner);
1520:     for (j=ai[i]; j<ai[i+1]; j++) {
1521:       PetscInt    col = cdest[aj[j]];
1522:       PetscMPIInt cowner;
1523:       PetscLayoutFindOwner(A->cmap,col,&cowner); /* Could build an index for the columns to eliminate this search */
1524:       if (rowner == cowner) dnnz[i]++;
1525:       else onnz[i]++;
1526:     }
1527:     for (j=bi[i]; j<bi[i+1]; j++) {
1528:       PetscInt    col = gcdest[bj[j]];
1529:       PetscMPIInt cowner;
1530:       PetscLayoutFindOwner(A->cmap,col,&cowner);
1531:       if (rowner == cowner) dnnz[i]++;
1532:       else onnz[i]++;
1533:     }
1534:   }
1535:   PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);
1536:   PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);
1537:   PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);
1538:   PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);
1539:   PetscSFDestroy(&rowsf);

1541:   MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);
1542:   MatSeqAIJGetArray(aA,&aa);
1543:   MatSeqAIJGetArray(aB,&ba);
1544:   for (i=0; i<m; i++) {
1545:     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1546:     PetscInt j0,rowlen;
1547:     rowlen = ai[i+1] - ai[i];
1548:     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could exceed m, the length of the scratch arrays, so insert in batches of at most m */
1549:       for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1550:       MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);
1551:     }
1552:     rowlen = bi[i+1] - bi[i];
1553:     for (j0=j=0; j<rowlen; j0=j) {
1554:       for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1555:       MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);
1556:     }
1557:   }
1558:   MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);
1559:   MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);
1560:   MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);
1561:   MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);
1562:   MatSeqAIJRestoreArray(aA,&aa);
1563:   MatSeqAIJRestoreArray(aB,&ba);
1564:   PetscFree4(dnnz,onnz,tdnnz,tonnz);
1565:   PetscFree3(work,rdest,cdest);
1566:   PetscFree(gcdest);
1567:   if (parcolp) ISDestroy(&colp);
1568:   *B = Aperm;
1569:   return 0;
1570: }
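
/*
   A minimal usage sketch (assumes an assembled MATMPIAIJ matrix A); rowp and colp hold,
   for the locally owned rows and columns, the global indices of the new ordering.  For
   illustration the identity permutation is built with ISCreateStride(), so Aperm equals A:

     IS       rowp,colp;
     Mat      Aperm;
     PetscInt rstart,rend,cstart,cend;
     MatGetOwnershipRange(A,&rstart,&rend);
     MatGetOwnershipRangeColumn(A,&cstart,&cend);
     ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&rowp);
     ISCreateStride(PETSC_COMM_WORLD,cend-cstart,cstart,1,&colp);
     MatPermute(A,rowp,colp,&Aperm);
     ISDestroy(&rowp);
     ISDestroy(&colp);
     MatDestroy(&Aperm);
*/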

1572: PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1573: {
1574:   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

1576:   MatGetSize(aij->B,NULL,nghosts);
1577:   if (ghosts) *ghosts = aij->garray;
1578:   return 0;
1579: }

1581: PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1582: {
1583:   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1584:   Mat            A    = mat->A,B = mat->B;
1585:   PetscLogDouble isend[5],irecv[5];

1587:   info->block_size = 1.0;
1588:   MatGetInfo(A,MAT_LOCAL,info);

1590:   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1591:   isend[3] = info->memory;  isend[4] = info->mallocs;

1593:   MatGetInfo(B,MAT_LOCAL,info);

1595:   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1596:   isend[3] += info->memory;  isend[4] += info->mallocs;
1597:   if (flag == MAT_LOCAL) {
1598:     info->nz_used      = isend[0];
1599:     info->nz_allocated = isend[1];
1600:     info->nz_unneeded  = isend[2];
1601:     info->memory       = isend[3];
1602:     info->mallocs      = isend[4];
1603:   } else if (flag == MAT_GLOBAL_MAX) {
1604:     MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));

1606:     info->nz_used      = irecv[0];
1607:     info->nz_allocated = irecv[1];
1608:     info->nz_unneeded  = irecv[2];
1609:     info->memory       = irecv[3];
1610:     info->mallocs      = irecv[4];
1611:   } else if (flag == MAT_GLOBAL_SUM) {
1612:     MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));

1614:     info->nz_used      = irecv[0];
1615:     info->nz_allocated = irecv[1];
1616:     info->nz_unneeded  = irecv[2];
1617:     info->memory       = irecv[3];
1618:     info->mallocs      = irecv[4];
1619:   }
1620:   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1621:   info->fill_ratio_needed = 0;
1622:   info->factor_mallocs    = 0;
1623:   return 0;
1624: }
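
/*
   A minimal usage sketch of the information gathered above (assumes an assembled
   MATMPIAIJ matrix A):

     MatInfo info;
     MatGetInfo(A,MAT_GLOBAL_SUM,&info);
     PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %g allocated %g mallocs %g\n",
                 info.nz_used,info.nz_allocated,info.mallocs);

   MAT_LOCAL reports only this process' A and B blocks, MAT_GLOBAL_MAX the per-process
   maximum over the communicator.
*/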

1626: PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1627: {
1628:   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

1630:   switch (op) {
1631:   case MAT_NEW_NONZERO_LOCATIONS:
1632:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1633:   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1634:   case MAT_KEEP_NONZERO_PATTERN:
1635:   case MAT_NEW_NONZERO_LOCATION_ERR:
1636:   case MAT_USE_INODES:
1637:   case MAT_IGNORE_ZERO_ENTRIES:
1638:   case MAT_FORM_EXPLICIT_TRANSPOSE:
1639:     MatCheckPreallocated(A,1);
1640:     MatSetOption(a->A,op,flg);
1641:     MatSetOption(a->B,op,flg);
1642:     break;
1643:   case MAT_ROW_ORIENTED:
1644:     MatCheckPreallocated(A,1);
1645:     a->roworiented = flg;

1647:     MatSetOption(a->A,op,flg);
1648:     MatSetOption(a->B,op,flg);
1649:     break;
1650:   case MAT_FORCE_DIAGONAL_ENTRIES:
1651:   case MAT_SORTED_FULL:
1652:     PetscInfo(A,"Option %s ignored\n",MatOptions[op]);
1653:     break;
1654:   case MAT_IGNORE_OFF_PROC_ENTRIES:
1655:     a->donotstash = flg;
1656:     break;
1657:   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1658:   case MAT_SPD:
1659:   case MAT_SYMMETRIC:
1660:   case MAT_STRUCTURALLY_SYMMETRIC:
1661:   case MAT_HERMITIAN:
1662:   case MAT_SYMMETRY_ETERNAL:
1663:     break;
1664:   case MAT_SUBMAT_SINGLEIS:
1665:     A->submat_singleis = flg;
1666:     break;
1667:   case MAT_STRUCTURE_ONLY:
1668:     /* The option is handled directly by MatSetOption() */
1669:     break;
1670:   default:
1671:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1672:   }
1673:   return 0;
1674: }
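
/*
   A minimal usage sketch of two of the options forwarded above (assumes A is a MATMPIAIJ
   matrix that has been preallocated):

     MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);
     MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);

   The first avoids stashing and communicating entries destined for other processes during
   assembly, the second permits insertions outside the preallocated pattern.
*/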

1676: PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1677: {
1678:   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1679:   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1680:   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1681:   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1682:   PetscInt       *cmap,*idx_p;

1685:   mat->getrowactive = PETSC_TRUE;

1687:   if (!mat->rowvalues && (idx || v)) {
1688:     /*
1689:         allocate enough space to hold information from the longest row.
1690:     */
1691:     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1692:     PetscInt   max = 1,tmp;
1693:     for (i=0; i<matin->rmap->n; i++) {
1694:       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1695:       if (max < tmp) max = tmp;
1696:     }
1697:     PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);
1698:   }

1701:   lrow = row - rstart;

1703:   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1704:   if (!v)   {pvA = NULL; pvB = NULL;}
1705:   if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1706:   (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
1707:   (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
1708:   nztot = nzA + nzB;

1710:   cmap = mat->garray;
1711:   if (v  || idx) {
1712:     if (nztot) {
1713:       /* Sort by increasing column numbers, assuming A and B already sorted */
1714:       PetscInt imark = -1;
1715:       if (v) {
1716:         *v = v_p = mat->rowvalues;
1717:         for (i=0; i<nzB; i++) {
1718:           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1719:           else break;
1720:         }
1721:         imark = i;
1722:         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1723:         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1724:       }
1725:       if (idx) {
1726:         *idx = idx_p = mat->rowindices;
1727:         if (imark > -1) {
1728:           for (i=0; i<imark; i++) {
1729:             idx_p[i] = cmap[cworkB[i]];
1730:           }
1731:         } else {
1732:           for (i=0; i<nzB; i++) {
1733:             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1734:             else break;
1735:           }
1736:           imark = i;
1737:         }
1738:         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1739:         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1740:       }
1741:     } else {
1742:       if (idx) *idx = NULL;
1743:       if (v)   *v   = NULL;
1744:     }
1745:   }
1746:   *nz  = nztot;
1747:   (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
1748:   (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
1749:   return 0;
1750: }
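
/*
   A minimal usage sketch (assumes an assembled MATMPIAIJ matrix A); only locally owned
   rows may be requested, which is why the routine above works with lrow = row - rstart:

     PetscInt          rstart,rend,row,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     MatGetOwnershipRange(A,&rstart,&rend);
     for (row=rstart; row<rend; row++) {
       MatGetRow(A,row,&ncols,&cols,&vals);
       ... use the global column indices cols[] and the values vals[] ...
       MatRestoreRow(A,row,&ncols,&cols,&vals);
     }
*/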

1752: PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1753: {
1754:   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

1757:   aij->getrowactive = PETSC_FALSE;
1758:   return 0;
1759: }

1761: PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1762: {
1763:   Mat_MPIAIJ      *aij  = (Mat_MPIAIJ*)mat->data;
1764:   Mat_SeqAIJ      *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1765:   PetscInt        i,j,cstart = mat->cmap->rstart;
1766:   PetscReal       sum = 0.0;
1767:   const MatScalar *v,*amata,*bmata;

1769:   if (aij->size == 1) {
1770:     MatNorm(aij->A,type,norm);
1771:   } else {
1772:     MatSeqAIJGetArrayRead(aij->A,&amata);
1773:     MatSeqAIJGetArrayRead(aij->B,&bmata);
1774:     if (type == NORM_FROBENIUS) {
1775:       v = amata;
1776:       for (i=0; i<amat->nz; i++) {
1777:         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1778:       }
1779:       v = bmata;
1780:       for (i=0; i<bmat->nz; i++) {
1781:         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1782:       }
1783:       MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));
1784:       *norm = PetscSqrtReal(*norm);
1785:       PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);
1786:     } else if (type == NORM_1) { /* max column norm */
1787:       PetscReal *tmp,*tmp2;
1788:       PetscInt  *jj,*garray = aij->garray;
1789:       PetscCalloc1(mat->cmap->N+1,&tmp);
1790:       PetscMalloc1(mat->cmap->N+1,&tmp2);
1791:       *norm = 0.0;
1792:       v     = amata; jj = amat->j;
1793:       for (j=0; j<amat->nz; j++) {
1794:         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1795:       }
1796:       v = bmata; jj = bmat->j;
1797:       for (j=0; j<bmat->nz; j++) {
1798:         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1799:       }
1800:       MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));
1801:       for (j=0; j<mat->cmap->N; j++) {
1802:         if (tmp2[j] > *norm) *norm = tmp2[j];
1803:       }
1804:       PetscFree(tmp);
1805:       PetscFree(tmp2);
1806:       PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));
1807:     } else if (type == NORM_INFINITY) { /* max row norm */
1808:       PetscReal ntemp = 0.0;
1809:       for (j=0; j<aij->A->rmap->n; j++) {
1810:         v   = amata + amat->i[j];
1811:         sum = 0.0;
1812:         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1813:           sum += PetscAbsScalar(*v); v++;
1814:         }
1815:         v = bmata + bmat->i[j];
1816:         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1817:           sum += PetscAbsScalar(*v); v++;
1818:         }
1819:         if (sum > ntemp) ntemp = sum;
1820:       }
1821:       MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));
1822:       PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));
1823:     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1824:     MatSeqAIJRestoreArrayRead(aij->A,&amata);
1825:     MatSeqAIJRestoreArrayRead(aij->B,&bmata);
1826:   }
1827:   return 0;
1828: }
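
/*
   A minimal usage sketch of the norms computed above (assumes an assembled MATMPIAIJ
   matrix A):

     PetscReal n1,nf,ninf;
     MatNorm(A,NORM_1,&n1);
     MatNorm(A,NORM_FROBENIUS,&nf);
     MatNorm(A,NORM_INFINITY,&ninf);

   NORM_2 is not supported in parallel, as the error above indicates.
*/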

1830: PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1831: {
1832:   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
1833:   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
1834:   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
1835:   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
1836:   Mat             B,A_diag,*B_diag;
1837:   const MatScalar *pbv,*bv;

1839:   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1840:   ai = Aloc->i; aj = Aloc->j;
1841:   bi = Bloc->i; bj = Bloc->j;
1842:   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1843:     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1844:     PetscSFNode          *oloc;
1845:     PETSC_UNUSED PetscSF sf;

1847:     PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);
1848:     /* compute d_nnz for preallocation */
1849:     PetscArrayzero(d_nnz,na);
1850:     for (i=0; i<ai[ma]; i++) d_nnz[aj[i]]++;
1851:     /* compute local off-diagonal contributions */
1852:     PetscArrayzero(g_nnz,nb);
1853:     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1854:     /* map those to global */
1855:     PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);
1856:     PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);
1857:     PetscSFSetFromOptions(sf);
1858:     PetscArrayzero(o_nnz,na);
1859:     PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);
1860:     PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);
1861:     PetscSFDestroy(&sf);

1863:     MatCreate(PetscObjectComm((PetscObject)A),&B);
1864:     MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);
1865:     MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));
1866:     MatSetType(B,((PetscObject)A)->type_name);
1867:     MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
1868:     PetscFree4(d_nnz,o_nnz,g_nnz,oloc);
1869:   } else {
1870:     B    = *matout;
1871:     MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
1872:   }

1874:   b           = (Mat_MPIAIJ*)B->data;
1875:   A_diag      = a->A;
1876:   B_diag      = &b->A;
1877:   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
1878:   A_diag_ncol = A_diag->cmap->N;
1879:   B_diag_ilen = sub_B_diag->ilen;
1880:   B_diag_i    = sub_B_diag->i;

1882:   /* Set ilen for diagonal of B */
1883:   for (i=0; i<A_diag_ncol; i++) {
1884:     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
1885:   }

1887:   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
1888:   very quickly (without using MatSetValues()) because all writes are local. */
1889:   MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);

1891:   /* copy over the B part */
1892:   PetscMalloc1(bi[mb],&cols);
1893:   MatSeqAIJGetArrayRead(a->B,&bv);
1894:   pbv  = bv;
1895:   row  = A->rmap->rstart;
1896:   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1897:   cols_tmp = cols;
1898:   for (i=0; i<mb; i++) {
1899:     ncol = bi[i+1]-bi[i];
1900:     MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES);
1901:     row++;
1902:     pbv += ncol; cols_tmp += ncol;
1903:   }
1904:   PetscFree(cols);
1905:   MatSeqAIJRestoreArrayRead(a->B,&bv);

1907:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1908:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1909:   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1910:     *matout = B;
1911:   } else {
1912:     MatHeaderMerge(A,&B);
1913:   }
1914:   return 0;
1915: }
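
/*
   A minimal usage sketch of the reuse modes handled above (assumes an assembled MATMPIAIJ
   matrix A):

     Mat At;
     MatTranspose(A,MAT_INITIAL_MATRIX,&At);    create At = A^T
     MatTranspose(A,MAT_REUSE_MATRIX,&At);      refill At after A's values change
     MatTranspose(A,MAT_INPLACE_MATRIX,&A);     replace A by its transpose
     MatDestroy(&At);
*/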

1917: PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1918: {
1919:   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1920:   Mat            a    = aij->A,b = aij->B;
1921:   PetscInt       s1,s2,s3;

1923:   MatGetLocalSize(mat,&s2,&s3);
1924:   if (rr) {
1925:     VecGetLocalSize(rr,&s1);
1927:     /* Overlap communication with computation. */
1928:     VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);
1929:   }
1930:   if (ll) {
1931:     VecGetLocalSize(ll,&s1);
1933:     (*b->ops->diagonalscale)(b,ll,NULL);
1934:   }
1935:   /* scale the diagonal block */
1936:   (*a->ops->diagonalscale)(a,ll,rr);

1938:   if (rr) {
1939:     /* Do a scatter end and then right scale the off-diagonal block */
1940:     VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);
1941:     (*b->ops->diagonalscale)(b,NULL,aij->lvec);
1942:   }
1943:   return 0;
1944: }

1946: PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1947: {
1948:   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

1950:   MatSetUnfactored(a->A);
1951:   return 0;
1952: }

1954: PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
1955: {
1956:   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1957:   Mat            a,b,c,d;
1958:   PetscBool      flg;

1960:   a = matA->A; b = matA->B;
1961:   c = matB->A; d = matB->B;

1963:   MatEqual(a,c,&flg);
1964:   if (flg) {
1965:     MatEqual(b,d,&flg);
1966:   }
1967:   MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));
1968:   return 0;
1969: }

1971: PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1972: {
1973:   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1974:   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;

1976:   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1977:   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1978:     /* Because of the column compression in the off-process part of the matrix a->B,
1979:        the number of columns in a->B and b->B may differ, hence we cannot call
1980:        MatCopy() directly on the two parts. If need be, a copy more efficient than
1981:        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
1982:        then copying the submatrices */
1983:     MatCopy_Basic(A,B,str);
1984:   } else {
1985:     MatCopy(a->A,b->A,str);
1986:     MatCopy(a->B,b->B,str);
1987:   }
1988:   PetscObjectStateIncrease((PetscObject)B);
1989:   return 0;
1990: }

1992: PetscErrorCode MatSetUp_MPIAIJ(Mat A)
1993: {
1994:   MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);
1995:   return 0;
1996: }

1998: /*
1999:    Computes the number of nonzeros per row needed for preallocation when X and Y
2000:    have different nonzero structure.
2001: */
2002: PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2003: {
2004:   PetscInt       i,j,k,nzx,nzy;

2006:   /* Set the number of nonzeros in the new matrix */
2007:   for (i=0; i<m; i++) {
2008:     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2009:     nzx = xi[i+1] - xi[i];
2010:     nzy = yi[i+1] - yi[i];
2011:     nnz[i] = 0;
2012:     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2013:       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2014:       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2015:       nnz[i]++;
2016:     }
2017:     for (; k<nzy; k++) nnz[i]++;
2018:   }
2019:   return 0;
2020: }

2022: /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2023: static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2024: {
2025:   PetscInt       m = Y->rmap->N;
2026:   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2027:   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;

2029:   MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);
2030:   return 0;
2031: }

2033: PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2034: {
2035:   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;

2037:   if (str == SAME_NONZERO_PATTERN) {
2038:     MatAXPY(yy->A,a,xx->A,str);
2039:     MatAXPY(yy->B,a,xx->B,str);
2040:   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2041:     MatAXPY_Basic(Y,a,X,str);
2042:   } else {
2043:     Mat      B;
2044:     PetscInt *nnz_d,*nnz_o;

2046:     PetscMalloc1(yy->A->rmap->N,&nnz_d);
2047:     PetscMalloc1(yy->B->rmap->N,&nnz_o);
2048:     MatCreate(PetscObjectComm((PetscObject)Y),&B);
2049:     PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);
2050:     MatSetLayouts(B,Y->rmap,Y->cmap);
2051:     MatSetType(B,((PetscObject)Y)->type_name);
2052:     MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);
2053:     MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);
2054:     MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);
2055:     MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
2056:     MatHeaderMerge(Y,&B);
2057:     PetscFree(nnz_d);
2058:     PetscFree(nnz_o);
2059:   }
2060:   return 0;
2061: }
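
/*
   A minimal usage sketch of the structure flags dispatched above (assumes assembled
   MATMPIAIJ matrices X and Y with compatible sizes):

     MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);       fastest, patterns identical
     MatAXPY(Y,2.0,X,SUBSET_NONZERO_PATTERN);     X's nonzeros are a subset of Y's
     MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);  Y is rebuilt with the merged pattern
*/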

2063: PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);

2065: PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2066: {
2067:   if (PetscDefined(USE_COMPLEX)) {
2068:     Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

2070:     MatConjugate_SeqAIJ(aij->A);
2071:     MatConjugate_SeqAIJ(aij->B);
2072:   }
2073:   return 0;
2074: }

2076: PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2077: {
2078:   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

2080:   MatRealPart(a->A);
2081:   MatRealPart(a->B);
2082:   return 0;
2083: }

2085: PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2086: {
2087:   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

2089:   MatImaginaryPart(a->A);
2090:   MatImaginaryPart(a->B);
2091:   return 0;
2092: }

2094: PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2095: {
2096:   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
2097:   PetscInt          i,*idxb = NULL,m = A->rmap->n;
2098:   PetscScalar       *va,*vv;
2099:   Vec               vB,vA;
2100:   const PetscScalar *vb;

2102:   VecCreateSeq(PETSC_COMM_SELF,m,&vA);
2103:   MatGetRowMaxAbs(a->A,vA,idx);

2105:   VecGetArrayWrite(vA,&va);
2106:   if (idx) {
2107:     for (i=0; i<m; i++) {
2108:       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2109:     }
2110:   }

2112:   VecCreateSeq(PETSC_COMM_SELF,m,&vB);
2113:   PetscMalloc1(m,&idxb);
2114:   MatGetRowMaxAbs(a->B,vB,idxb);

2116:   VecGetArrayWrite(v,&vv);
2117:   VecGetArrayRead(vB,&vb);
2118:   for (i=0; i<m; i++) {
2119:     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2120:       vv[i] = vb[i];
2121:       if (idx) idx[i] = a->garray[idxb[i]];
2122:     } else {
2123:       vv[i] = va[i];
2124:       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
2125:         idx[i] = a->garray[idxb[i]];
2126:     }
2127:   }
2128:   VecRestoreArrayWrite(v,&vv);
2129:   VecRestoreArrayWrite(vA,&va);
2130:   VecRestoreArrayRead(vB,&vb);
2131:   PetscFree(idxb);
2132:   VecDestroy(&vA);
2133:   VecDestroy(&vB);
2134:   return 0;
2135: }
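
/*
   A minimal usage sketch (assumes an assembled MATMPIAIJ matrix A); the result vector
   must conform to the row layout, and idx[] returns global column indices:

     Vec      rmax;
     PetscInt m,*loc;
     MatGetLocalSize(A,&m,NULL);
     PetscMalloc1(m,&loc);
     MatCreateVecs(A,NULL,&rmax);
     MatGetRowMaxAbs(A,rmax,loc);
     PetscFree(loc);
     VecDestroy(&rmax);
*/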

2137: PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2138: {
2139:   Mat_MPIAIJ        *mat   = (Mat_MPIAIJ*) A->data;
2140:   PetscInt          m = A->rmap->n,n = A->cmap->n;
2141:   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2142:   PetscInt          *cmap  = mat->garray;
2143:   PetscInt          *diagIdx, *offdiagIdx;
2144:   Vec               diagV, offdiagV;
2145:   PetscScalar       *a, *diagA, *offdiagA;
2146:   const PetscScalar *ba,*bav;
2147:   PetscInt          r,j,col,ncols,*bi,*bj;
2148:   Mat               B = mat->B;
2149:   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

2151:   /* When one process holds the entire A and the other processes have no entries */
2152:   if (A->cmap->N == n) {
2153:     VecGetArrayWrite(v,&diagA);
2154:     VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);
2155:     MatGetRowMinAbs(mat->A,diagV,idx);
2156:     VecDestroy(&diagV);
2157:     VecRestoreArrayWrite(v,&diagA);
2158:     return 0;
2159:   } else if (n == 0) {
2160:     if (m) {
2161:       VecGetArrayWrite(v,&a);
2162:       for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
2163:       VecRestoreArrayWrite(v,&a);
2164:     }
2165:     return 0;
2166:   }

2168:   PetscMalloc2(m,&diagIdx,m,&offdiagIdx);
2169:   VecCreateSeq(PETSC_COMM_SELF, m, &diagV);
2170:   VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);
2171:   MatGetRowMinAbs(mat->A, diagV, diagIdx);

2173:   /* Get offdiagIdx[] for implicit 0.0 */
2174:   MatSeqAIJGetArrayRead(B,&bav);
2175:   ba   = bav;
2176:   bi   = b->i;
2177:   bj   = b->j;
2178:   VecGetArrayWrite(offdiagV, &offdiagA);
2179:   for (r = 0; r < m; r++) {
2180:     ncols = bi[r+1] - bi[r];
2181:     if (ncols == A->cmap->N - n) { /* Brow is dense */
2182:       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2183:     } else { /* Brow is sparse, so its implicit zeros already give a minimum absolute value of 0.0 */
2184:       offdiagA[r] = 0.0;

2186:       /* Find first hole in the cmap */
2187:       for (j=0; j<ncols; j++) {
2188:         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2189:         if (col > j && j < cstart) {
2190:           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2191:           break;
2192:         } else if (col > j + n && j >= cstart) {
2193:           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2194:           break;
2195:         }
2196:       }
2197:       if (j == ncols && ncols < A->cmap->N - n) {
2198:         /* a hole is outside compressed Bcols */
2199:         if (ncols == 0) {
2200:           if (cstart) {
2201:             offdiagIdx[r] = 0;
2202:           } else offdiagIdx[r] = cend;
2203:         } else { /* ncols > 0 */
2204:           offdiagIdx[r] = cmap[ncols-1] + 1;
2205:           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2206:         }
2207:       }
2208:     }

2210:     for (j=0; j<ncols; j++) {
2211:       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2212:       ba++; bj++;
2213:     }
2214:   }

2216:   VecGetArrayWrite(v, &a);
2217:   VecGetArrayRead(diagV, (const PetscScalar**)&diagA);
2218:   for (r = 0; r < m; ++r) {
2219:     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2220:       a[r]   = diagA[r];
2221:       if (idx) idx[r] = cstart + diagIdx[r];
2222:     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2223:       a[r] = diagA[r];
2224:       if (idx) {
2225:         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2226:           idx[r] = cstart + diagIdx[r];
2227:         } else idx[r] = offdiagIdx[r];
2228:       }
2229:     } else {
2230:       a[r]   = offdiagA[r];
2231:       if (idx) idx[r] = offdiagIdx[r];
2232:     }
2233:   }
2234:   MatSeqAIJRestoreArrayRead(B,&bav);
2235:   VecRestoreArrayWrite(v, &a);
2236:   VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);
2237:   VecRestoreArrayWrite(offdiagV, &offdiagA);
2238:   VecDestroy(&diagV);
2239:   VecDestroy(&offdiagV);
2240:   PetscFree2(diagIdx, offdiagIdx);
2241:   return 0;
2242: }

2244: PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2245: {
2246:   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
2247:   PetscInt          m = A->rmap->n,n = A->cmap->n;
2248:   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2249:   PetscInt          *cmap  = mat->garray;
2250:   PetscInt          *diagIdx, *offdiagIdx;
2251:   Vec               diagV, offdiagV;
2252:   PetscScalar       *a, *diagA, *offdiagA;
2253:   const PetscScalar *ba,*bav;
2254:   PetscInt          r,j,col,ncols,*bi,*bj;
2255:   Mat               B = mat->B;
2256:   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

2258:   /* When one process holds the entire A and the other processes have no entries */
2259:   if (A->cmap->N == n) {
2260:     VecGetArrayWrite(v,&diagA);
2261:     VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);
2262:     MatGetRowMin(mat->A,diagV,idx);
2263:     VecDestroy(&diagV);
2264:     VecRestoreArrayWrite(v,&diagA);
2265:     return 0;
2266:   } else if (n == 0) {
2267:     if (m) {
2268:       VecGetArrayWrite(v,&a);
2269:       for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
2270:       VecRestoreArrayWrite(v,&a);
2271:     }
2272:     return 0;
2273:   }

2275:   PetscCalloc2(m,&diagIdx,m,&offdiagIdx);
2276:   VecCreateSeq(PETSC_COMM_SELF, m, &diagV);
2277:   VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);
2278:   MatGetRowMin(mat->A, diagV, diagIdx);

2280:   /* Get offdiagIdx[] for implicit 0.0 */
2281:   MatSeqAIJGetArrayRead(B,&bav);
2282:   ba   = bav;
2283:   bi   = b->i;
2284:   bj   = b->j;
2285:   VecGetArrayWrite(offdiagV, &offdiagA);
2286:   for (r = 0; r < m; r++) {
2287:     ncols = bi[r+1] - bi[r];
2288:     if (ncols == A->cmap->N - n) { /* Brow is dense */
2289:       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2290:     } else { /* Brow is sparse, so its implicit zeros already give a minimum of 0.0 or lower */
2291:       offdiagA[r] = 0.0;

2293:       /* Find first hole in the cmap */
2294:       for (j=0; j<ncols; j++) {
2295:         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2296:         if (col > j && j < cstart) {
2297:           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2298:           break;
2299:         } else if (col > j + n && j >= cstart) {
2300:           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2301:           break;
2302:         }
2303:       }
2304:       if (j == ncols && ncols < A->cmap->N - n) {
2305:         /* a hole is outside compressed Bcols */
2306:         if (ncols == 0) {
2307:           if (cstart) {
2308:             offdiagIdx[r] = 0;
2309:           } else offdiagIdx[r] = cend;
2310:         } else { /* ncols > 0 */
2311:           offdiagIdx[r] = cmap[ncols-1] + 1;
2312:           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2313:         }
2314:       }
2315:     }

2317:     for (j=0; j<ncols; j++) {
2318:       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2319:       ba++; bj++;
2320:     }
2321:   }

2323:   VecGetArrayWrite(v, &a);
2324:   VecGetArrayRead(diagV, (const PetscScalar**)&diagA);
2325:   for (r = 0; r < m; ++r) {
2326:     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2327:       a[r]   = diagA[r];
2328:       if (idx) idx[r] = cstart + diagIdx[r];
2329:     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2330:       a[r] = diagA[r];
2331:       if (idx) {
2332:         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2333:           idx[r] = cstart + diagIdx[r];
2334:         } else idx[r] = offdiagIdx[r];
2335:       }
2336:     } else {
2337:       a[r]   = offdiagA[r];
2338:       if (idx) idx[r] = offdiagIdx[r];
2339:     }
2340:   }
2341:   MatSeqAIJRestoreArrayRead(B,&bav);
2342:   VecRestoreArrayWrite(v, &a);
2343:   VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);
2344:   VecRestoreArrayWrite(offdiagV, &offdiagA);
2345:   VecDestroy(&diagV);
2346:   VecDestroy(&offdiagV);
2347:   PetscFree2(diagIdx, offdiagIdx);
2348:   return 0;
2349: }

2351: PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2352: {
2353:   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
2354:   PetscInt          m = A->rmap->n,n = A->cmap->n;
2355:   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2356:   PetscInt          *cmap  = mat->garray;
2357:   PetscInt          *diagIdx, *offdiagIdx;
2358:   Vec               diagV, offdiagV;
2359:   PetscScalar       *a, *diagA, *offdiagA;
2360:   const PetscScalar *ba,*bav;
2361:   PetscInt          r,j,col,ncols,*bi,*bj;
2362:   Mat               B = mat->B;
2363:   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

2365:   /* When one process holds the entire A and the other processes have no entries */
2366:   if (A->cmap->N == n) {
2367:     VecGetArrayWrite(v,&diagA);
2368:     VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);
2369:     MatGetRowMax(mat->A,diagV,idx);
2370:     VecDestroy(&diagV);
2371:     VecRestoreArrayWrite(v,&diagA);
2372:     return 0;
2373:   } else if (n == 0) {
2374:     if (m) {
2375:       VecGetArrayWrite(v,&a);
2376:       for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
2377:       VecRestoreArrayWrite(v,&a);
2378:     }
2379:     return 0;
2380:   }

2382:   PetscMalloc2(m,&diagIdx,m,&offdiagIdx);
2383:   VecCreateSeq(PETSC_COMM_SELF, m, &diagV);
2384:   VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);
2385:   MatGetRowMax(mat->A, diagV, diagIdx);

2387:   /* Get offdiagIdx[] for implicit 0.0 */
2388:   MatSeqAIJGetArrayRead(B,&bav);
2389:   ba   = bav;
2390:   bi   = b->i;
2391:   bj   = b->j;
2392:   VecGetArrayWrite(offdiagV, &offdiagA);
2393:   for (r = 0; r < m; r++) {
2394:     ncols = bi[r+1] - bi[r];
2395:     if (ncols == A->cmap->N - n) { /* Brow is dense */
2396:       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2397:     } else { /* Brow is sparse, so its implicit zeros already give a maximum of 0.0 or higher */
2398:       offdiagA[r] = 0.0;

2400:       /* Find first hole in the cmap */
2401:       for (j=0; j<ncols; j++) {
2402:         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2403:         if (col > j && j < cstart) {
2404:           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2405:           break;
2406:         } else if (col > j + n && j >= cstart) {
2407:           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2408:           break;
2409:         }
2410:       }
2411:       if (j == ncols && ncols < A->cmap->N - n) {
2412:         /* a hole is outside compressed Bcols */
2413:         if (ncols == 0) {
2414:           if (cstart) {
2415:             offdiagIdx[r] = 0;
2416:           } else offdiagIdx[r] = cend;
2417:         } else { /* ncols > 0 */
2418:           offdiagIdx[r] = cmap[ncols-1] + 1;
2419:           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2420:         }
2421:       }
2422:     }

2424:     for (j=0; j<ncols; j++) {
2425:       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2426:       ba++; bj++;
2427:     }
2428:   }

2430:   VecGetArrayWrite(v,    &a);
2431:   VecGetArrayRead(diagV,(const PetscScalar**)&diagA);
2432:   for (r = 0; r < m; ++r) {
2433:     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2434:       a[r] = diagA[r];
2435:       if (idx) idx[r] = cstart + diagIdx[r];
2436:     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2437:       a[r] = diagA[r];
2438:       if (idx) {
2439:         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2440:           idx[r] = cstart + diagIdx[r];
2441:         } else idx[r] = offdiagIdx[r];
2442:       }
2443:     } else {
2444:       a[r] = offdiagA[r];
2445:       if (idx) idx[r] = offdiagIdx[r];
2446:     }
2447:   }
2448:   MatSeqAIJRestoreArrayRead(B,&bav);
2449:   VecRestoreArrayWrite(v,       &a);
2450:   VecRestoreArrayRead(diagV,   (const PetscScalar**)&diagA);
2451:   VecRestoreArrayWrite(offdiagV,&offdiagA);
2452:   VecDestroy(&diagV);
2453:   VecDestroy(&offdiagV);
2454:   PetscFree2(diagIdx, offdiagIdx);
2455:   return 0;
2456: }

2458: PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2459: {
2460:   Mat            *dummy;

2462:   MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);
2463:   *newmat = *dummy;
2464:   PetscFree(dummy);
2465:   return 0;
2466: }

2468: PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2469: {
2470:   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;

2472:   MatInvertBlockDiagonal(a->A,values);
2473:   A->factorerrortype = a->A->factorerrortype;
2474:   return 0;
2475: }

2477: static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2478: {
2479:   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;

2482:   MatSetRandom(aij->A,rctx);
2483:   if (x->assembled) {
2484:     MatSetRandom(aij->B,rctx);
2485:   } else {
2486:     MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);
2487:   }
2488:   MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);
2489:   MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);
2490:   return 0;
2491: }

2493: PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2494: {
2495:   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2496:   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2497:   return 0;
2498: }

2500: /*@
2501:    MatMPIAIJSetUseScalableIncreaseOverlap - Sets whether the matrix uses a scalable algorithm to compute the overlap

2503:    Collective on Mat

2505:    Input Parameters:
2506: +    A - the matrix
2507: -    sc - PETSC_TRUE to use the scalable algorithm (by default the scalable algorithm is not used)

2509:    Level: advanced

2511: @*/
2512: PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2513: {
2514:   PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));
2515:   return 0;
2516: }
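
/*
   A minimal usage sketch: the scalable algorithm can be selected either programmatically,

     MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);

   or with -mat_increase_overlap_scalable (see MatSetFromOptions_MPIAIJ() below), before
   MatIncreaseOverlap() is invoked, e.g. by -pc_type asm -pc_asm_overlap 2.
*/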

2518: PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2519: {
2520:   PetscBool            sc = PETSC_FALSE,flg;

2522:   PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");
2523:   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2524:   PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);
2525:   if (flg) {
2526:     MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);
2527:   }
2528:   PetscOptionsTail();
2529:   return 0;
2530: }

2532: PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2533: {
2534:   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2535:   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;

2537:   if (!Y->preallocated) {
2538:     MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);
2539:   } else if (!aij->nz) {
2540:     PetscInt nonew = aij->nonew;
2541:     MatSeqAIJSetPreallocation(maij->A,1,NULL);
2542:     aij->nonew = nonew;
2543:   }
2544:   MatShift_Basic(Y,a);
2545:   return 0;
2546: }

2548: PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2549: {
2550:   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

2553:   MatMissingDiagonal(a->A,missing,d);
2554:   if (d) {
2555:     PetscInt rstart;
2556:     MatGetOwnershipRange(A,&rstart,NULL);
2557:     *d += rstart;

2559:   }
2560:   return 0;
2561: }

2563: PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2564: {
2565:   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

2567:   MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);
2568:   return 0;
2569: }

2571: /* -------------------------------------------------------------------*/
2572: static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2573:                                        MatGetRow_MPIAIJ,
2574:                                        MatRestoreRow_MPIAIJ,
2575:                                        MatMult_MPIAIJ,
2576:                                 /* 4*/ MatMultAdd_MPIAIJ,
2577:                                        MatMultTranspose_MPIAIJ,
2578:                                        MatMultTransposeAdd_MPIAIJ,
2579:                                        NULL,
2580:                                        NULL,
2581:                                        NULL,
2582:                                 /*10*/ NULL,
2583:                                        NULL,
2584:                                        NULL,
2585:                                        MatSOR_MPIAIJ,
2586:                                        MatTranspose_MPIAIJ,
2587:                                 /*15*/ MatGetInfo_MPIAIJ,
2588:                                        MatEqual_MPIAIJ,
2589:                                        MatGetDiagonal_MPIAIJ,
2590:                                        MatDiagonalScale_MPIAIJ,
2591:                                        MatNorm_MPIAIJ,
2592:                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2593:                                        MatAssemblyEnd_MPIAIJ,
2594:                                        MatSetOption_MPIAIJ,
2595:                                        MatZeroEntries_MPIAIJ,
2596:                                 /*24*/ MatZeroRows_MPIAIJ,
2597:                                        NULL,
2598:                                        NULL,
2599:                                        NULL,
2600:                                        NULL,
2601:                                 /*29*/ MatSetUp_MPIAIJ,
2602:                                        NULL,
2603:                                        NULL,
2604:                                        MatGetDiagonalBlock_MPIAIJ,
2605:                                        NULL,
2606:                                 /*34*/ MatDuplicate_MPIAIJ,
2607:                                        NULL,
2608:                                        NULL,
2609:                                        NULL,
2610:                                        NULL,
2611:                                 /*39*/ MatAXPY_MPIAIJ,
2612:                                        MatCreateSubMatrices_MPIAIJ,
2613:                                        MatIncreaseOverlap_MPIAIJ,
2614:                                        MatGetValues_MPIAIJ,
2615:                                        MatCopy_MPIAIJ,
2616:                                 /*44*/ MatGetRowMax_MPIAIJ,
2617:                                        MatScale_MPIAIJ,
2618:                                        MatShift_MPIAIJ,
2619:                                        MatDiagonalSet_MPIAIJ,
2620:                                        MatZeroRowsColumns_MPIAIJ,
2621:                                 /*49*/ MatSetRandom_MPIAIJ,
2622:                                        NULL,
2623:                                        NULL,
2624:                                        NULL,
2625:                                        NULL,
2626:                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2627:                                        NULL,
2628:                                        MatSetUnfactored_MPIAIJ,
2629:                                        MatPermute_MPIAIJ,
2630:                                        NULL,
2631:                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2632:                                        MatDestroy_MPIAIJ,
2633:                                        MatView_MPIAIJ,
2634:                                        NULL,
2635:                                        NULL,
2636:                                 /*64*/ NULL,
2637:                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2638:                                        NULL,
2639:                                        NULL,
2640:                                        NULL,
2641:                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2642:                                        MatGetRowMinAbs_MPIAIJ,
2643:                                        NULL,
2644:                                        NULL,
2645:                                        NULL,
2646:                                        NULL,
2647:                                 /*75*/ MatFDColoringApply_AIJ,
2648:                                        MatSetFromOptions_MPIAIJ,
2649:                                        NULL,
2650:                                        NULL,
2651:                                        MatFindZeroDiagonals_MPIAIJ,
2652:                                 /*80*/ NULL,
2653:                                        NULL,
2654:                                        NULL,
2655:                                 /*83*/ MatLoad_MPIAIJ,
2656:                                        MatIsSymmetric_MPIAIJ,
2657:                                        NULL,
2658:                                        NULL,
2659:                                        NULL,
2660:                                        NULL,
2661:                                 /*89*/ NULL,
2662:                                        NULL,
2663:                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2664:                                        NULL,
2665:                                        NULL,
2666:                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2667:                                        NULL,
2668:                                        NULL,
2669:                                        NULL,
2670:                                        MatBindToCPU_MPIAIJ,
2671:                                 /*99*/ MatProductSetFromOptions_MPIAIJ,
2672:                                        NULL,
2673:                                        NULL,
2674:                                        MatConjugate_MPIAIJ,
2675:                                        NULL,
2676:                                 /*104*/MatSetValuesRow_MPIAIJ,
2677:                                        MatRealPart_MPIAIJ,
2678:                                        MatImaginaryPart_MPIAIJ,
2679:                                        NULL,
2680:                                        NULL,
2681:                                 /*109*/NULL,
2682:                                        NULL,
2683:                                        MatGetRowMin_MPIAIJ,
2684:                                        NULL,
2685:                                        MatMissingDiagonal_MPIAIJ,
2686:                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2687:                                        NULL,
2688:                                        MatGetGhosts_MPIAIJ,
2689:                                        NULL,
2690:                                        NULL,
2691:                                 /*119*/MatMultDiagonalBlock_MPIAIJ,
2692:                                        NULL,
2693:                                        NULL,
2694:                                        NULL,
2695:                                        MatGetMultiProcBlock_MPIAIJ,
2696:                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2697:                                        MatGetColumnReductions_MPIAIJ,
2698:                                        MatInvertBlockDiagonal_MPIAIJ,
2699:                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2700:                                        MatCreateSubMatricesMPI_MPIAIJ,
2701:                                 /*129*/NULL,
2702:                                        NULL,
2703:                                        NULL,
2704:                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2705:                                        NULL,
2706:                                 /*134*/NULL,
2707:                                        NULL,
2708:                                        NULL,
2709:                                        NULL,
2710:                                        NULL,
2711:                                 /*139*/MatSetBlockSizes_MPIAIJ,
2712:                                        NULL,
2713:                                        NULL,
2714:                                        MatFDColoringSetUp_MPIXAIJ,
2715:                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2716:                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2717:                                 /*145*/NULL,
2718:                                        NULL,
2719:                                        NULL
2720: };

2722: /* ----------------------------------------------------------------------------------------*/

2724: PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2725: {
2726:   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

2728:   MatStoreValues(aij->A);
2729:   MatStoreValues(aij->B);
2730:   return 0;
2731: }

2733: PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2734: {
2735:   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

2737:   MatRetrieveValues(aij->A);
2738:   MatRetrieveValues(aij->B);
2739:   return 0;
2740: }
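
/*
   A minimal usage sketch (assumes an assembled MATMPIAIJ matrix A whose nonzero pattern
   will not change):

     MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);
     MatStoreValues(A);
     ... temporarily modify the values, e.g. with MatZeroRows() or MatSetValues() ...
     MatRetrieveValues(A);

   This restores the stored values while keeping the preallocated pattern intact.
*/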

2742: PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2743: {
2744:   Mat_MPIAIJ     *b;
2745:   PetscMPIInt    size;

2747:   PetscLayoutSetUp(B->rmap);
2748:   PetscLayoutSetUp(B->cmap);
2749:   b = (Mat_MPIAIJ*)B->data;

2751: #if defined(PETSC_USE_CTABLE)
2752:   PetscTableDestroy(&b->colmap);
2753: #else
2754:   PetscFree(b->colmap);
2755: #endif
2756:   PetscFree(b->garray);
2757:   VecDestroy(&b->lvec);
2758:   VecScatterDestroy(&b->Mvctx);

2760:   /* Because b->B may have been resized, we simply destroy it and create a new one each time */
2761:   MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);
2762:   MatDestroy(&b->B);
2763:   MatCreate(PETSC_COMM_SELF,&b->B);
2764:   MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);
2765:   MatSetBlockSizesFromMats(b->B,B,B);
2766:   MatSetType(b->B,MATSEQAIJ);
2767:   PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);

2769:   if (!B->preallocated) {
2770:     MatCreate(PETSC_COMM_SELF,&b->A);
2771:     MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);
2772:     MatSetBlockSizesFromMats(b->A,B,B);
2773:     MatSetType(b->A,MATSEQAIJ);
2774:     PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);
2775:   }

2777:   MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);
2778:   MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);
2779:   B->preallocated  = PETSC_TRUE;
2780:   B->was_assembled = PETSC_FALSE;
2781:   B->assembled     = PETSC_FALSE;
2782:   return 0;
2783: }
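
/*
   A minimal usage sketch of the public interface that reaches the routine above; M and N
   stand for the desired global sizes and the per-row counts are only examples:

     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);
     ... insert entries with MatSetValues() ...
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);

   The d_nz/d_nnz arguments preallocate the diagonal block b->A and o_nz/o_nnz the
   off-diagonal block b->B.
*/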

2785: PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2786: {
2787:   Mat_MPIAIJ     *b;

2790:   PetscLayoutSetUp(B->rmap);
2791:   PetscLayoutSetUp(B->cmap);
2792:   b = (Mat_MPIAIJ*)B->data;

2794: #if defined(PETSC_USE_CTABLE)
2795:   PetscTableDestroy(&b->colmap);
2796: #else
2797:   PetscFree(b->colmap);
2798: #endif
2799:   PetscFree(b->garray);
2800:   VecDestroy(&b->lvec);
2801:   VecScatterDestroy(&b->Mvctx);

2803:   MatResetPreallocation(b->A);
2804:   MatResetPreallocation(b->B);
2805:   B->preallocated  = PETSC_TRUE;
2806:   B->was_assembled = PETSC_FALSE;
2807:   B->assembled = PETSC_FALSE;
2808:   return 0;
2809: }

2811: PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2812: {
2813:   Mat            mat;
2814:   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;

2816:   *newmat = NULL;
2817:   MatCreate(PetscObjectComm((PetscObject)matin),&mat);
2818:   MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);
2819:   MatSetBlockSizesFromMats(mat,matin,matin);
2820:   MatSetType(mat,((PetscObject)matin)->type_name);
2821:   a       = (Mat_MPIAIJ*)mat->data;

2823:   mat->factortype   = matin->factortype;
2824:   mat->assembled    = matin->assembled;
2825:   mat->insertmode   = NOT_SET_VALUES;
2826:   mat->preallocated = matin->preallocated;

2828:   a->size         = oldmat->size;
2829:   a->rank         = oldmat->rank;
2830:   a->donotstash   = oldmat->donotstash;
2831:   a->roworiented  = oldmat->roworiented;
2832:   a->rowindices   = NULL;
2833:   a->rowvalues    = NULL;
2834:   a->getrowactive = PETSC_FALSE;

2836:   PetscLayoutReference(matin->rmap,&mat->rmap);
2837:   PetscLayoutReference(matin->cmap,&mat->cmap);

2839:   if (oldmat->colmap) {
2840: #if defined(PETSC_USE_CTABLE)
2841:     PetscTableCreateCopy(oldmat->colmap,&a->colmap);
2842: #else
2843:     PetscMalloc1(mat->cmap->N,&a->colmap);
2844:     PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));
2845:     PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);
2846: #endif
2847:   } else a->colmap = NULL;
2848:   if (oldmat->garray) {
2849:     PetscInt len;
2850:     len  = oldmat->B->cmap->n;
2851:     PetscMalloc1(len+1,&a->garray);
2852:     PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));
2853:     if (len) PetscArraycpy(a->garray,oldmat->garray,len);
2854:   } else a->garray = NULL;

2856:   /* MatDuplicate() may be called with a non-assembled matrix; it only requires
2857:      the matrix to be preallocated. This can happen, for instance, inside
2858:      DMCreateMatrix_Shell */
2859:   if (oldmat->lvec) {
2860:     VecDuplicate(oldmat->lvec,&a->lvec);
2861:     PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);
2862:   }
2863:   if (oldmat->Mvctx) {
2864:     VecScatterCopy(oldmat->Mvctx,&a->Mvctx);
2865:     PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);
2866:   }
2867:   MatDuplicate(oldmat->A,cpvalues,&a->A);
2868:   PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);
2869:   MatDuplicate(oldmat->B,cpvalues,&a->B);
2870:   PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);
2871:   PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);
2872:   *newmat = mat;
2873:   return 0;
2874: }

2876: PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2877: {
2878:   PetscBool      isbinary, ishdf5;

2882:   /* force binary viewer to load .info file if it has not yet done so */
2883:   PetscViewerSetUp(viewer);
2884:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
2885:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);
2886:   if (isbinary) {
2887:     MatLoad_MPIAIJ_Binary(newMat,viewer);
2888:   } else if (ishdf5) {
2889: #if defined(PETSC_HAVE_HDF5)
2890:     MatLoad_AIJ_HDF5(newMat,viewer);
2891: #else
2892:     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2893: #endif
2894:   } else {
2895:     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2896:   }
2897:   return 0;
2898: }

2900: PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
2901: {
2902:   PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
2903:   PetscInt       *rowidxs,*colidxs;
2904:   PetscScalar    *matvals;

2906:   PetscViewerSetUp(viewer);

2908:   /* read in matrix header */
2909:   PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);
2911:   M  = header[1]; N = header[2]; nz = header[3];

2916:   /* set block sizes from the viewer's .info file */
2917:   MatLoad_Binary_BlockSizes(mat,viewer);
2918:   /* set global sizes if not set already */
2919:   if (mat->rmap->N < 0) mat->rmap->N = M;
2920:   if (mat->cmap->N < 0) mat->cmap->N = N;
2921:   PetscLayoutSetUp(mat->rmap);
2922:   PetscLayoutSetUp(mat->cmap);

2924:   /* check if the matrix sizes are correct */
2925:   MatGetSize(mat,&rows,&cols);

2928:   /* read in row lengths and build row indices */
2929:   MatGetLocalSize(mat,&m,NULL);
2930:   PetscMalloc1(m+1,&rowidxs);
2931:   PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);
2932:   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
2933:   MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));
2935:   /* read in column indices and matrix values */
2936:   PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);
2937:   PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);
2938:   PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);
2939:   /* store matrix indices and values */
2940:   MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);
2941:   PetscFree(rowidxs);
2942:   PetscFree2(colidxs,matvals);
2943:   return 0;
2944: }

2946: /* Not scalable because of ISAllGather() unless getting all columns. */
2947: PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
2948: {
2949:   IS             iscol_local;
2950:   PetscBool      isstride;
2951:   PetscMPIInt    lisstride=0,gisstride;

2953:   /* check if we are grabbing all columns */
2954:   PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);

2956:   if (isstride) {
2957:     PetscInt  start,len,mstart,mlen;
2958:     ISStrideGetInfo(iscol,&start,NULL);
2959:     ISGetLocalSize(iscol,&len);
2960:     MatGetOwnershipRangeColumn(mat,&mstart,&mlen);
2961:     if (mstart == start && mlen-mstart == len) lisstride = 1;
2962:   }

2964:   MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));
2965:   if (gisstride) {
2966:     PetscInt N;
2967:     MatGetSize(mat,NULL,&N);
2968:     ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);
2969:     ISSetIdentity(iscol_local);
2970:     PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");
2971:   } else {
2972:     PetscInt cbs;
2973:     ISGetBlockSize(iscol,&cbs);
2974:     ISAllGather(iscol,&iscol_local);
2975:     ISSetBlockSize(iscol_local,cbs);
2976:   }

2978:   *isseq = iscol_local;
2979:   return 0;
2980: }

2982: /*
2983:  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and forming iscol_local with the global size of iscol
2984:  (see MatCreateSubMatrix_MPIAIJ_nonscalable).

2986:  Input Parameters:
2987:    mat - matrix
2988:    isrow - parallel row index set; its local indices are a subset of the local rows of mat,
2989:            i.e., mat->rstart <= isrow[i] < mat->rend
2990:    iscol - parallel column index set; its local indices are a subset of local columns of mat,
2991:            i.e., mat->cstart <= iscol[i] < mat->cend
2992:  Output Parameters:
2993:    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
2994:    iscol_o - sequential column index set for retrieving mat->B
2995:    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
2996:  */
2997: PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
2998: {
2999:   Vec            x,cmap;
3000:   const PetscInt *is_idx;
3001:   PetscScalar    *xarray,*cmaparray;
3002:   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3003:   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3004:   Mat            B=a->B;
3005:   Vec            lvec=a->lvec,lcmap;
3006:   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3007:   MPI_Comm       comm;
3008:   VecScatter     Mvctx=a->Mvctx;

3010:   PetscObjectGetComm((PetscObject)mat,&comm);
3011:   ISGetLocalSize(iscol,&ncols);

3013:   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3014:   MatCreateVecs(mat,&x,NULL);
3015:   VecSet(x,-1.0);
3016:   VecDuplicate(x,&cmap);
3017:   VecSet(cmap,-1.0);

3019:   /* Get start indices */
3020:   MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);
3021:   isstart -= ncols;
3022:   MatGetOwnershipRangeColumn(mat,&cstart,&cend);

3024:   ISGetIndices(iscol,&is_idx);
3025:   VecGetArray(x,&xarray);
3026:   VecGetArray(cmap,&cmaparray);
3027:   PetscMalloc1(ncols,&idx);
3028:   for (i=0; i<ncols; i++) {
3029:     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3030:     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3031:     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3032:   }
3033:   VecRestoreArray(x,&xarray);
3034:   VecRestoreArray(cmap,&cmaparray);
3035:   ISRestoreIndices(iscol,&is_idx);

3037:   /* Get iscol_d */
3038:   ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);
3039:   ISGetBlockSize(iscol,&i);
3040:   ISSetBlockSize(*iscol_d,i);

3042:   /* Get isrow_d */
3043:   ISGetLocalSize(isrow,&m);
3044:   rstart = mat->rmap->rstart;
3045:   PetscMalloc1(m,&idx);
3046:   ISGetIndices(isrow,&is_idx);
3047:   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3048:   ISRestoreIndices(isrow,&is_idx);

3050:   ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);
3051:   ISGetBlockSize(isrow,&i);
3052:   ISSetBlockSize(*isrow_d,i);

3054:   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3055:   VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);
3056:   VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);

3058:   VecDuplicate(lvec,&lcmap);

3060:   VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);
3061:   VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);

3063:   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3064:   /* off-process column indices */
3065:   count = 0;
3066:   PetscMalloc1(Bn,&idx);
3067:   PetscMalloc1(Bn,&cmap1);

3069:   VecGetArray(lvec,&xarray);
3070:   VecGetArray(lcmap,&cmaparray);
3071:   for (i=0; i<Bn; i++) {
3072:     if (PetscRealPart(xarray[i]) > -1.0) {
3073:       idx[count]     = i;                   /* local column index in off-diagonal part B */
3074:       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3075:       count++;
3076:     }
3077:   }
3078:   VecRestoreArray(lvec,&xarray);
3079:   VecRestoreArray(lcmap,&cmaparray);

3081:   ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);
3082:   /* cannot ensure iscol_o has same blocksize as iscol! */

3084:   PetscFree(idx);
3085:   *garray = cmap1;

3087:   VecDestroy(&x);
3088:   VecDestroy(&cmap);
3089:   VecDestroy(&lcmap);
3090:   return 0;
3091: }

3093: /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3094: PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3095: {
3096:   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3097:   Mat            M = NULL;
3098:   MPI_Comm       comm;
3099:   IS             iscol_d,isrow_d,iscol_o;
3100:   Mat            Asub = NULL,Bsub = NULL;
3101:   PetscInt       n;

3103:   PetscObjectGetComm((PetscObject)mat,&comm);

3105:   if (call == MAT_REUSE_MATRIX) {
3106:     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3107:     PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);

3110:     PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);

3113:     PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);

3116:     /* Update diagonal and off-diagonal portions of submat */
3117:     asub = (Mat_MPIAIJ*)(*submat)->data;
3118:     MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);
3119:     ISGetLocalSize(iscol_o,&n);
3120:     if (n) {
3121:       MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);
3122:     }
3123:     MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);
3124:     MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);

3126:   } else { /* call == MAT_INITIAL_MATRIX */
3127:     const PetscInt *garray;
3128:     PetscInt        BsubN;

3130:     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3131:     ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);

3133:     /* Create local submatrices Asub and Bsub */
3134:     MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);
3135:     MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);

3137:     /* Create submatrix M */
3138:     MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);

3140:     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3141:     asub = (Mat_MPIAIJ*)M->data;

3143:     ISGetLocalSize(iscol_o,&BsubN);
3144:     n = asub->B->cmap->N;
3145:     if (BsubN > n) {
3146:       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3147:       const PetscInt *idx;
3148:       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3149:       PetscInfo(M,"submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n",n,BsubN);

3151:       PetscMalloc1(n,&idx_new);
3152:       j = 0;
3153:       ISGetIndices(iscol_o,&idx);
3154:       for (i=0; i<n; i++) {
3155:         if (j >= BsubN) break;
3156:         while (subgarray[i] > garray[j]) j++;

3158:         if (subgarray[i] == garray[j]) {
3159:           idx_new[i] = idx[j++];
3160:         } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot be smaller than garray[%" PetscInt_FMT "]=%" PetscInt_FMT,i,subgarray[i],j,garray[j]);
3161:       }
3162:       ISRestoreIndices(iscol_o,&idx);

3164:       ISDestroy(&iscol_o);
3165:       ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);

3167:     } else if (BsubN < n) {
3168:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")",BsubN,asub->B->cmap->N);
3169:     }

3171:     PetscFree(garray);
3172:     *submat = M;

3174:     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3175:     PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);
3176:     ISDestroy(&isrow_d);

3178:     PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);
3179:     ISDestroy(&iscol_d);

3181:     PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);
3182:     ISDestroy(&iscol_o);
3183:   }
3184:   return 0;
3185: }

3187: PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3188: {
3189:   IS             iscol_local=NULL,isrow_d;
3190:   PetscInt       csize;
3191:   PetscInt       n,i,j,start,end;
3192:   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3193:   MPI_Comm       comm;

3195:   /* If isrow has same processor distribution as mat,
3196:      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3197:   if (call == MAT_REUSE_MATRIX) {
3198:     PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);
3199:     if (isrow_d) {
3200:       sameRowDist  = PETSC_TRUE;
3201:       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3202:     } else {
3203:       PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);
3204:       if (iscol_local) {
3205:         sameRowDist  = PETSC_TRUE;
3206:         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3207:       }
3208:     }
3209:   } else {
3210:     /* Check if isrow has same processor distribution as mat */
3211:     sameDist[0] = PETSC_FALSE;
3212:     ISGetLocalSize(isrow,&n);
3213:     if (!n) {
3214:       sameDist[0] = PETSC_TRUE;
3215:     } else {
3216:       ISGetMinMax(isrow,&i,&j);
3217:       MatGetOwnershipRange(mat,&start,&end);
3218:       if (i >= start && j < end) {
3219:         sameDist[0] = PETSC_TRUE;
3220:       }
3221:     }

3223:     /* Check if iscol has same processor distribution as mat */
3224:     sameDist[1] = PETSC_FALSE;
3225:     ISGetLocalSize(iscol,&n);
3226:     if (!n) {
3227:       sameDist[1] = PETSC_TRUE;
3228:     } else {
3229:       ISGetMinMax(iscol,&i,&j);
3230:       MatGetOwnershipRangeColumn(mat,&start,&end);
3231:       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3232:     }

3234:     PetscObjectGetComm((PetscObject)mat,&comm);
3235:     MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);
3236:     sameRowDist = tsameDist[0];
3237:   }

3239:   if (sameRowDist) {
3240:     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3241:       /* isrow and iscol have same processor distribution as mat */
3242:       MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);
3243:       return 0;
3244:     } else { /* sameRowDist */
3245:       /* isrow has same processor distribution as mat */
3246:       if (call == MAT_INITIAL_MATRIX) {
3247:         PetscBool sorted;
3248:         ISGetSeqIS_Private(mat,iscol,&iscol_local);
3249:         ISGetLocalSize(iscol_local,&n); /* local size of iscol_local = global columns of newmat */
3250:         ISGetSize(iscol,&i);

3253:         ISSorted(iscol_local,&sorted);
3254:         if (sorted) {
3255:           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3256:           MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);
3257:           return 0;
3258:         }
3259:       } else { /* call == MAT_REUSE_MATRIX */
3260:         IS iscol_sub;
3261:         PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);
3262:         if (iscol_sub) {
3263:           MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);
3264:           return 0;
3265:         }
3266:       }
3267:     }
3268:   }

3270:   /* General case: iscol -> iscol_local which has global size of iscol */
3271:   if (call == MAT_REUSE_MATRIX) {
3272:     PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);
3274:   } else {
3275:     if (!iscol_local) {
3276:       ISGetSeqIS_Private(mat,iscol,&iscol_local);
3277:     }
3278:   }

3280:   ISGetLocalSize(iscol,&csize);
3281:   MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);

3283:   if (call == MAT_INITIAL_MATRIX) {
3284:     PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);
3285:     ISDestroy(&iscol_local);
3286:   }
3287:   return 0;
3288: }

3290: /*@C
3291:      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3292:          and "off-diagonal" parts of the matrix in CSR format.

3294:    Collective

3296:    Input Parameters:
3297: +  comm - MPI communicator
3298: .  A - "diagonal" portion of matrix
3299: .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3300: -  garray - global index of B columns

3302:    Output Parameter:
3303: .   mat - the matrix, with input A as its local diagonal matrix

3304:    Level: advanced

3306:    Notes:
3307:        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3308:        A becomes part of the output mat, and B is destroyed by this routine; the user must not use A or B afterwards.
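
       A minimal calling sketch (hypothetical names: Ad and Ao are local SeqAIJ matrices built beforehand, and
       garray maps the columns of Ao to global column indices; after the call Ad and Ao belong to C and must not
       be used or destroyed by the caller):
.vb
     Mat C;
     MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Ad,Ao,garray,&C);
.ve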

3310: .seealso: MatCreateMPIAIJWithSplitArrays()
3311: @*/
3312: PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3313: {
3314:   Mat_MPIAIJ        *maij;
3315:   Mat_SeqAIJ        *b=(Mat_SeqAIJ*)B->data,*bnew;
3316:   PetscInt          *oi=b->i,*oj=b->j,i,nz,col;
3317:   const PetscScalar *oa;
3318:   Mat               Bnew;
3319:   PetscInt          m,n,N;
3320:   MatType           mpi_mat_type;

3322:   MatCreate(comm,mat);
3323:   MatGetSize(A,&m,&n);
3326:   /* the check below is removed: when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its block size may not be the same as A's */

3329:   /* Get global columns of mat */
3330:   MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);

3332:   MatSetSizes(*mat,m,n,PETSC_DECIDE,N);
3333:   /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
3334:   MatGetMPIMatType_Private(A,&mpi_mat_type);
3335:   MatSetType(*mat,mpi_mat_type);

3337:   MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);
3338:   maij = (Mat_MPIAIJ*)(*mat)->data;

3340:   (*mat)->preallocated = PETSC_TRUE;

3342:   PetscLayoutSetUp((*mat)->rmap);
3343:   PetscLayoutSetUp((*mat)->cmap);

3345:   /* Set A as diagonal portion of *mat */
3346:   maij->A = A;

3348:   nz = oi[m];
3349:   for (i=0; i<nz; i++) {
3350:     col   = oj[i];
3351:     oj[i] = garray[col];
3352:   }

3354:   /* Set Bnew as off-diagonal portion of *mat */
3355:   MatSeqAIJGetArrayRead(B,&oa);
3356:   MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew);
3357:   MatSeqAIJRestoreArrayRead(B,&oa);
3358:   bnew        = (Mat_SeqAIJ*)Bnew->data;
3359:   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3360:   maij->B     = Bnew;


3364:   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3365:   b->free_a       = PETSC_FALSE;
3366:   b->free_ij      = PETSC_FALSE;
3367:   MatDestroy(&B);

3369:   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3370:   bnew->free_a       = PETSC_TRUE;
3371:   bnew->free_ij      = PETSC_TRUE;

3373:   /* condense columns of maij->B */
3374:   MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);
3375:   MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
3376:   MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
3377:   MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);
3378:   MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3379:   return 0;
3380: }

3382: extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);

3384: PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3385: {
3386:   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3387:   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3388:   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3389:   Mat            M,Msub,B=a->B;
3390:   MatScalar      *aa;
3391:   Mat_SeqAIJ     *aij;
3392:   PetscInt       *garray = a->garray,*colsub,Ncols;
3393:   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3394:   IS             iscol_sub,iscmap;
3395:   const PetscInt *is_idx,*cmap;
3396:   PetscBool      allcolumns=PETSC_FALSE;
3397:   MPI_Comm       comm;

3399:   PetscObjectGetComm((PetscObject)mat,&comm);
3400:   if (call == MAT_REUSE_MATRIX) {
3401:     PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);
3403:     ISGetLocalSize(iscol_sub,&count);

3405:     PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);

3408:     PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);

3411:     MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);

3413:   } else { /* call == MAT_INITIAL_MATRIX */
3414:     PetscBool flg;

3416:     ISGetLocalSize(iscol,&n);
3417:     ISGetSize(iscol,&Ncols);

3419:     /* (1) iscol -> nonscalable iscol_local */
3420:     /* Check for special case: each processor gets entire matrix columns */
3421:     ISIdentity(iscol_local,&flg);
3422:     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3423:     MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));
3424:     if (allcolumns) {
3425:       iscol_sub = iscol_local;
3426:       PetscObjectReference((PetscObject)iscol_local);
3427:       ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);

3429:     } else {
3430:       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3431:       PetscInt *idx,*cmap1,k;
3432:       PetscMalloc1(Ncols,&idx);
3433:       PetscMalloc1(Ncols,&cmap1);
3434:       ISGetIndices(iscol_local,&is_idx);
3435:       count = 0;
3436:       k     = 0;
3437:       for (i=0; i<Ncols; i++) {
3438:         j = is_idx[i];
3439:         if (j >= cstart && j < cend) {
3440:           /* diagonal part of mat */
3441:           idx[count]     = j;
3442:           cmap1[count++] = i; /* column index in submat */
3443:         } else if (Bn) {
3444:           /* off-diagonal part of mat */
3445:           if (j == garray[k]) {
3446:             idx[count]     = j;
3447:             cmap1[count++] = i;  /* column index in submat */
3448:           } else if (j > garray[k]) {
3449:             while (j > garray[k] && k < Bn-1) k++;
3450:             if (j == garray[k]) {
3451:               idx[count]     = j;
3452:               cmap1[count++] = i; /* column index in submat */
3453:             }
3454:           }
3455:         }
3456:       }
3457:       ISRestoreIndices(iscol_local,&is_idx);

3459:       ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);
3460:       ISGetBlockSize(iscol,&cbs);
3461:       ISSetBlockSize(iscol_sub,cbs);

3463:       ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);
3464:     }

3466:     /* (3) Create sequential Msub */
3467:     MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);
3468:   }

3470:   ISGetLocalSize(iscol_sub,&count);
3471:   aij  = (Mat_SeqAIJ*)(Msub)->data;
3472:   ii   = aij->i;
3473:   ISGetIndices(iscmap,&cmap);

3475:   /*
3476:       m - number of local rows
3477:       Ncols - number of columns (same on all processors)
3478:       rstart - first row in new global matrix generated
3479:   */
3480:   MatGetSize(Msub,&m,NULL);

3482:   if (call == MAT_INITIAL_MATRIX) {
3483:     /* (4) Create parallel newmat */
3484:     PetscMPIInt    rank,size;
3485:     PetscInt       csize;

3487:     MPI_Comm_size(comm,&size);
3488:     MPI_Comm_rank(comm,&rank);

3490:     /*
3491:         Determine the number of non-zeros in the diagonal and off-diagonal
3492:         portions of the matrix in order to do correct preallocation
3493:     */

3495:     /* first get start and end of "diagonal" columns */
3496:     ISGetLocalSize(iscol,&csize);
3497:     if (csize == PETSC_DECIDE) {
3498:       ISGetSize(isrow,&mglobal);
3499:       if (mglobal == Ncols) { /* square matrix */
3500:         nlocal = m;
3501:       } else {
3502:         nlocal = Ncols/size + ((Ncols % size) > rank);
3503:       }
3504:     } else {
3505:       nlocal = csize;
3506:     }
3507:     MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);
3508:     rstart = rend - nlocal;

3511:     /* next, compute all the lengths */
3512:     jj    = aij->j;
3513:     PetscMalloc1(2*m+1,&dlens);
3514:     olens = dlens + m;
3515:     for (i=0; i<m; i++) {
3516:       jend = ii[i+1] - ii[i];
3517:       olen = 0;
3518:       dlen = 0;
3519:       for (j=0; j<jend; j++) {
3520:         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3521:         else dlen++;
3522:         jj++;
3523:       }
3524:       olens[i] = olen;
3525:       dlens[i] = dlen;
3526:     }

3528:     ISGetBlockSize(isrow,&bs);
3529:     ISGetBlockSize(iscol,&cbs);

3531:     MatCreate(comm,&M);
3532:     MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);
3533:     MatSetBlockSizes(M,bs,cbs);
3534:     MatSetType(M,((PetscObject)mat)->type_name);
3535:     MatMPIAIJSetPreallocation(M,0,dlens,0,olens);
3536:     PetscFree(dlens);

3538:   } else { /* call == MAT_REUSE_MATRIX */
3539:     M    = *newmat;
3540:     MatGetLocalSize(M,&i,NULL);
3542:     MatZeroEntries(M);
3543:     /*
3544:          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3545:        rather than the slower MatSetValues().
3546:     */
3547:     M->was_assembled = PETSC_TRUE;
3548:     M->assembled     = PETSC_FALSE;
3549:   }

3551:   /* (5) Set values of Msub to *newmat */
3552:   PetscMalloc1(count,&colsub);
3553:   MatGetOwnershipRange(M,&rstart,NULL);

3555:   jj   = aij->j;
3556:   MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa);
3557:   for (i=0; i<m; i++) {
3558:     row = rstart + i;
3559:     nz  = ii[i+1] - ii[i];
3560:     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3561:     MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);
3562:     jj += nz; aa += nz;
3563:   }
3564:   MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa);
3565:   ISRestoreIndices(iscmap,&cmap);

3567:   MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
3568:   MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);

3570:   PetscFree(colsub);

3572:   /* save Msub, iscol_sub and iscmap used in processor for next request */
3573:   if (call == MAT_INITIAL_MATRIX) {
3574:     *newmat = M;
3575:     PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);
3576:     MatDestroy(&Msub);

3578:     PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);
3579:     ISDestroy(&iscol_sub);

3581:     PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);
3582:     ISDestroy(&iscmap);

3584:     if (iscol_local) {
3585:       PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);
3586:       ISDestroy(&iscol_local);
3587:     }
3588:   }
3589:   return 0;
3590: }

3592: /*
3593:     Not great since it makes two copies of the submatrix: first a SeqAIJ
3594:   on each process, and then the end result by concatenating the local matrices.
3595:   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().

3597:   Note: This requires a sequential iscol with all indices.
3598: */
3599: PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3600: {
3601:   PetscMPIInt    rank,size;
3602:   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3603:   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3604:   Mat            M,Mreuse;
3605:   MatScalar      *aa,*vwork;
3606:   MPI_Comm       comm;
3607:   Mat_SeqAIJ     *aij;
3608:   PetscBool      colflag,allcolumns=PETSC_FALSE;

3610:   PetscObjectGetComm((PetscObject)mat,&comm);
3611:   MPI_Comm_rank(comm,&rank);
3612:   MPI_Comm_size(comm,&size);

3614:   /* Check for special case: each processor gets entire matrix columns */
3615:   ISIdentity(iscol,&colflag);
3616:   ISGetLocalSize(iscol,&n);
3617:   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3618:   MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));

3620:   if (call ==  MAT_REUSE_MATRIX) {
3621:     PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);
3623:     MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);
3624:   } else {
3625:     MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);
3626:   }

3628:   /*
3629:       m - number of local rows
3630:       n - number of columns (same on all processors)
3631:       rstart - first row in new global matrix generated
3632:   */
3633:   MatGetSize(Mreuse,&m,&n);
3634:   MatGetBlockSizes(Mreuse,&bs,&cbs);
3635:   if (call == MAT_INITIAL_MATRIX) {
3636:     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3637:     ii  = aij->i;
3638:     jj  = aij->j;

3640:     /*
3641:         Determine the number of non-zeros in the diagonal and off-diagonal
3642:         portions of the matrix in order to do correct preallocation
3643:     */

3645:     /* first get start and end of "diagonal" columns */
3646:     if (csize == PETSC_DECIDE) {
3647:       ISGetSize(isrow,&mglobal);
3648:       if (mglobal == n) { /* square matrix */
3649:         nlocal = m;
3650:       } else {
3651:         nlocal = n/size + ((n % size) > rank);
3652:       }
3653:     } else {
3654:       nlocal = csize;
3655:     }
3656:     MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);
3657:     rstart = rend - nlocal;

3660:     /* next, compute all the lengths */
3661:     PetscMalloc1(2*m+1,&dlens);
3662:     olens = dlens + m;
3663:     for (i=0; i<m; i++) {
3664:       jend = ii[i+1] - ii[i];
3665:       olen = 0;
3666:       dlen = 0;
3667:       for (j=0; j<jend; j++) {
3668:         if (*jj < rstart || *jj >= rend) olen++;
3669:         else dlen++;
3670:         jj++;
3671:       }
3672:       olens[i] = olen;
3673:       dlens[i] = dlen;
3674:     }
3675:     MatCreate(comm,&M);
3676:     MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);
3677:     MatSetBlockSizes(M,bs,cbs);
3678:     MatSetType(M,((PetscObject)mat)->type_name);
3679:     MatMPIAIJSetPreallocation(M,0,dlens,0,olens);
3680:     PetscFree(dlens);
3681:   } else {
3682:     PetscInt ml,nl;

3684:     M    = *newmat;
3685:     MatGetLocalSize(M,&ml,&nl);
3687:     MatZeroEntries(M);
3688:     /*
3689:          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3690:        rather than the slower MatSetValues().
3691:     */
3692:     M->was_assembled = PETSC_TRUE;
3693:     M->assembled     = PETSC_FALSE;
3694:   }
3695:   MatGetOwnershipRange(M,&rstart,&rend);
3696:   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3697:   ii   = aij->i;
3698:   jj   = aij->j;

3700:   /* trigger copy to CPU if needed */
3701:   MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa);
3702:   for (i=0; i<m; i++) {
3703:     row   = rstart + i;
3704:     nz    = ii[i+1] - ii[i];
3705:     cwork = jj; jj += nz;
3706:     vwork = aa; aa += nz;
3707:     MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);
3708:   }
3709:   MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa);

3711:   MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
3712:   MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
3713:   *newmat = M;

3715:   /* save submatrix used in processor for next request */
3716:   if (call ==  MAT_INITIAL_MATRIX) {
3717:     PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);
3718:     MatDestroy(&Mreuse);
3719:   }
3720:   return 0;
3721: }

3723: PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3724: {
3725:   PetscInt       m,cstart, cend,j,nnz,i,d;
3726:   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3727:   const PetscInt *JJ;
3728:   PetscBool      nooffprocentries;


3732:   PetscLayoutSetUp(B->rmap);
3733:   PetscLayoutSetUp(B->cmap);
3734:   m      = B->rmap->n;
3735:   cstart = B->cmap->rstart;
3736:   cend   = B->cmap->rend;
3737:   rstart = B->rmap->rstart;

3739:   PetscCalloc2(m,&d_nnz,m,&o_nnz);

3741:   if (PetscDefined(USE_DEBUG)) {
3742:     for (i=0; i<m; i++) {
3743:       nnz = Ii[i+1]- Ii[i];
3744:       JJ  = J + Ii[i];
3748:     }
3749:   }

3751:   for (i=0; i<m; i++) {
3752:     nnz     = Ii[i+1]- Ii[i];
3753:     JJ      = J + Ii[i];
3754:     nnz_max = PetscMax(nnz_max,nnz);
3755:     d       = 0;
3756:     for (j=0; j<nnz; j++) {
3757:       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3758:     }
3759:     d_nnz[i] = d;
3760:     o_nnz[i] = nnz - d;
3761:   }
3762:   MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
3763:   PetscFree2(d_nnz,o_nnz);

3765:   for (i=0; i<m; i++) {
3766:     ii   = i + rstart;
3767:     MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);
3768:   }
3769:   nooffprocentries    = B->nooffprocentries;
3770:   B->nooffprocentries = PETSC_TRUE;
3771:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3772:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3773:   B->nooffprocentries = nooffprocentries;

3775:   MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3776:   return 0;
3777: }

3779: /*@
3780:    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3781:    (the default parallel PETSc format).

3783:    Collective

3785:    Input Parameters:
3786: +  B - the matrix
3787: .  i - the indices into j for the start of each local row (starts with zero)
3788: .  j - the column indices for each local row (starts with zero)
3789: -  v - optional values in the matrix

3791:    Level: developer

3793:    Notes:
3794:        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3795:      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3796:      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

3798:        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

3800:        The format used for the sparse matrix input is equivalent to a
3801:     row-major ordering, i.e., for the following matrix, the input data expected is
3802:     as shown:

3804: $        1 0 0
3805: $        2 0 3     P0
3806: $       -------
3807: $        4 5 6     P1
3808: $
3809: $     Process0 [P0]: rows_owned=[0,1]
3810: $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3811: $        j =  {0,0,2}  [size = 3]
3812: $        v =  {1,2,3}  [size = 3]
3813: $
3814: $     Process1 [P1]: rows_owned=[2]
3815: $        i =  {0,3}    [size = nrow+1  = 1+1]
3816: $        j =  {0,1,2}  [size = 3]
3817: $        v =  {4,5,6}  [size = 3]

3819: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3820:           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3821: @*/
3822: PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3823: {
3824:   PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));
3825:   return 0;
3826: }

3828: /*@C
3829:    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3830:    (the default parallel PETSc format).  For good matrix assembly performance
3831:    the user should preallocate the matrix storage by setting the parameters
3832:    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3833:    performance can be increased by more than a factor of 50.

3835:    Collective

3837:    Input Parameters:
3838: +  B - the matrix
3839: .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3840:            (same value is used for all local rows)
3841: .  d_nnz - array containing the number of nonzeros in the various rows of the
3842:            DIAGONAL portion of the local submatrix (possibly different for each row)
3843:            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3844:            The size of this array is equal to the number of local rows, i.e 'm'.
3845:            For matrices that will be factored, you must leave room for (and set)
3846:            the diagonal entry even if it is zero.
3847: .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3848:            submatrix (same value is used for all local rows).
3849: -  o_nnz - array containing the number of nonzeros in the various rows of the
3850:            OFF-DIAGONAL portion of the local submatrix (possibly different for
3851:            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3852:            structure. The size of this array is equal to the number
3853:            of local rows, i.e 'm'.

3855:    If the *_nnz parameter is given then the *_nz parameter is ignored

3857:    The AIJ format (also called the Yale sparse matrix format or
3858:    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3859:    storage.  The stored row and column indices begin with zero.
3860:    See Users-Manual: ch_mat for details.

3862:    The parallel matrix is partitioned such that the first m0 rows belong to
3863:    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3864:    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

3866:    The DIAGONAL portion of the local submatrix of a processor can be defined
3867:    as the submatrix which is obtained by extracting the part corresponding to
3868:    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3869:    first row that belongs to the processor, r2 is the last row belonging to
3870:    this processor, and c1-c2 is the range of indices of the local part of a
3871:    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3872:    common case of a square matrix, the row and column ranges are the same and
3873:    the DIAGONAL part is also square. The remaining portion of the local
3874:    submatrix (mxN) constitutes the OFF-DIAGONAL portion.

3876:    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

3878:    You can call MatGetInfo() to get information on how effective the preallocation was;
3879:    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3880:    You can also run with the option -info and look for messages with the string
3881:    malloc in them to see if additional memory allocation was needed.

3883:    Example usage:

3885:    Consider the following 8x8 matrix with 34 non-zero values, that is
3886:    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3887:    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3888:    as follows:

3890: .vb
3891:             1  2  0  |  0  3  0  |  0  4
3892:     Proc0   0  5  6  |  7  0  0  |  8  0
3893:             9  0 10  | 11  0  0  | 12  0
3894:     -------------------------------------
3895:            13  0 14  | 15 16 17  |  0  0
3896:     Proc1   0 18  0  | 19 20 21  |  0  0
3897:             0  0  0  | 22 23  0  | 24  0
3898:     -------------------------------------
3899:     Proc2  25 26 27  |  0  0 28  | 29  0
3900:            30  0  0  | 31 32 33  |  0 34
3901: .ve

3903:    This can be represented as a collection of submatrices as:

3905: .vb
3906:       A B C
3907:       D E F
3908:       G H I
3909: .ve

3911:    Where the submatrices A,B,C are owned by proc0, D,E,F are
3912:    owned by proc1, G,H,I are owned by proc2.

3914:    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3915:    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3916:    The 'M','N' parameters are 8,8, and have the same values on all procs.

3918:    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3919:    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3920:    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3921:    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3922:    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3923:    matrix, and [DF] as another SeqAIJ matrix.

3925:    When d_nz, o_nz parameters are specified, d_nz storage elements are
3926:    allocated for every row of the local diagonal submatrix, and o_nz
3927:    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3928:    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3929:    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3930:    In this case, the values of d_nz,o_nz are:
3931: .vb
3932:      proc0 : dnz = 2, o_nz = 2
3933:      proc1 : dnz = 3, o_nz = 2
3934:      proc2 : dnz = 1, o_nz = 4
3935: .ve
3936:    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3937:    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3938:    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3939:    34 values.

3941:    When d_nnz, o_nnz parameters are specified, the storage is specified
3942:    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3943:    In the above case the values for d_nnz,o_nnz are:
3944: .vb
3945:      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3946:      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3947:      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3948: .ve
3949:    Here the space allocated is the sum of all the above values, i.e., 34, and
3950:    hence pre-allocation is perfect.
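
   For illustration, a sketch of the corresponding call on proc0 (arrays taken from the example above; B is
   assumed to already have type MATMPIAIJ and local sizes set to 3 rows and 3 columns):
.vb
     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
     MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve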

3952:    Level: intermediate

3954: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3955:           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
3956: @*/
3957: PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3958: {
3961:   PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));
3962:   return 0;
3963: }

3965: /*@
3966:      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard
3967:          CSR format.

3969:    Collective

3971:    Input Parameters:
3972: +  comm - MPI communicator
3973: .  m - number of local rows (Cannot be PETSC_DECIDE)
3974: .  n - This value should be the same as the local size used in creating the
3975:        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3976:        calculated if N is given) For square matrices n is almost always m.
3977: .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3978: .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3979: .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
3980: .   j - column indices
3981: -   a - matrix values

3983:    Output Parameter:
3984: .   mat - the matrix

3986:    Level: intermediate

3988:    Notes:
3989:        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3990:      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3991:      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

3993:        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

3995:        The format used for the sparse matrix input is equivalent to a
3996:     row-major ordering, i.e., for the following matrix, the input data expected is
3997:     as shown:

3999:        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().

4001: $        1 0 0
4002: $        2 0 3     P0
4003: $       -------
4004: $        4 5 6     P1
4005: $
4006: $     Process0 [P0]: rows_owned=[0,1]
4007: $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4008: $        j =  {0,0,2}  [size = 3]
4009: $        v =  {1,2,3}  [size = 3]
4010: $
4011: $     Process1 [P1]: rows_owned=[2]
4012: $        i =  {0,3}    [size = nrow+1  = 1+1]
4013: $        j =  {0,1,2}  [size = 3]
4014: $        v =  {4,5,6}  [size = 3]
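
   For illustration, a sketch of the call made on process P0 above (2 local rows of the 3x3 global matrix;
   arrays hard-coded from the layout shown):
.vb
     const PetscInt    i[] = {0,1,3};
     const PetscInt    j[] = {0,0,2};
     const PetscScalar v[] = {1,2,3};
     Mat               A;
     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,v,&A);
.ve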

4016: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4017:           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4018: @*/
4019: PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4020: {
4023:   MatCreate(comm,mat);
4024:   MatSetSizes(*mat,m,n,M,N);
4025:   /* MatSetBlockSizes(M,bs,cbs); */
4026:   MatSetType(*mat,MATMPIAIJ);
4027:   MatMPIAIJSetPreallocationCSR(*mat,i,j,a);
4028:   return 0;
4029: }

4031: /*@
4032:      MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows in standard
4033:          CSR format. Only the numerical values are updated; the other arrays must be identical to those used to create the matrix.

4035:    Collective

4037:    Input Parameters:
4038: +  mat - the matrix
4039: .  m - number of local rows (Cannot be PETSC_DECIDE)
4040: .  n - This value should be the same as the local size used in creating the
4041:        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4042:        calculated if N is given) For square matrices n is almost always m.
4043: .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4044: .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4045: .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4046: .  J - column indices
4047: -  v - matrix values
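
   Notes:
   A minimal sketch, assuming mat was created earlier with MatCreateMPIAIJWithArrays() from the same index
   arrays Ii and J, and that only the numerical values in v have changed (names here are illustrative):
.vb
     MatUpdateMPIAIJWithArrays(mat,m,n,M,N,Ii,J,v);
.ve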

4049:    Level: intermediate

4051: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4052:           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4053: @*/
4054: PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4055: {
4056:   PetscInt       cstart,nnz,i,j;
4057:   PetscInt       *ld;
4058:   PetscBool      nooffprocentries;
4059:   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4060:   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data;
4061:   PetscScalar    *ad,*ao;
4062:   const PetscInt *Adi = Ad->i;
4063:   PetscInt       ldi,Iii,md;


4070:   MatSeqAIJGetArrayWrite(Aij->A,&ad);
4071:   MatSeqAIJGetArrayWrite(Aij->B,&ao);
4072:   cstart = mat->cmap->rstart;
4073:   if (!Aij->ld) {
4074:     /* count number of entries below block diagonal */
4075:     PetscCalloc1(m,&ld);
4076:     Aij->ld = ld;
4077:     for (i=0; i<m; i++) {
4078:       nnz  = Ii[i+1]- Ii[i];
4079:       j     = 0;
4080:       while  (J[j] < cstart && j < nnz) {j++;}
4081:       J    += nnz;
4082:       ld[i] = j;
4083:     }
4084:   } else {
4085:     ld = Aij->ld;
4086:   }

4088:   for (i=0; i<m; i++) {
4089:     nnz  = Ii[i+1]- Ii[i];
4090:     Iii  = Ii[i];
4091:     ldi  = ld[i];
4092:     md   = Adi[i+1]-Adi[i];
4093:     PetscArraycpy(ao,v + Iii,ldi);
4094:     PetscArraycpy(ad,v + Iii + ldi,md);
4095:     PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);
4096:     ad  += md;
4097:     ao  += nnz - md;
4098:   }
4099:   nooffprocentries      = mat->nooffprocentries;
4100:   mat->nooffprocentries = PETSC_TRUE;
4101:   MatSeqAIJRestoreArrayWrite(Aij->A,&ad);
4102:   MatSeqAIJRestoreArrayWrite(Aij->B,&ao);
4103:   PetscObjectStateIncrease((PetscObject)Aij->A);
4104:   PetscObjectStateIncrease((PetscObject)Aij->B);
4105:   PetscObjectStateIncrease((PetscObject)mat);
4106:   MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
4107:   MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
4108:   mat->nooffprocentries = nooffprocentries;
4109:   return 0;
4110: }

4112: /*@C
4113:    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4114:    (the default parallel PETSc format).  For good matrix assembly performance
4115:    the user should preallocate the matrix storage by setting the parameters
4116:    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4117:    performance can be increased by more than a factor of 50.

4119:    Collective

4121:    Input Parameters:
4122: +  comm - MPI communicator
4123: .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4124:            This value should be the same as the local size used in creating the
4125:            y vector for the matrix-vector product y = Ax.
4126: .  n - This value should be the same as the local size used in creating the
4127:        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4128:        calculated if N is given) For square matrices n is almost always m.
4129: .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4130: .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4131: .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4132:            (same value is used for all local rows)
4133: .  d_nnz - array containing the number of nonzeros in the various rows of the
4134:            DIAGONAL portion of the local submatrix (possibly different for each row)
4135:            or NULL, if d_nz is used to specify the nonzero structure.
4136:            The size of this array is equal to the number of local rows, i.e 'm'.
4137: .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4138:            submatrix (same value is used for all local rows).
4139: -  o_nnz - array containing the number of nonzeros in the various rows of the
4140:            OFF-DIAGONAL portion of the local submatrix (possibly different for
4141:            each row) or NULL, if o_nz is used to specify the nonzero
4142:            structure. The size of this array is equal to the number
4143:            of local rows, i.e 'm'.

4145:    Output Parameter:
4146: .  A - the matrix

4148:    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4149:    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4150:    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

4152:    Notes:
4153:    If the *_nnz parameter is given then the *_nz parameter is ignored

4155:    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4156:    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4157:    storage requirements for this matrix.

4159:    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4160:    processor then it must be used on all processors that share the object for
4161:    that argument.

4163:    The user MUST specify either the local or global matrix dimensions
4164:    (possibly both).

4166:    The parallel matrix is partitioned across processors such that the
4167:    first m0 rows belong to process 0, the next m1 rows belong to
4168:    process 1, the next m2 rows belong to process 2 etc., where
4169:    m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores
4170:    values corresponding to an [m x N] submatrix.

4172:    The columns are logically partitioned with the n0 columns belonging
4173:    to 0th partition, the next n1 columns belonging to the next
4174:    partition etc.. where n0,n1,n2... are the input parameter 'n'.

4176:    The DIAGONAL portion of the local submatrix on any given processor
4177:    is the submatrix corresponding to the rows and columns m,n
4178:    corresponding to the given processor, i.e., the diagonal matrix on
4179:    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4180:    etc. The remaining portion of the local submatrix [m x (N-n)]
4181:    constitutes the OFF-DIAGONAL portion. The example below better
4182:    illustrates this concept.

4184:    For a square global matrix we define each processor's diagonal portion
4185:    to be its local rows and the corresponding columns (a square submatrix);
4186:    each processor's off-diagonal portion encompasses the remainder of the
4187:    local matrix (a rectangular submatrix).

4189:    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

4191:    When calling this routine with a single process communicator, a matrix of
4192:    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4193:    type of communicator, use the construction mechanism
4194: .vb
4195:      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4196: .ve

4203:    By default, this format uses inodes (identical nodes) when possible.
4204:    We search for consecutive rows with the same nonzero structure, thereby
4205:    reusing matrix information to achieve increased efficiency.

4207:    Options Database Keys:
4208: +  -mat_no_inode  - Do not use inodes
4209: .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4210: -  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices.
4211:         See viewer types in the manual page of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter to be viewed as a matrix.
4212:         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one MatMult() call.

4214:    Example usage:

4216:    Consider the following 8x8 matrix with 34 non-zero values, that is
4217:    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4218:    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4219:    as follows

4221: .vb
4222:             1  2  0  |  0  3  0  |  0  4
4223:     Proc0   0  5  6  |  7  0  0  |  8  0
4224:             9  0 10  | 11  0  0  | 12  0
4225:     -------------------------------------
4226:            13  0 14  | 15 16 17  |  0  0
4227:     Proc1   0 18  0  | 19 20 21  |  0  0
4228:             0  0  0  | 22 23  0  | 24  0
4229:     -------------------------------------
4230:     Proc2  25 26 27  |  0  0 28  | 29  0
4231:            30  0  0  | 31 32 33  |  0 34
4232: .ve

4234:    This can be represented as a collection of submatrices as

4236: .vb
4237:       A B C
4238:       D E F
4239:       G H I
4240: .ve

4242:    where the submatrices A,B,C are owned by proc0, D,E,F are
4243:    owned by proc1, and G,H,I are owned by proc2.

4245:    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4246:    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4247:    The 'M','N' parameters are 8,8, and have the same values on all procs.

4249:    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4250:    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4251:    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4252:    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4253:    part as separate SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4254:    matrix, and [DF] as another SeqAIJ matrix.

4256:    When the d_nz, o_nz parameters are specified, d_nz storage elements are
4257:    allocated for every row of the local DIAGONAL submatrix, and o_nz
4258:    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4259:    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4260:    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4261:    In this case, the values of d_nz,o_nz are
4262: .vb
4263:      proc0 : d_nz = 2, o_nz = 2
4264:      proc1 : d_nz = 3, o_nz = 2
4265:      proc2 : d_nz = 1, o_nz = 4
4266: .ve
4267:    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4268:    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4269:    for proc2; i.e., we are using 12+15+10=37 storage locations to store
4270:    34 values.

4272:    When the d_nnz, o_nnz parameters are specified, the storage is specified
4273:    for every row of both the DIAGONAL and the OFF-DIAGONAL submatrices.
4274:    In the above case the values for d_nnz,o_nnz are
4275: .vb
4276:      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4277:      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4278:      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4279: .ve
4280:    Here the space allocated is the sum of all the above values, i.e., 34, and
4281:    hence the pre-allocation is perfect.
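
   As an illustrative sketch (not a complete program), the perfect preallocation above could be
   requested on proc0 with
.vb
     PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};   /* the proc0 values from the example above */
     Mat      A;
     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
   with proc1 and proc2 passing their own local sizes and d_nnz/o_nnz arrays.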

4283:    Level: intermediate

4285: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4286:           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4287: @*/
4288: PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4289: {
4290:   PetscMPIInt    size;

4292:   MatCreate(comm,A);
4293:   MatSetSizes(*A,m,n,M,N);
4294:   MPI_Comm_size(comm,&size);
4295:   if (size > 1) {
4296:     MatSetType(*A,MATMPIAIJ);
4297:     MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);
4298:   } else {
4299:     MatSetType(*A,MATSEQAIJ);
4300:     MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);
4301:   }
4302:   return 0;
4303: }

4305: /*@C
4306:   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix

4308:   Not collective

4310:   Input Parameter:
4311: . A - The MPIAIJ matrix

4313:   Output Parameters:
4314: + Ad - The local diagonal block as a SeqAIJ matrix
4315: . Ao - The local off-diagonal block as a SeqAIJ matrix
4316: - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix

4318:   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4319:   in Ad are in [0, Nc) where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
4320:   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4321:   local column numbers to global column numbers in the original matrix.
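
  A minimal usage sketch (assuming A is an assembled MATMPIAIJ matrix):
.vb
     Mat            Ad,Ao;
     const PetscInt *colmap;
     MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
     /* column j of Ao corresponds to global column colmap[j] of A */
.ve
  The returned Ad and Ao are internal to A, so the caller should not destroy them.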

4323:   Level: intermediate

4325: .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4326: @*/
4327: PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4328: {
4329:   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4330:   PetscBool      flg;

4332:   PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);
4334:   if (Ad)     *Ad     = a->A;
4335:   if (Ao)     *Ao     = a->B;
4336:   if (colmap) *colmap = a->garray;
4337:   return 0;
4338: }

4340: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4341: {
4343:   PetscInt       m,N,i,rstart,nnz,Ii;
4344:   PetscInt       *indx;
4345:   PetscScalar    *values;
4346:   MatType        rootType;

4348:   MatGetSize(inmat,&m,&N);
4349:   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4350:     PetscInt       *dnz,*onz,sum,bs,cbs;

4352:     if (n == PETSC_DECIDE) {
4353:       PetscSplitOwnership(comm,&n,&N);
4354:     }
4355:     /* Check sum(n) = N */
4356:     MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);

4359:     MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);
4360:     rstart -= m;

4362:     MatPreallocateInitialize(comm,m,n,dnz,onz);
4363:     for (i=0; i<m; i++) {
4364:       MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);
4365:       MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);
4366:       MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);
4367:     }

4369:     MatCreate(comm,outmat);
4370:     MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
4371:     MatGetBlockSizes(inmat,&bs,&cbs);
4372:     MatSetBlockSizes(*outmat,bs,cbs);
4373:     MatGetRootType_Private(inmat,&rootType);
4374:     MatSetType(*outmat,rootType);
4375:     MatSeqAIJSetPreallocation(*outmat,0,dnz);
4376:     MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);
4377:     MatPreallocateFinalize(dnz,onz);
4378:     MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);
4379:   }

4381:   /* numeric phase */
4382:   MatGetOwnershipRange(*outmat,&rstart,NULL);
4383:   for (i=0; i<m; i++) {
4384:     MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);
4385:     Ii   = i + rstart;
4386:     MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);
4387:     MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);
4388:   }
4389:   MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);
4390:   MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);
4391:   return 0;
4392: }

4394: PetscErrorCode MatFileSplit(Mat A,char *outfile)
4395: {
4396:   PetscMPIInt       rank;
4397:   PetscInt          m,N,i,rstart,nnz;
4398:   size_t            len;
4399:   const PetscInt    *indx;
4400:   PetscViewer       out;
4401:   char              *name;
4402:   Mat               B;
4403:   const PetscScalar *values;

4405:   MatGetLocalSize(A,&m,NULL);
4406:   MatGetSize(A,NULL,&N);
4407:   /* Should this be the type of the diagonal block of A? */
4408:   MatCreate(PETSC_COMM_SELF,&B);
4409:   MatSetSizes(B,m,N,m,N);
4410:   MatSetBlockSizesFromMats(B,A,A);
4411:   MatSetType(B,MATSEQAIJ);
4412:   MatSeqAIJSetPreallocation(B,0,NULL);
4413:   MatGetOwnershipRange(A,&rstart,NULL);
4414:   for (i=0; i<m; i++) {
4415:     MatGetRow(A,i+rstart,&nnz,&indx,&values);
4416:     MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);
4417:     MatRestoreRow(A,i+rstart,&nnz,&indx,&values);
4418:   }
4419:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
4420:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

4422:   MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);
4423:   PetscStrlen(outfile,&len);
4424:   PetscMalloc1(len+6,&name);
4425:   PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);
4426:   PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);
4427:   PetscFree(name);
4428:   MatView(B,out);
4429:   PetscViewerDestroy(&out);
4430:   MatDestroy(&B);
4431:   return 0;
4432: }

4434: static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4435: {
4436:   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;

4438:   if (!merge) return 0;
4439:   PetscFree(merge->id_r);
4440:   PetscFree(merge->len_s);
4441:   PetscFree(merge->len_r);
4442:   PetscFree(merge->bi);
4443:   PetscFree(merge->bj);
4444:   PetscFree(merge->buf_ri[0]);
4445:   PetscFree(merge->buf_ri);
4446:   PetscFree(merge->buf_rj[0]);
4447:   PetscFree(merge->buf_rj);
4448:   PetscFree(merge->coi);
4449:   PetscFree(merge->coj);
4450:   PetscFree(merge->owners_co);
4451:   PetscLayoutDestroy(&merge->rowmap);
4452:   PetscFree(merge);
4453:   return 0;
4454: }

4456: #include <../src/mat/utils/freespace.h>
4457: #include <petscbt.h>

4459: PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4460: {
4461:   MPI_Comm            comm;
4462:   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4463:   PetscMPIInt         size,rank,taga,*len_s;
4464:   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4465:   PetscInt            proc,m;
4466:   PetscInt            **buf_ri,**buf_rj;
4467:   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4468:   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4469:   MPI_Request         *s_waits,*r_waits;
4470:   MPI_Status          *status;
4471:   const MatScalar     *aa,*a_a;
4472:   MatScalar           **abuf_r,*ba_i;
4473:   Mat_Merge_SeqsToMPI *merge;
4474:   PetscContainer      container;

4476:   PetscObjectGetComm((PetscObject)mpimat,&comm);
4477:   PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);

4479:   MPI_Comm_size(comm,&size);
4480:   MPI_Comm_rank(comm,&rank);

4482:   PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);
4484:   PetscContainerGetPointer(container,(void**)&merge);
4485:   MatSeqAIJGetArrayRead(seqmat,&a_a);
4486:   aa   = a_a;

4488:   bi     = merge->bi;
4489:   bj     = merge->bj;
4490:   buf_ri = merge->buf_ri;
4491:   buf_rj = merge->buf_rj;

4493:   PetscMalloc1(size,&status);
4494:   owners = merge->rowmap->range;
4495:   len_s  = merge->len_s;

4497:   /* send and recv matrix values */
4498:   /*-----------------------------*/
4499:   PetscObjectGetNewTag((PetscObject)mpimat,&taga);
4500:   PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);

4502:   PetscMalloc1(merge->nsend+1,&s_waits);
4503:   for (proc=0,k=0; proc<size; proc++) {
4504:     if (!len_s[proc]) continue;
4505:     i    = owners[proc];
4506:     MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);
4507:     k++;
4508:   }

4510:   if (merge->nrecv) MPI_Waitall(merge->nrecv,r_waits,status);
4511:   if (merge->nsend) MPI_Waitall(merge->nsend,s_waits,status);
4512:   PetscFree(status);

4514:   PetscFree(s_waits);
4515:   PetscFree(r_waits);

4517:   /* insert mat values of mpimat */
4518:   /*----------------------------*/
4519:   PetscMalloc1(N,&ba_i);
4520:   PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);

4522:   for (k=0; k<merge->nrecv; k++) {
4523:     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4524:     nrows       = *(buf_ri_k[k]);
4525:     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4526:     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4527:   }

4529:   /* set values of ba */
4530:   m    = merge->rowmap->n;
4531:   for (i=0; i<m; i++) {
4532:     arow = owners[rank] + i;
4533:     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4534:     bnzi = bi[i+1] - bi[i];
4535:     PetscArrayzero(ba_i,bnzi);

4537:     /* add local non-zero vals of this proc's seqmat into ba */
4538:     anzi   = ai[arow+1] - ai[arow];
4539:     aj     = a->j + ai[arow];
4540:     aa     = a_a + ai[arow];
4541:     nextaj = 0;
4542:     for (j=0; nextaj<anzi; j++) {
4543:       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4544:         ba_i[j] += aa[nextaj++];
4545:       }
4546:     }

4548:     /* add received vals into ba */
4549:     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4550:       /* i-th row */
4551:       if (i == *nextrow[k]) {
4552:         anzi   = *(nextai[k]+1) - *nextai[k];
4553:         aj     = buf_rj[k] + *(nextai[k]);
4554:         aa     = abuf_r[k] + *(nextai[k]);
4555:         nextaj = 0;
4556:         for (j=0; nextaj<anzi; j++) {
4557:           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4558:             ba_i[j] += aa[nextaj++];
4559:           }
4560:         }
4561:         nextrow[k]++; nextai[k]++;
4562:       }
4563:     }
4564:     MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);
4565:   }
4566:   MatSeqAIJRestoreArrayRead(seqmat,&a_a);
4567:   MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);
4568:   MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);

4570:   PetscFree(abuf_r[0]);
4571:   PetscFree(abuf_r);
4572:   PetscFree(ba_i);
4573:   PetscFree3(buf_ri_k,nextrow,nextai);
4574:   PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);
4575:   return 0;
4576: }

4578: PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4579: {
4580:   PetscErrorCode      ierr;
4581:   Mat                 B_mpi;
4582:   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4583:   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4584:   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4585:   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4586:   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4587:   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4588:   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4589:   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4590:   MPI_Status          *status;
4591:   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4592:   PetscBT             lnkbt;
4593:   Mat_Merge_SeqsToMPI *merge;
4594:   PetscContainer      container;

4596:   PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);

4598:   /* make sure it is a PETSc comm */
4599:   PetscCommDuplicate(comm,&comm,NULL);
4600:   MPI_Comm_size(comm,&size);
4601:   MPI_Comm_rank(comm,&rank);

4603:   PetscNew(&merge);
4604:   PetscMalloc1(size,&status);

4606:   /* determine row ownership */
4607:   /*---------------------------------------------------------*/
4608:   PetscLayoutCreate(comm,&merge->rowmap);
4609:   PetscLayoutSetLocalSize(merge->rowmap,m);
4610:   PetscLayoutSetSize(merge->rowmap,M);
4611:   PetscLayoutSetBlockSize(merge->rowmap,1);
4612:   PetscLayoutSetUp(merge->rowmap);
4613:   PetscMalloc1(size,&len_si);
4614:   PetscMalloc1(size,&merge->len_s);

4616:   m      = merge->rowmap->n;
4617:   owners = merge->rowmap->range;

4619:   /* determine the number of messages to send, their lengths */
4620:   /*---------------------------------------------------------*/
4621:   len_s = merge->len_s;

4623:   len          = 0; /* length of buf_si[] */
4624:   merge->nsend = 0;
4625:   for (proc=0; proc<size; proc++) {
4626:     len_si[proc] = 0;
4627:     if (proc == rank) {
4628:       len_s[proc] = 0;
4629:     } else {
4630:       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4631:       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4632:     }
4633:     if (len_s[proc]) {
4634:       merge->nsend++;
4635:       nrows = 0;
4636:       for (i=owners[proc]; i<owners[proc+1]; i++) {
4637:         if (ai[i+1] > ai[i]) nrows++;
4638:       }
4639:       len_si[proc] = 2*(nrows+1);
4640:       len         += len_si[proc];
4641:     }
4642:   }

4644:   /* determine the number and length of messages to receive for ij-structure */
4645:   /*-------------------------------------------------------------------------*/
4646:   PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);
4647:   PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);

4649:   /* post the Irecv of j-structure */
4650:   /*-------------------------------*/
4651:   PetscCommGetNewTag(comm,&tagj);
4652:   PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);

4654:   /* post the Isend of j-structure */
4655:   /*--------------------------------*/
4656:   PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);

4658:   for (proc=0, k=0; proc<size; proc++) {
4659:     if (!len_s[proc]) continue;
4660:     i    = owners[proc];
4661:     MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);
4662:     k++;
4663:   }

4665:   /* receives and sends of j-structure are complete */
4666:   /*------------------------------------------------*/
4667:   if (merge->nrecv) MPI_Waitall(merge->nrecv,rj_waits,status);
4668:   if (merge->nsend) MPI_Waitall(merge->nsend,sj_waits,status);

4670:   /* send and recv i-structure */
4671:   /*---------------------------*/
4672:   PetscCommGetNewTag(comm,&tagi);
4673:   PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);

4675:   PetscMalloc1(len+1,&buf_s);
4676:   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4677:   for (proc=0,k=0; proc<size; proc++) {
4678:     if (!len_s[proc]) continue;
4679:     /* form outgoing message for i-structure:
4680:          buf_si[0]:                 nrows to be sent
4681:                [1:nrows]:           row index (global)
4682:                [nrows+1:2*nrows+1]: i-structure index
4683:     */
4684:     /*-------------------------------------------*/
4685:     nrows       = len_si[proc]/2 - 1;
4686:     buf_si_i    = buf_si + nrows+1;
4687:     buf_si[0]   = nrows;
4688:     buf_si_i[0] = 0;
4689:     nrows       = 0;
4690:     for (i=owners[proc]; i<owners[proc+1]; i++) {
4691:       anzi = ai[i+1] - ai[i];
4692:       if (anzi) {
4693:         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4694:         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4695:         nrows++;
4696:       }
4697:     }
4698:     MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);
4699:     k++;
4700:     buf_si += len_si[proc];
4701:   }

4703:   if (merge->nrecv) MPI_Waitall(merge->nrecv,ri_waits,status);
4704:   if (merge->nsend) MPI_Waitall(merge->nsend,si_waits,status);

4706:   PetscInfo(seqmat,"nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv);
4707:   for (i=0; i<merge->nrecv; i++) {
4708:     PetscInfo(seqmat,"recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);
4709:   }

4711:   PetscFree(len_si);
4712:   PetscFree(len_ri);
4713:   PetscFree(rj_waits);
4714:   PetscFree2(si_waits,sj_waits);
4715:   PetscFree(ri_waits);
4716:   PetscFree(buf_s);
4717:   PetscFree(status);

4719:   /* compute a local seq matrix in each processor */
4720:   /*----------------------------------------------*/
4721:   /* allocate bi array and free space for accumulating nonzero column info */
4722:   PetscMalloc1(m+1,&bi);
4723:   bi[0] = 0;

4725:   /* create and initialize a linked list */
4726:   nlnk = N+1;
4727:   PetscLLCreate(N,N,nlnk,lnk,lnkbt);

4729:   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4730:   len  = ai[owners[rank+1]] - ai[owners[rank]];
4731:   PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);

4733:   current_space = free_space;

4735:   /* determine symbolic info for each local row */
4736:   PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);

4738:   for (k=0; k<merge->nrecv; k++) {
4739:     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4740:     nrows       = *buf_ri_k[k];
4741:     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4742:     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4743:   }

4745:   MatPreallocateInitialize(comm,m,n,dnz,onz);
4746:   len  = 0;
4747:   for (i=0; i<m; i++) {
4748:     bnzi = 0;
4749:     /* add local non-zero cols of this proc's seqmat into lnk */
4750:     arow  = owners[rank] + i;
4751:     anzi  = ai[arow+1] - ai[arow];
4752:     aj    = a->j + ai[arow];
4753:     PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt);
4754:     bnzi += nlnk;
4755:     /* add received col data into lnk */
4756:     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4757:       if (i == *nextrow[k]) { /* i-th row */
4758:         anzi  = *(nextai[k]+1) - *nextai[k];
4759:         aj    = buf_rj[k] + *nextai[k];
4760:         PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt);
4761:         bnzi += nlnk;
4762:         nextrow[k]++; nextai[k]++;
4763:       }
4764:     }
4765:     if (len < bnzi) len = bnzi;  /* =max(bnzi) */

4767:     /* if free space is not available, make more free space */
4768:     if (current_space->local_remaining<bnzi) {
4769:       PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);
4770:       nspacedouble++;
4771:     }
4772:     /* copy data into free space, then initialize lnk */
4773:     PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);
4774:     MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);

4776:     current_space->array           += bnzi;
4777:     current_space->local_used      += bnzi;
4778:     current_space->local_remaining -= bnzi;

4780:     bi[i+1] = bi[i] + bnzi;
4781:   }

4783:   PetscFree3(buf_ri_k,nextrow,nextai);

4785:   PetscMalloc1(bi[m]+1,&bj);
4786:   PetscFreeSpaceContiguous(&free_space,bj);
4787:   PetscLLDestroy(lnk,lnkbt);

4789:   /* create symbolic parallel matrix B_mpi */
4790:   /*---------------------------------------*/
4791:   MatGetBlockSizes(seqmat,&bs,&cbs);
4792:   MatCreate(comm,&B_mpi);
4793:   if (n==PETSC_DECIDE) {
4794:     MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);
4795:   } else {
4796:     MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
4797:   }
4798:   MatSetBlockSizes(B_mpi,bs,cbs);
4799:   MatSetType(B_mpi,MATMPIAIJ);
4800:   MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);
4801:   MatPreallocateFinalize(dnz,onz);
4802:   MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);

4804:   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4805:   B_mpi->assembled  = PETSC_FALSE;
4806:   merge->bi         = bi;
4807:   merge->bj         = bj;
4808:   merge->buf_ri     = buf_ri;
4809:   merge->buf_rj     = buf_rj;
4810:   merge->coi        = NULL;
4811:   merge->coj        = NULL;
4812:   merge->owners_co  = NULL;

4814:   PetscCommDestroy(&comm);

4816:   /* attach the supporting struct to B_mpi for reuse */
4817:   PetscContainerCreate(PETSC_COMM_SELF,&container);
4818:   PetscContainerSetPointer(container,merge);
4819:   PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);
4820:   PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);
4821:   PetscContainerDestroy(&container);
4822:   *mpimat = B_mpi;

4824:   PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);
4825:   return 0;
4826: }

4828: /*@C
4829:       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4830:                  matrices from each processor

4832:     Collective

4834:    Input Parameters:
4835: +    comm - the communicator the parallel matrix will live on
4836: .    seqmat - the input sequential matrix (one per process)
4837: .    m - number of local rows (or PETSC_DECIDE)
4838: .    n - number of local columns (or PETSC_DECIDE)
4839: -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

4841:    Output Parameter:
4842: .    mpimat - the parallel matrix generated

4844:     Level: advanced

4846:    Notes:
4847:      The dimensions of the sequential matrix in each processor MUST be the same.
4848:      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4849:      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
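
     A typical calling sequence is sketched below (assuming each rank has assembled its own
     seqmat of identical dimensions):
.vb
       Mat mpimat;
       MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
       /* ... change the numerical values of seqmat, keeping its nonzero pattern ... */
       MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);
       MatDestroy(&mpimat);
.ve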
4850: @*/
4851: PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4852: {
4853:   PetscMPIInt    size;

4855:   MPI_Comm_size(comm,&size);
4856:   if (size == 1) {
4857:     PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);
4858:     if (scall == MAT_INITIAL_MATRIX) {
4859:       MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);
4860:     } else {
4861:       MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);
4862:     }
4863:     PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);
4864:     return 0;
4865:   }
4866:   PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);
4867:   if (scall == MAT_INITIAL_MATRIX) {
4868:     MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);
4869:   }
4870:   MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);
4871:   PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);
4872:   return 0;
4873: }

4875: /*@
4876:      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4877:           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4878:           with MatGetSize().

4880:     Not Collective

4882:    Input Parameters:
4883: +    A - the matrix
4884: -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

4886:    Output Parameter:
4887: .    A_loc - the local sequential matrix generated

4889:     Level: developer

4891:    Notes:
4892:      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
4893:      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
4894:      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
4895:      modify the values of the returned A_loc.
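
     A minimal sketch of the MAT_INITIAL_MATRIX / MAT_REUSE_MATRIX cycle:
.vb
       Mat A_loc;
       MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
       /* ... use A_loc ... */
       MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);   /* refresh the values after A has changed */
       MatDestroy(&A_loc);
.ve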

4897: .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge()
4898: @*/
4899: PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4900: {
4901:   Mat_MPIAIJ        *mpimat=(Mat_MPIAIJ*)A->data;
4902:   Mat_SeqAIJ        *mat,*a,*b;
4903:   PetscInt          *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4904:   const PetscScalar *aa,*ba,*aav,*bav;
4905:   PetscScalar       *ca,*cam;
4906:   PetscMPIInt       size;
4907:   PetscInt          am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4908:   PetscInt          *ci,*cj,col,ncols_d,ncols_o,jo;
4909:   PetscBool         match;

4911:   PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);
4913:   MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
4914:   if (size == 1) {
4915:     if (scall == MAT_INITIAL_MATRIX) {
4916:       PetscObjectReference((PetscObject)mpimat->A);
4917:       *A_loc = mpimat->A;
4918:     } else if (scall == MAT_REUSE_MATRIX) {
4919:       MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);
4920:     }
4921:     return 0;
4922:   }

4924:   PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);
4925:   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4926:   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4927:   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4928:   MatSeqAIJGetArrayRead(mpimat->A,&aav);
4929:   MatSeqAIJGetArrayRead(mpimat->B,&bav);
4930:   aa   = aav;
4931:   ba   = bav;
4932:   if (scall == MAT_INITIAL_MATRIX) {
4933:     PetscMalloc1(1+am,&ci);
4934:     ci[0] = 0;
4935:     for (i=0; i<am; i++) {
4936:       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4937:     }
4938:     PetscMalloc1(1+ci[am],&cj);
4939:     PetscMalloc1(1+ci[am],&ca);
4940:     k    = 0;
4941:     for (i=0; i<am; i++) {
4942:       ncols_o = bi[i+1] - bi[i];
4943:       ncols_d = ai[i+1] - ai[i];
4944:       /* off-diagonal portion of A */
4945:       for (jo=0; jo<ncols_o; jo++) {
4946:         col = cmap[*bj];
4947:         if (col >= cstart) break;
4948:         cj[k]   = col; bj++;
4949:         ca[k++] = *ba++;
4950:       }
4951:       /* diagonal portion of A */
4952:       for (j=0; j<ncols_d; j++) {
4953:         cj[k]   = cstart + *aj++;
4954:         ca[k++] = *aa++;
4955:       }
4956:       /* off-diagonal portion of A */
4957:       for (j=jo; j<ncols_o; j++) {
4958:         cj[k]   = cmap[*bj++];
4959:         ca[k++] = *ba++;
4960:       }
4961:     }
4962:     /* put together the new matrix */
4963:     MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);
4964:     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4965:     /* Since these are PETSc arrays, change flags to free them as necessary. */
4966:     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4967:     mat->free_a  = PETSC_TRUE;
4968:     mat->free_ij = PETSC_TRUE;
4969:     mat->nonew   = 0;
4970:   } else if (scall == MAT_REUSE_MATRIX) {
4971:     mat  =(Mat_SeqAIJ*)(*A_loc)->data;
4972:     ci   = mat->i;
4973:     cj   = mat->j;
4974:     MatSeqAIJGetArrayWrite(*A_loc,&cam);
4975:     for (i=0; i<am; i++) {
4976:       /* off-diagonal portion of A */
4977:       ncols_o = bi[i+1] - bi[i];
4978:       for (jo=0; jo<ncols_o; jo++) {
4979:         col = cmap[*bj];
4980:         if (col >= cstart) break;
4981:         *cam++ = *ba++; bj++;
4982:       }
4983:       /* diagonal portion of A */
4984:       ncols_d = ai[i+1] - ai[i];
4985:       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4986:       /* off-diagonal portion of A */
4987:       for (j=jo; j<ncols_o; j++) {
4988:         *cam++ = *ba++; bj++;
4989:       }
4990:     }
4991:     MatSeqAIJRestoreArrayWrite(*A_loc,&cam);
4992:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4993:   MatSeqAIJRestoreArrayRead(mpimat->A,&aav);
4994:   MatSeqAIJRestoreArrayRead(mpimat->B,&bav);
4995:   PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);
4996:   return 0;
4997: }

4999: /*@
5000:      MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5001:           mlocal rows and n columns, where n is the sum of the number of columns of the diagonal and off-diagonal parts

5003:     Not Collective

5005:    Input Parameters:
5006: +    A - the matrix
5007: -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

5009:    Output Parameters:
5010: +    glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5011: -    A_loc - the local sequential matrix generated

5013:     Level: developer

5015:    Notes:
5016:      This is different from MatMPIAIJGetLocalMat() since the first columns of the returned matrix are those associated with the diagonal part, followed by those associated with the off-diagonal part (in its local ordering)
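
     A minimal sketch that also retrieves the global column indices:
.vb
       Mat A_loc;
       IS  glob;
       MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&A_loc);
       /* local column j of A_loc corresponds to global column glob[j] of A */
       ISDestroy(&glob);
       MatDestroy(&A_loc);
.ve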

5018: .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed()

5020: @*/
5021: PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc)
5022: {
5023:   Mat            Ao,Ad;
5024:   const PetscInt *cmap;
5025:   PetscMPIInt    size;
5026:   PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*);

5028:   MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap);
5029:   MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
5030:   if (size == 1) {
5031:     if (scall == MAT_INITIAL_MATRIX) {
5032:       PetscObjectReference((PetscObject)Ad);
5033:       *A_loc = Ad;
5034:     } else if (scall == MAT_REUSE_MATRIX) {
5035:       MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN);
5036:     }
5037:     if (glob) ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob);
5038:     return 0;
5039:   }
5040:   PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f);
5041:   PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);
5042:   if (f) {
5043:     (*f)(A,scall,glob,A_loc);
5044:   } else {
5045:     Mat_SeqAIJ        *a = (Mat_SeqAIJ*)Ad->data;
5046:     Mat_SeqAIJ        *b = (Mat_SeqAIJ*)Ao->data;
5047:     Mat_SeqAIJ        *c;
5048:     PetscInt          *ai = a->i, *aj = a->j;
5049:     PetscInt          *bi = b->i, *bj = b->j;
5050:     PetscInt          *ci,*cj;
5051:     const PetscScalar *aa,*ba;
5052:     PetscScalar       *ca;
5053:     PetscInt          i,j,am,dn,on;

5055:     MatGetLocalSize(Ad,&am,&dn);
5056:     MatGetLocalSize(Ao,NULL,&on);
5057:     MatSeqAIJGetArrayRead(Ad,&aa);
5058:     MatSeqAIJGetArrayRead(Ao,&ba);
5059:     if (scall == MAT_INITIAL_MATRIX) {
5060:       PetscInt k;
5061:       PetscMalloc1(1+am,&ci);
5062:       PetscMalloc1(ai[am]+bi[am],&cj);
5063:       PetscMalloc1(ai[am]+bi[am],&ca);
5064:       ci[0] = 0;
5065:       for (i=0,k=0; i<am; i++) {
5066:         const PetscInt ncols_o = bi[i+1] - bi[i];
5067:         const PetscInt ncols_d = ai[i+1] - ai[i];
5068:         ci[i+1] = ci[i] + ncols_o + ncols_d;
5069:         /* diagonal portion of A */
5070:         for (j=0; j<ncols_d; j++,k++) {
5071:           cj[k] = *aj++;
5072:           ca[k] = *aa++;
5073:         }
5074:         /* off-diagonal portion of A */
5075:         for (j=0; j<ncols_o; j++,k++) {
5076:           cj[k] = dn + *bj++;
5077:           ca[k] = *ba++;
5078:         }
5079:       }
5080:       /* put together the new matrix */
5081:       MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc);
5082:       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5083:       /* Since these are PETSc arrays, change flags to free them as necessary. */
5084:       c          = (Mat_SeqAIJ*)(*A_loc)->data;
5085:       c->free_a  = PETSC_TRUE;
5086:       c->free_ij = PETSC_TRUE;
5087:       c->nonew   = 0;
5088:       MatSetType(*A_loc,((PetscObject)Ad)->type_name);
5089:     } else if (scall == MAT_REUSE_MATRIX) {
5090:       MatSeqAIJGetArrayWrite(*A_loc,&ca);
5091:       for (i=0; i<am; i++) {
5092:         const PetscInt ncols_d = ai[i+1] - ai[i];
5093:         const PetscInt ncols_o = bi[i+1] - bi[i];
5094:         /* diagonal portion of A */
5095:         for (j=0; j<ncols_d; j++) *ca++ = *aa++;
5096:         /* off-diagonal portion of A */
5097:         for (j=0; j<ncols_o; j++) *ca++ = *ba++;
5098:       }
5099:       MatSeqAIJRestoreArrayWrite(*A_loc,&ca);
5100:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5101:     MatSeqAIJRestoreArrayRead(Ad,&aa);
5102:     MatSeqAIJRestoreArrayRead(Ao,&ba);
5103:     if (glob) {
5104:       PetscInt cst, *gidx;

5106:       MatGetOwnershipRangeColumn(A,&cst,NULL);
5107:       PetscMalloc1(dn+on,&gidx);
5108:       for (i=0; i<dn; i++) gidx[i]    = cst + i;
5109:       for (i=0; i<on; i++) gidx[i+dn] = cmap[i];
5110:       ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob);
5111:     }
5112:   }
5113:   PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);
5114:   return 0;
5115: }

5117: /*@C
5118:      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns

5120:     Not Collective

5122:    Input Parameters:
5123: +    A - the matrix
5124: .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5125: -    row, col - index sets of rows and columns to extract (or NULL)

5127:    Output Parameter:
5128: .    A_loc - the local sequential matrix generated
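
     A minimal sketch that extracts all local rows and all nonzero columns (passing NULL for row and col):
.vb
       Mat A_loc;
       MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);
       /* ... use A_loc ... */
       MatDestroy(&A_loc);
.ve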

5130:     Level: developer

5132: .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()

5134: @*/
5135: PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5136: {
5137:   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5138:   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5139:   IS             isrowa,iscola;
5140:   Mat            *aloc;
5141:   PetscBool      match;

5143:   PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);
5145:   PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);
5146:   if (!row) {
5147:     start = A->rmap->rstart; end = A->rmap->rend;
5148:     ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);
5149:   } else {
5150:     isrowa = *row;
5151:   }
5152:   if (!col) {
5153:     start = A->cmap->rstart;
5154:     cmap  = a->garray;
5155:     nzA   = a->A->cmap->n;
5156:     nzB   = a->B->cmap->n;
5157:     PetscMalloc1(nzA+nzB, &idx);
5158:     ncols = 0;
5159:     for (i=0; i<nzB; i++) {
5160:       if (cmap[i] < start) idx[ncols++] = cmap[i];
5161:       else break;
5162:     }
5163:     imark = i;
5164:     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5165:     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5166:     ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);
5167:   } else {
5168:     iscola = *col;
5169:   }
5170:   if (scall != MAT_INITIAL_MATRIX) {
5171:     PetscMalloc1(1,&aloc);
5172:     aloc[0] = *A_loc;
5173:   }
5174:   MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);
5175:   if (!col) { /* attach global id of condensed columns */
5176:     PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);
5177:   }
5178:   *A_loc = aloc[0];
5179:   PetscFree(aloc);
5180:   if (!row) {
5181:     ISDestroy(&isrowa);
5182:   }
5183:   if (!col) {
5184:     ISDestroy(&iscola);
5185:   }
5186:   PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);
5187:   return 0;
5188: }

5190: /*
5191:  * Create a sequential AIJ matrix based on row indices; all columns of a row are extracted once that row is matched.
5192:  * Rows can be local or remote. The routine is designed to be memory scalable, so that nothing is allocated based
5193:  * on a global size.
5194:  * */
5195: PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5196: {
5197:   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5198:   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5199:   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
5200:   PetscMPIInt              owner;
5201:   PetscSFNode              *iremote,*oiremote;
5202:   const PetscInt           *lrowindices;
5203:   PetscSF                  sf,osf;
5204:   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5205:   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5206:   MPI_Comm                 comm;
5207:   ISLocalToGlobalMapping   mapping;
5208:   const PetscScalar        *pd_a,*po_a;

5210:   PetscObjectGetComm((PetscObject)P,&comm);
5211:   /* plocalsize is the number of roots
5212:    * nrows is the number of leaves
5213:    * */
5214:   MatGetLocalSize(P,&plocalsize,NULL);
5215:   ISGetLocalSize(rows,&nrows);
5216:   PetscCalloc1(nrows,&iremote);
5217:   ISGetIndices(rows,&lrowindices);
5218:   for (i=0;i<nrows;i++) {
5219:     /* Find a remote index and an owner for a row
5220:      * The row could be local or remote
5221:      * */
5222:     owner = 0;
5223:     lidx  = 0;
5224:     PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);
5225:     iremote[i].index = lidx;
5226:     iremote[i].rank  = owner;
5227:   }
5228:   /* Create SF to communicate how many nonzero columns for each row */
5229:   PetscSFCreate(comm,&sf);
5230:   /* SF will figure out the number of nonzero columns for each row, and their
5231:    * offsets
5232:    * */
5233:   PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);
5234:   PetscSFSetFromOptions(sf);
5235:   PetscSFSetUp(sf);

5237:   PetscCalloc1(2*(plocalsize+1),&roffsets);
5238:   PetscCalloc1(2*plocalsize,&nrcols);
5239:   PetscCalloc1(nrows,&pnnz);
5240:   roffsets[0] = 0;
5241:   roffsets[1] = 0;
5242:   for (i=0;i<plocalsize;i++) {
5243:     /* diag */
5244:     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5245:     /* off diag */
5246:     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5247:     /* compute offsets so that we know the relative location of each row */
5248:     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5249:     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5250:   }
5251:   PetscCalloc1(2*nrows,&nlcols);
5252:   PetscCalloc1(2*nrows,&loffsets);
5253:   /* 'r' means root, and 'l' means leaf */
5254:   PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);
5255:   PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);
5256:   PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);
5257:   PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);
5258:   PetscSFDestroy(&sf);
5259:   PetscFree(roffsets);
5260:   PetscFree(nrcols);
5261:   dntotalcols = 0;
5262:   ontotalcols = 0;
5263:   ncol = 0;
5264:   for (i=0;i<nrows;i++) {
5265:     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5266:     ncol = PetscMax(pnnz[i],ncol);
5267:     /* diag */
5268:     dntotalcols += nlcols[i*2+0];
5269:     /* off diag */
5270:     ontotalcols += nlcols[i*2+1];
5271:   }
5272:   /* We do not need to figure out the right number of columns
5273:    * since all the calculations will be done by going through the raw data
5274:    * */
5275:   MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);
5276:   MatSetUp(*P_oth);
5277:   PetscFree(pnnz);
5278:   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5279:   /* diag */
5280:   PetscCalloc1(dntotalcols,&iremote);
5281:   /* off diag */
5282:   PetscCalloc1(ontotalcols,&oiremote);
5283:   /* diag */
5284:   PetscCalloc1(dntotalcols,&ilocal);
5285:   /* off diag */
5286:   PetscCalloc1(ontotalcols,&oilocal);
5287:   dntotalcols = 0;
5288:   ontotalcols = 0;
5289:   ntotalcols  = 0;
5290:   for (i=0;i<nrows;i++) {
5291:     owner = 0;
5292:     PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);
5293:     /* Set iremote for diag matrix */
5294:     for (j=0;j<nlcols[i*2+0];j++) {
5295:       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5296:       iremote[dntotalcols].rank    = owner;
5297:       /* P_oth is SeqAIJ, so ilocal needs to point to the beginning of the memory */
5298:       ilocal[dntotalcols++]        = ntotalcols++;
5299:     }
5300:     /* off diag */
5301:     for (j=0;j<nlcols[i*2+1];j++) {
5302:       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5303:       oiremote[ontotalcols].rank    = owner;
5304:       oilocal[ontotalcols++]        = ntotalcols++;
5305:     }
5306:   }
5307:   ISRestoreIndices(rows,&lrowindices);
5308:   PetscFree(loffsets);
5309:   PetscFree(nlcols);
5310:   PetscSFCreate(comm,&sf);
5311:   /* P serves as roots and P_oth is leaves
5312:    * Diag matrix
5313:    * */
5314:   PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);
5315:   PetscSFSetFromOptions(sf);
5316:   PetscSFSetUp(sf);

5318:   PetscSFCreate(comm,&osf);
5319:   /* Off diag */
5320:   PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);
5321:   PetscSFSetFromOptions(osf);
5322:   PetscSFSetUp(osf);
5323:   MatSeqAIJGetArrayRead(p->A,&pd_a);
5324:   MatSeqAIJGetArrayRead(p->B,&po_a);
5325:   /* We operate on the matrix internal data for saving memory */
5326:   PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE);
5327:   PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE);
5328:   MatGetOwnershipRangeColumn(P,&pcstart,NULL);
5329:   /* Convert to global indices for diag matrix */
5330:   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5331:   PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);
5332:   /* We want P_oth to store global indices */
5333:   ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);
5334:   /* Use memory scalable approach */
5335:   ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);
5336:   ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);
5337:   PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);
5338:   PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);
5339:   /* Convert back to local indices */
5340:   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5341:   PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);
5342:   nout = 0;
5343:   ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);
5345:   ISLocalToGlobalMappingDestroy(&mapping);
5346:   /* Exchange values */
5347:   PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE);
5348:   PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE);
5349:   MatSeqAIJRestoreArrayRead(p->A,&pd_a);
5350:   MatSeqAIJRestoreArrayRead(p->B,&po_a);
5351:   /* Stop PETSc from shrinking memory */
5352:   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5353:   MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);
5354:   MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);
5355:   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5356:   PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);
5357:   PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);
5358:   PetscSFDestroy(&sf);
5359:   PetscSFDestroy(&osf);
5360:   return 0;
5361: }

5363: /*
5364:  * Creates a SeqAIJ matrix by taking the rows of P that correspond to the nonzero off-diagonal columns of local A.
5365:  * This supports MPIAIJ and MAIJ matrices.
5366:  * */
5367: PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5368: {
5369:   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5370:   Mat_SeqAIJ            *p_oth;
5371:   IS                    rows,map;
5372:   PetscHMapI            hamp;
5373:   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5374:   MPI_Comm              comm;
5375:   PetscSF               sf,osf;
5376:   PetscBool             has;

5378:   PetscObjectGetComm((PetscObject)A,&comm);
5379:   PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);
5380:   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5381:    *  and then create a submatrix (that often is an overlapping matrix)
5382:    * */
5383:   if (reuse == MAT_INITIAL_MATRIX) {
5384:     /* Use a hash table to figure out unique keys */
5385:     PetscHMapICreate(&hamp);
5386:     PetscHMapIResize(hamp,a->B->cmap->n);
5387:     PetscCalloc1(a->B->cmap->n,&mapping);
5388:     count = 0;
5389:     /* Assume that a->garray is sorted; otherwise the following does not make sense */
5390:     for (i=0;i<a->B->cmap->n;i++) {
5391:       key  = a->garray[i]/dof;
5392:       PetscHMapIHas(hamp,key,&has);
5393:       if (!has) {
5394:         mapping[i] = count;
5395:         PetscHMapISet(hamp,key,count++);
5396:       } else {
5397:         /* The current 'i' has the same key as in the previous step */
5398:         mapping[i] = count-1;
5399:       }
5400:     }
5401:     ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);
5402:     PetscHMapIGetSize(hamp,&htsize);
5404:     PetscCalloc1(htsize,&rowindices);
5405:     off = 0;
5406:     PetscHMapIGetKeys(hamp,&off,rowindices);
5407:     PetscHMapIDestroy(&hamp);
5408:     PetscSortInt(htsize,rowindices);
5409:     ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);
5410:     /* In case the matrix was already created but the user wants to recreate it */
5411:     MatDestroy(P_oth);
5412:     MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);
5413:     PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);
5414:     ISDestroy(&map);
5415:     ISDestroy(&rows);
5416:   } else if (reuse == MAT_REUSE_MATRIX) {
5417:     /* If the matrix was already created, we simply update the values using the SF objects
5418:      * that were attached to the matrix earlier.
5419:      */
5420:     const PetscScalar *pd_a,*po_a;

5422:     PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);
5423:     PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);
5425:     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5426:     /* Update values in place */
5427:     MatSeqAIJGetArrayRead(p->A,&pd_a);
5428:     MatSeqAIJGetArrayRead(p->B,&po_a);
5429:     PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE);
5430:     PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE);
5431:     PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE);
5432:     PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE);
5433:     MatSeqAIJRestoreArrayRead(p->A,&pd_a);
5434:     MatSeqAIJRestoreArrayRead(p->B,&po_a);
5435:   } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
5436:   PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);
5437:   return 0;
5438: }

5440: /*@C
5441:   MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A

5443:   Collective on Mat

5445:   Input Parameters:
5446: + A - the first matrix in mpiaij format
5447: . B - the second matrix in mpiaij format
5448: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

5450:   Output Parameters:
5451: + rowb - On input, the index set of rows of B to extract (or NULL); modified on output
5452: . colb - On input, the index set of columns of B to extract (or NULL); modified on output
5453: - B_seq - the sequential matrix generated
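
  Example usage (a sketch; rowb and colb are kept so the index sets can be reused when only the values of B change):
.vb
     IS  rowb = NULL,colb = NULL;
     Mat B_seq;
     MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);
     /* ... later, after the values (but not the nonzero pattern) of B have changed ... */
     MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);
     ISDestroy(&rowb);
     ISDestroy(&colb);
     MatDestroy(&B_seq);
.ve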

5455:   Level: developer

5457: @*/
5458: PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5459: {
5460:   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5461:   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5462:   IS             isrowb,iscolb;
5463:   Mat            *bseq=NULL;

5465:   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5466:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5467:   }
5468:   PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);

5470:   if (scall == MAT_INITIAL_MATRIX) {
5471:     start = A->cmap->rstart;
5472:     cmap  = a->garray;
5473:     nzA   = a->A->cmap->n;
5474:     nzB   = a->B->cmap->n;
5475:     PetscMalloc1(nzA+nzB, &idx);
5476:     ncols = 0;
5477:     for (i=0; i<nzB; i++) {  /* row < local row index */
5478:       if (cmap[i] < start) idx[ncols++] = cmap[i];
5479:       else break;
5480:     }
5481:     imark = i;
5482:     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5483:     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5484:     ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);
5485:     ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);
5486:   } else {
5488:     isrowb  = *rowb; iscolb = *colb;
5489:     PetscMalloc1(1,&bseq);
5490:     bseq[0] = *B_seq;
5491:   }
5492:   MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);
5493:   *B_seq = bseq[0];
5494:   PetscFree(bseq);
5495:   if (!rowb) {
5496:     ISDestroy(&isrowb);
5497:   } else {
5498:     *rowb = isrowb;
5499:   }
5500:   if (!colb) {
5501:     ISDestroy(&iscolb);
5502:   } else {
5503:     *colb = iscolb;
5504:   }
5505:   PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);
5506:   return 0;
5507: }

5509: /*
5510:     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5511:     of the OFF-DIAGONAL portion of local A

5513:     Collective on Mat

5515:    Input Parameters:
5516: +    A,B - the matrices in mpiaij format
5517: -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

5519:    Output Parameters:
5520: +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5521: .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5522: .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5523: -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

5525:     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5526:      for this matrix. This is not desirable.

5528:     Level: developer

5530: */
5531: PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5532: {
5533:   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5534:   Mat_SeqAIJ             *b_oth;
5535:   VecScatter             ctx;
5536:   MPI_Comm               comm;
5537:   const PetscMPIInt      *rprocs,*sprocs;
5538:   const PetscInt         *srow,*rstarts,*sstarts;
5539:   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5540:   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
5541:   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5542:   MPI_Request            *reqs = NULL,*rwaits = NULL,*swaits = NULL;
5543:   PetscMPIInt            size,tag,rank,nreqs;

5545:   PetscObjectGetComm((PetscObject)A,&comm);
5546:   MPI_Comm_size(comm,&size);

5548:   if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) {
5549:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5550:   }
5551:   PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);
5552:   MPI_Comm_rank(comm,&rank);

5554:   if (size == 1) {
5555:     if (startsj_s) *startsj_s = NULL;
5556:     if (bufa_ptr)  *bufa_ptr  = NULL;
5557:     *B_oth    = NULL;
5558:     return 0;
5559:   }

5561:   ctx = a->Mvctx;
5562:   tag = ((PetscObject)ctx)->tag;

5564:   VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);
5565:   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5566:   VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);
5567:   PetscMPIIntCast(nsends+nrecvs,&nreqs);
5568:   PetscMalloc1(nreqs,&reqs);
5569:   rwaits = reqs;
5570:   swaits = reqs + nrecvs;

5572:   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5573:   if (scall == MAT_INITIAL_MATRIX) {
5574:     /* i-array */
5575:     /*---------*/
5576:     /*  post receives */
5577:     if (nrecvs) PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues); /* rstarts can be NULL when nrecvs=0 */
5578:     for (i=0; i<nrecvs; i++) {
5579:       rowlen = rvalues + rstarts[i]*rbs;
5580:       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5581:       MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);
5582:     }

5584:     /* pack the outgoing message */
5585:     PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);

5587:     sstartsj[0] = 0;
5588:     rstartsj[0] = 0;
5589:     len         = 0; /* total length of j or a array to be sent */
5590:     if (nsends) {
5591:       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5592:       PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);
5593:     }
5594:     for (i=0; i<nsends; i++) {
5595:       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5596:       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5597:       for (j=0; j<nrows; j++) {
5598:         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5599:         for (l=0; l<sbs; l++) {
5600:           MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL); /* rowlength */

5602:           rowlen[j*sbs+l] = ncols;

5604:           len += ncols;
5605:           MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);
5606:         }
5607:         k++;
5608:       }
5609:       MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);

5611:       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5612:     }
5613:     /* recvs and sends of i-array are completed */
5614:     if (nreqs) MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);
5615:     PetscFree(svalues);

5617:     /* allocate buffers for sending j and a arrays */
5618:     PetscMalloc1(len+1,&bufj);
5619:     PetscMalloc1(len+1,&bufa);

5621:     /* create i-array of B_oth */
5622:     PetscMalloc1(aBn+2,&b_othi);

5624:     b_othi[0] = 0;
5625:     len       = 0; /* total length of j or a array to be received */
5626:     k         = 0;
5627:     for (i=0; i<nrecvs; i++) {
5628:       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5629:       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5630:       for (j=0; j<nrows; j++) {
5631:         b_othi[k+1] = b_othi[k] + rowlen[j];
5632:         PetscIntSumError(rowlen[j],len,&len);
5633:         k++;
5634:       }
5635:       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5636:     }
5637:     PetscFree(rvalues);

5639:     /* allocate space for j and a arrays of B_oth */
5640:     PetscMalloc1(b_othi[aBn]+1,&b_othj);
5641:     PetscMalloc1(b_othi[aBn]+1,&b_otha);

5643:     /* j-array */
5644:     /*---------*/
5645:     /*  post receives of j-array */
5646:     for (i=0; i<nrecvs; i++) {
5647:       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5648:       MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);
5649:     }

5651:     /* pack the outgoing message j-array */
5652:     if (nsends) k = sstarts[0];
5653:     for (i=0; i<nsends; i++) {
5654:       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5655:       bufJ  = bufj+sstartsj[i];
5656:       for (j=0; j<nrows; j++) {
5657:         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5658:         for (ll=0; ll<sbs; ll++) {
5659:           MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);
5660:           for (l=0; l<ncols; l++) {
5661:             *bufJ++ = cols[l];
5662:           }
5663:           MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);
5664:         }
5665:       }
5666:       MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);
5667:     }

5669:     /* recvs and sends of j-array are completed */
5670:     if (nreqs) MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);
5671:   } else if (scall == MAT_REUSE_MATRIX) {
5672:     sstartsj = *startsj_s;
5673:     rstartsj = *startsj_r;
5674:     bufa     = *bufa_ptr;
5675:     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5676:     MatSeqAIJGetArrayWrite(*B_oth,&b_otha);
5677:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Unsupported MatReuse value; only MAT_INITIAL_MATRIX and MAT_REUSE_MATRIX are handled");

5679:   /* a-array */
5680:   /*---------*/
5681:   /*  post receives of a-array */
5682:   for (i=0; i<nrecvs; i++) {
5683:     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5684:     MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
5685:   }

5687:   /* pack the outgoing message a-array */
5688:   if (nsends) k = sstarts[0];
5689:   for (i=0; i<nsends; i++) {
5690:     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5691:     bufA  = bufa+sstartsj[i];
5692:     for (j=0; j<nrows; j++) {
5693:       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5694:       for (ll=0; ll<sbs; ll++) {
5695:         MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);
5696:         for (l=0; l<ncols; l++) {
5697:           *bufA++ = vals[l];
5698:         }
5699:         MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);
5700:       }
5701:     }
5702:     MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
5703:   }
5704:   /* recvs and sends of a-array are completed */
5705:   if (nreqs) MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);
5706:   PetscFree(reqs);

5708:   if (scall == MAT_INITIAL_MATRIX) {
5709:     /* put together the new matrix */
5710:     MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);

5712:     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5713:     /* Since these are PETSc arrays, change flags to free them as necessary. */
5714:     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5715:     b_oth->free_a  = PETSC_TRUE;
5716:     b_oth->free_ij = PETSC_TRUE;
5717:     b_oth->nonew   = 0;

5719:     PetscFree(bufj);
5720:     if (!startsj_s || !bufa_ptr) {
5721:       PetscFree2(sstartsj,rstartsj);
5722:       PetscFree(bufa_ptr);
5723:     } else {
5724:       *startsj_s = sstartsj;
5725:       *startsj_r = rstartsj;
5726:       *bufa_ptr  = bufa;
5727:     }
5728:   } else if (scall == MAT_REUSE_MATRIX) {
5729:     MatSeqAIJRestoreArrayWrite(*B_oth,&b_otha);
5730:   }

5732:   VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);
5733:   VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);
5734:   PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);
5735:   return 0;
5736: }

5738: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5739: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5740: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5741: #if defined(PETSC_HAVE_MKL_SPARSE)
5742: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5743: #endif
5744: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5745: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5746: #if defined(PETSC_HAVE_ELEMENTAL)
5747: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5748: #endif
5749: #if defined(PETSC_HAVE_SCALAPACK)
5750: PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
5751: #endif
5752: #if defined(PETSC_HAVE_HYPRE)
5753: PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5754: #endif
5755: #if defined(PETSC_HAVE_CUDA)
5756: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
5757: #endif
5758: #if defined(PETSC_HAVE_KOKKOS_KERNELS)
5759: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
5760: #endif
5761: PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5762: PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5763: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);

5765: /*
5766:     Computes (B'*A')' since computing A*B directly is untenable

5768:                n                       p                          p
5769:         [             ]       [             ]         [                 ]
5770:       m [      A      ]  *  n [       B     ]   =   m [         C       ]
5771:         [             ]       [             ]         [                 ]

5773: */
5774: static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5775: {
5776:   Mat            At,Bt,Ct;

5778:   MatTranspose(A,MAT_INITIAL_MATRIX,&At);
5779:   MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);
5780:   MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);
5781:   MatDestroy(&At);
5782:   MatDestroy(&Bt);
5783:   MatTranspose(Ct,MAT_REUSE_MATRIX,&C);
5784:   MatDestroy(&Ct);
5785:   return 0;
5786: }

5788: static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
5789: {
5790:   PetscBool      cisdense;

5793:   MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);
5794:   MatSetBlockSizesFromMats(C,A,B);
5795:   PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");
5796:   if (!cisdense) {
5797:     MatSetType(C,((PetscObject)A)->type_name);
5798:   }
5799:   MatSetUp(C);

5801:   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5802:   return 0;
5803: }

5805: /* ----------------------------------------------------------------*/
5806: static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
5807: {
5808:   Mat_Product *product = C->product;
5809:   Mat         A = product->A,B=product->B;

5811:   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
5812:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);

5814:   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
5815:   C->ops->productsymbolic = MatProductSymbolic_AB;
5816:   return 0;
5817: }

5819: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
5820: {
5821:   Mat_Product    *product = C->product;

5823:   if (product->type == MATPRODUCT_AB) {
5824:     MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);
5825:   }
5826:   return 0;
5827: }

5829: /* std::upper_bound(): Given a sorted array, return index of the first element in range [first,last) whose value
5830:    is greater than value, or last if there is no such element.
5831: */
5832: static inline PetscErrorCode PetscSortedIntUpperBound(PetscInt *array,PetscCount first,PetscCount last,PetscInt value,PetscCount *upper)
5833: {
5834:   PetscCount  it,step,count = last - first;

5836:   while (count > 0) {
5837:     it   = first;
5838:     step = count / 2;
5839:     it  += step;
5840:     if (!(value < array[it])) {
5841:       first  = ++it;
5842:       count -= step + 1;
5843:     } else count = step;
5844:   }
5845:   *upper = first;
5846:   return 0;
5847: }
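/* A minimal illustrative sketch of the upper-bound semantics above; hypothetical example code,
   kept out of the build with '#if 0'. */
#if 0
static PetscErrorCode PetscSortedIntUpperBound_Example(void)
{
  PetscInt   a[] = {1,3,3,5,7};
  PetscCount upper;

  PetscSortedIntUpperBound(a,0,5,3,&upper); /* first element > 3 is a[3] = 5, so upper == 3           */
  PetscSortedIntUpperBound(a,0,5,7,&upper); /* no element > 7, so upper == last == 5                  */
  PetscSortedIntUpperBound(a,2,5,0,&upper); /* searching [2,5): first element > 0 is a[2], upper == 2 */
  return 0;
}
#endif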

5849: /* Merge two sets of sorted nonzero entries and return a CSR for the merged (sequential) matrix

5851:   Input Parameters:

5853:     j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1)
5854:     j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2)

5856:     mat: both sets' entries are on m rows, where m is the number of local rows of the matrix mat

5858:     For Set1, j1[] contains column indices of the nonzeros.
5859:     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
5860:     respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
5861:     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.

5863:     Similar for Set2.

5865:     This routine merges the two sets of nonzeros row by row and removes repeats.

5867:   Output Parameters: (memories are allocated by the caller)

5869:     i[],j[]: the CSR of the merged matrix, which has m rows.
5870:     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
5871:     imap2[]: similar to imap1[], but for Set2.
5872:     Note we order nonzeros row-by-row and from left to right.
5873: */
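/* For illustration (hypothetical numbers, single row r):
     Set1: j1 = {1,1,4}, jmap1 = {0,2,3}   (unique columns 1 and 4; column 1 repeats twice)
     Set2: j2 = {2,4},   jmap2 = {0,1,2}   (unique columns 2 and 4)
   The merged row is j = {1,2,4}, so i[r+1] - i[r] = 3, with
     imap1 = {0,2}   (Set1's unique nonzeros 1 and 4 land at merged positions 0 and 2)
     imap2 = {1,2}   (Set2's unique nonzeros 2 and 4 land at merged positions 1 and 2) */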
5874: static PetscErrorCode MatMergeEntries_Internal(Mat mat,const PetscInt j1[],const PetscInt j2[],const PetscCount rowBegin1[],const PetscCount rowEnd1[],
5875:   const PetscCount rowBegin2[],const PetscCount rowEnd2[],const PetscCount jmap1[],const PetscCount jmap2[],
5876:   PetscCount imap1[],PetscCount imap2[],PetscInt i[],PetscInt j[])
5877: {
5878:   PetscInt       r,m; /* Row index of mat */
5879:   PetscCount     t,t1,t2,b1,e1,b2,e2;

5881:   MatGetLocalSize(mat,&m,NULL);
5882:   t1   = t2 = t = 0; /* Counters of unique nonzeros in Set1, Set2 and the merged set, respectively */
5883:   i[0] = 0;
5884:   for (r=0; r<m; r++) { /* Do row by row merging */
5885:     b1   = rowBegin1[r];
5886:     e1   = rowEnd1[r];
5887:     b2   = rowBegin2[r];
5888:     e2   = rowEnd2[r];
5889:     while (b1 < e1 && b2 < e2) {
5890:       if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
5891:         j[t]      = j1[b1];
5892:         imap1[t1] = t;
5893:         imap2[t2] = t;
5894:         b1       += jmap1[t1+1] - jmap1[t1]; /* Jump to next unique local nonzero */
5895:         b2       += jmap2[t2+1] - jmap2[t2]; /* Jump to next unique remote nonzero */
5896:         t1++; t2++; t++;
5897:       } else if (j1[b1] < j2[b2]) {
5898:         j[t]      = j1[b1];
5899:         imap1[t1] = t;
5900:         b1       += jmap1[t1+1] - jmap1[t1];
5901:         t1++; t++;
5902:       } else {
5903:         j[t]      = j2[b2];
5904:         imap2[t2] = t;
5905:         b2       += jmap2[t2+1] - jmap2[t2];
5906:         t2++; t++;
5907:       }
5908:     }
5909:     /* Merge the remaining in either j1[] or j2[] */
5910:     while (b1 < e1) {
5911:       j[t]      = j1[b1];
5912:       imap1[t1] = t;
5913:       b1       += jmap1[t1+1] - jmap1[t1];
5914:       t1++; t++;
5915:     }
5916:     while (b2 < e2) {
5917:       j[t]      = j2[b2];
5918:       imap2[t2] = t;
5919:       b2       += jmap2[t2+1] - jmap2[t2];
5920:       t2++; t++;
5921:     }
5922:     i[r+1] = t;
5923:   }
5924:   return 0;
5925: }

5927: /* Split a set/group of local entries into two subsets: those in the diagonal block and those in the off-diagonal block

5929:   Input Parameters:
5930:     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
5931:     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
5932:       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.

5934:       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
5935:       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.

5937:   Output Parameters:
5938:     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
5939:     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
5940:       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
5941:       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.

5943:     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
5944:       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Its length Atot counts
5945:         repeated entries (i.e., entries with the same 'i,j' pair) separately.
5946:       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
5947:         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.

5949:       Atot: number of entries belonging to the diagonal block
5950:       Annz: number of unique nonzeros belonging to the diagonal block.

5952:     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.

5954:     Aperm[],Bperm[],Ajmap[],Bjmap[] are allocated by this routine with a single PetscMalloc4(); they must be freed together with one PetscFree4() call, passing the pointers in the same order.
5955: */
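/* For illustration (hypothetical numbers): suppose the diagonal block owns global columns
   [cstart,cend) = [2,5) and one local row carries the unsorted, possibly repeated column
   indices j = {7,3,3,0,4}.  Columns 3,3,4 fall in the diagonal block and 7,0 in the
   off-diagonal block.  After the shift-and-sort step this row's j becomes {3,3,4 | 0,7},
   with [rowBegin,rowMid) covering the diagonal part and [rowMid,rowEnd) the off-diagonal
   part.  The row contributes Atot += 3, Annz += 2 (Ajmap increments {2,1}), Btot += 2,
   Bnnz += 2 (Bjmap increments {1,1}). */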
5956: static PetscErrorCode MatSplitEntries_Internal(Mat mat,PetscCount n,const PetscInt i[],PetscInt j[],
5957:   PetscCount perm[],PetscCount rowBegin[],PetscCount rowMid[],PetscCount rowEnd[],
5958:   PetscCount *Atot_,PetscCount **Aperm_,PetscCount *Annz_,PetscCount **Ajmap_,
5959:   PetscCount *Btot_,PetscCount **Bperm_,PetscCount *Bnnz_,PetscCount **Bjmap_)
5960: {
5961:   PetscInt          cstart,cend,rstart,rend,row,col;
5962:   PetscCount        Atot=0,Btot=0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
5963:   PetscCount        Annz=0,Bnnz=0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
5964:   PetscCount        k,m,p,q,r,s,mid;
5965:   PetscCount        *Aperm,*Bperm,*Ajmap,*Bjmap;

5967:   PetscLayoutGetRange(mat->rmap,&rstart,&rend);
5968:   PetscLayoutGetRange(mat->cmap,&cstart,&cend);
5969:   m    = rend - rstart;

5971:   for (k=0; k<n; k++) {if (i[k]>=0) break;} /* Skip negative rows */

5973:   /* Process [k,n): sort and partition each local row into diag and offdiag portions,
5974:      fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
5975:   */
5976:   while (k<n) {
5977:     row = i[k];
5978:     /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
5979:     for (s=k; s<n; s++) if (i[s] != row) break;
5980:     for (p=k; p<s; p++) {
5981:       if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1]  */
5982:       else PetscAssert((j[p] >= 0) && (j[p] < mat->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index %" PetscInt_FMT " is out of range",j[p]);
5983:     }
5984:     PetscSortIntWithCountArray(s-k,j+k,perm+k);
5985:     PetscSortedIntUpperBound(j,k,s,-1,&mid); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
5986:     rowBegin[row-rstart] = k;
5987:     rowMid[row-rstart]   = mid;
5988:     rowEnd[row-rstart]   = s;

5990:     /* Count nonzeros of this diag/offdiag row, which might have repeats */
5991:     Atot += mid - k;
5992:     Btot += s - mid;

5994:     /* Count unique nonzeros of this diag/offdiag row */
5995:     for (p=k; p<mid;) {
5996:       col = j[p];
5997:       do {j[p] += PETSC_MAX_INT; p++;} while (p<mid && j[p] == col); /* Revert the modified diagonal indices */
5998:       Annz++;
5999:     }

6001:     for (p=mid; p<s;) {
6002:       col = j[p];
6003:       do {p++;} while (p<s && j[p] == col);
6004:       Bnnz++;
6005:     }
6006:     k = s;
6007:   }

6009:   /* Allocation according to Atot, Btot, Annz, Bnnz */
6010:   PetscMalloc4(Atot,&Aperm,Btot,&Bperm,Annz+1,&Ajmap,Bnnz+1,&Bjmap);

6012:   /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6013:   Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6014:   for (r=0; r<m; r++) {
6015:     k     = rowBegin[r];
6016:     mid   = rowMid[r];
6017:     s     = rowEnd[r];
6018:     PetscArraycpy(Aperm+Atot,perm+k,  mid-k);
6019:     PetscArraycpy(Bperm+Btot,perm+mid,s-mid);
6020:     Atot += mid - k;
6021:     Btot += s - mid;

6023:     /* Scan column indices in this row and find out how many repeats each unique nonzero has */
6024:     for (p=k; p<mid;) {
6025:       col = j[p];
6026:       q   = p;
6027:       do {p++;} while (p<mid && j[p] == col);
6028:       Ajmap[Annz+1] = Ajmap[Annz] + (p - q);
6029:       Annz++;
6030:     }

6032:     for (p=mid; p<s;) {
6033:       col = j[p];
6034:       q   = p;
6035:       do {p++;} while (p<s && j[p] == col);
6036:       Bjmap[Bnnz+1] = Bjmap[Bnnz] + (p - q);
6037:       Bnnz++;
6038:     }
6039:   }
6040:   /* Output */
6041:   *Aperm_ = Aperm;
6042:   *Annz_  = Annz;
6043:   *Atot_  = Atot;
6044:   *Ajmap_ = Ajmap;
6045:   *Bperm_ = Bperm;
6046:   *Bnnz_  = Bnnz;
6047:   *Btot_  = Btot;
6048:   *Bjmap_ = Bjmap;
6049:   return 0;
6050: }

6052: PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, const PetscInt coo_i[], const PetscInt coo_j[])
6053: {
6054:   MPI_Comm                  comm;
6055:   PetscMPIInt               rank,size;
6056:   PetscInt                  m,n,M,N,rstart,rend,cstart,cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6057:   PetscCount                k,p,q,rem; /* Loop variables over coo arrays */
6058:   Mat_MPIAIJ                *mpiaij = (Mat_MPIAIJ*)mat->data;

6060:   PetscFree(mpiaij->garray);
6061:   VecDestroy(&mpiaij->lvec);
6062: #if defined(PETSC_USE_CTABLE)
6063:   PetscTableDestroy(&mpiaij->colmap);
6064: #else
6065:   PetscFree(mpiaij->colmap);
6066: #endif
6067:   VecScatterDestroy(&mpiaij->Mvctx);
6068:   mat->assembled = PETSC_FALSE;
6069:   mat->was_assembled = PETSC_FALSE;
6070:   MatResetPreallocationCOO_MPIAIJ(mat);

6072:   PetscObjectGetComm((PetscObject)mat,&comm);
6073:   MPI_Comm_size(comm,&size);
6074:   MPI_Comm_rank(comm,&rank);
6075:   PetscLayoutSetUp(mat->rmap);
6076:   PetscLayoutSetUp(mat->cmap);
6077:   PetscLayoutGetRange(mat->rmap,&rstart,&rend);
6078:   PetscLayoutGetRange(mat->cmap,&cstart,&cend);
6079:   MatGetLocalSize(mat,&m,&n);
6080:   MatGetSize(mat,&M,&N);

6082:   /* ---------------------------------------------------------------------------*/
6083:   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6084:   /* entries come first, then local rows, then remote rows.                     */
6085:   /* ---------------------------------------------------------------------------*/
6086:   PetscCount n1 = coo_n,*perm1;
6087:   PetscInt   *i1,*j1; /* Copies of input COOs along with a permutation array */
6088:   PetscMalloc3(n1,&i1,n1,&j1,n1,&perm1);
6089:   PetscArraycpy(i1,coo_i,n1); /* Make a copy since we'll modify it */
6090:   PetscArraycpy(j1,coo_j,n1);
6091:   for (k=0; k<n1; k++) perm1[k] = k;

6093:   /* Manipulate indices so that entries with negative row or col indices will have smallest
6094:      row indices, local entries will have greater but negative row indices, and remote entries
6095:      will have positive row indices.
6096:   */
6097:   for (k=0; k<n1; k++) {
6098:     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */
6099:     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6101:     else if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6102:   }
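  /* For illustration (hypothetical numbers), with this rank owning rows [rstart,rend) = [100,200):
       an entry with i = -1 (or j < 0)  -> i1 becomes PETSC_MIN_INT        (sorted first, then ignored)
       a local entry with i = 150       -> i1 becomes 150 - PETSC_MAX_INT  (negative, but > PETSC_MIN_INT)
       a remote entry with i = 250      -> i1 stays 250                    (positive, sorted last) */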

6104:   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6105:   PetscSortIntWithIntCountArrayPair(n1,i1,j1,perm1);
6106:   for (k=0; k<n1; k++) {if (i1[k] > PETSC_MIN_INT) break;} /* Advance k to the first entry we need to take care of */
6107:   PetscSortedIntUpperBound(i1,k,n1,rend-1-PETSC_MAX_INT,&rem); /* rem points to the first remote entry, i.e., one past the last local entry */
6108:   for (; k<rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/

6110:   /* ---------------------------------------------------------------------------*/
6111:   /*           Split local rows into diag/offdiag portions                      */
6112:   /* ---------------------------------------------------------------------------*/
6113:   PetscCount   *rowBegin1,*rowMid1,*rowEnd1;
6114:   PetscCount   *Ajmap1,*Aperm1,*Bjmap1,*Bperm1,*Cperm1;
6115:   PetscCount   Annz1,Bnnz1,Atot1,Btot1;

6117:   PetscCalloc3(m,&rowBegin1,m,&rowMid1,m,&rowEnd1);
6118:   PetscMalloc1(n1-rem,&Cperm1);
6119:   MatSplitEntries_Internal(mat,rem,i1,j1,perm1,rowBegin1,rowMid1,rowEnd1,&Atot1,&Aperm1,&Annz1,&Ajmap1,&Btot1,&Bperm1,&Bnnz1,&Bjmap1);

6121:   /* ---------------------------------------------------------------------------*/
6122:   /*           Send remote rows to their owner                                  */
6123:   /* ---------------------------------------------------------------------------*/
6124:   /* Find which rows should be sent to which remote ranks*/
6125:   PetscInt       nsend = 0; /* Number of MPI ranks to send data to */
6126:   PetscMPIInt    *sendto; /* [nsend], storing remote ranks */
6127:   PetscInt       *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6128:   const PetscInt *ranges;
6129:   PetscInt       maxNsend = size >= 128? 128 : size; /* Assume max 128 neighbors; realloc when needed */

6131:   PetscLayoutGetRanges(mat->rmap,&ranges);
6132:   PetscMalloc2(maxNsend,&sendto,maxNsend,&nentries);
6133:   for (k=rem; k<n1;) {
6134:     PetscMPIInt  owner;
6135:     PetscInt     firstRow,lastRow;

6137:     /* Locate a row range */
6138:     firstRow = i1[k]; /* first row of this owner */
6139:     PetscLayoutFindOwner(mat->rmap,firstRow,&owner);
6140:     lastRow  = ranges[owner+1]-1; /* last row of this owner */

6142:     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6143:     PetscSortedIntUpperBound(i1,k,n1,lastRow,&p);

6145:     /* All entries in [k,p) belong to this remote owner */
6146:     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6147:       PetscMPIInt *sendto2;
6148:       PetscInt    *nentries2;
6149:       PetscInt    maxNsend2 = (maxNsend <= size/2) ? maxNsend*2 : size;

6151:       PetscMalloc2(maxNsend2,&sendto2,maxNsend2,&nentries2);
6152:       PetscArraycpy(sendto2,sendto,maxNsend);
6153:       PetscArraycpy(nentries2,nentries,maxNsend); /* copy from the old nentries[], which has length maxNsend */
6154:       PetscFree2(sendto,nentries);                /* free the old pair; keep the newly allocated arrays */
6155:       sendto      = sendto2;
6156:       nentries    = nentries2;
6157:       maxNsend    = maxNsend2;
6158:     }
6159:     sendto[nsend]   = owner;
6160:     nentries[nsend] = p - k;
6161:     PetscCountCast(p-k,&nentries[nsend]);
6162:     nsend++;
6163:     k = p;
6164:   }

6166:   /* Build 1st SF to know offsets on remote to send data */
6167:   PetscSF     sf1;
6168:   PetscInt    nroots = 1,nroots2 = 0;
6169:   PetscInt    nleaves = nsend,nleaves2 = 0;
6170:   PetscInt    *offsets;
6171:   PetscSFNode *iremote;

6173:   PetscSFCreate(comm,&sf1);
6174:   PetscMalloc1(nsend,&iremote);
6175:   PetscMalloc1(nsend,&offsets);
6176:   for (k=0; k<nsend; k++) {
6177:     iremote[k].rank  = sendto[k];
6178:     iremote[k].index = 0;
6179:     nleaves2        += nentries[k];
6181:   }
6182:   PetscSFSetGraph(sf1,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);
6183:   PetscSFFetchAndOpWithMemTypeBegin(sf1,MPIU_INT,PETSC_MEMTYPE_HOST,&nroots2/*rootdata*/,PETSC_MEMTYPE_HOST,nentries/*leafdata*/,PETSC_MEMTYPE_HOST,offsets/*leafupdate*/,MPI_SUM);
6184:   PetscSFFetchAndOpEnd(sf1,MPIU_INT,&nroots2,nentries,offsets,MPI_SUM); /* Would nroots2 overflow, we check offsets[] below */
6185:   PetscSFDestroy(&sf1);
6186:   PetscAssert(nleaves2 == n1-rem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nleaves2 " PetscInt_FMT " != number of remote entries " PetscCount_FMT "",nleaves2,n1-rem);

6188:   /* Build 2nd SF to send remote COOs to their owner */
6189:   PetscSF sf2;
6190:   nroots  = nroots2;
6191:   nleaves = nleaves2;
6192:   PetscSFCreate(comm,&sf2);
6193:   PetscSFSetFromOptions(sf2);
6194:   PetscMalloc1(nleaves,&iremote);
6195:   p       = 0;
6196:   for (k=0; k<nsend; k++) {
6198:     for (q=0; q<nentries[k]; q++,p++) {
6199:       iremote[p].rank  = sendto[k];
6200:       iremote[p].index = offsets[k] + q;
6201:     }
6202:   }
6203:   PetscSFSetGraph(sf2,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);

6205:   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6206:   PetscArraycpy(Cperm1,perm1+rem,n1-rem);

6208:   /* Send the remote COOs to their owner */
6209:   PetscInt   n2 = nroots,*i2,*j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6210:   PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6211:   PetscMalloc3(n2,&i2,n2,&j2,n2,&perm2);
6212:   PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,i1+rem,PETSC_MEMTYPE_HOST,i2,MPI_REPLACE);
6213:   PetscSFReduceEnd(sf2,MPIU_INT,i1+rem,i2,MPI_REPLACE);
6214:   PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,j1+rem,PETSC_MEMTYPE_HOST,j2,MPI_REPLACE);
6215:   PetscSFReduceEnd(sf2,MPIU_INT,j1+rem,j2,MPI_REPLACE);

6217:   PetscFree(offsets);
6218:   PetscFree2(sendto,nentries);

6220:   /* ---------------------------------------------------------------*/
6221:   /* Sort received COOs by row along with the permutation array     */
6222:   /* ---------------------------------------------------------------*/
6223:   for (k=0; k<n2; k++) perm2[k] = k;
6224:   PetscSortIntWithIntCountArrayPair(n2,i2,j2,perm2);

6226:   /* ---------------------------------------------------------------*/
6227:   /* Split received COOs into diag/offdiag portions                 */
6228:   /* ---------------------------------------------------------------*/
6229:   PetscCount  *rowBegin2,*rowMid2,*rowEnd2;
6230:   PetscCount  *Ajmap2,*Aperm2,*Bjmap2,*Bperm2;
6231:   PetscCount  Annz2,Bnnz2,Atot2,Btot2;

6233:   PetscCalloc3(m,&rowBegin2,m,&rowMid2,m,&rowEnd2);
6234:   MatSplitEntries_Internal(mat,n2,i2,j2,perm2,rowBegin2,rowMid2,rowEnd2,&Atot2,&Aperm2,&Annz2,&Ajmap2,&Btot2,&Bperm2,&Bnnz2,&Bjmap2);

6236:   /* --------------------------------------------------------------------------*/
6237:   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6238:   /* --------------------------------------------------------------------------*/
6239:   PetscInt   *Ai,*Bi;
6240:   PetscInt   *Aj,*Bj;

6242:   PetscMalloc1(m+1,&Ai);
6243:   PetscMalloc1(m+1,&Bi);
6244:   PetscMalloc1(Annz1+Annz2,&Aj); /* Since local and remote entries might have dups, we might allocate excess memory */
6245:   PetscMalloc1(Bnnz1+Bnnz2,&Bj);

6247:   PetscCount *Aimap1,*Bimap1,*Aimap2,*Bimap2;
6248:   PetscMalloc4(Annz1,&Aimap1,Bnnz1,&Bimap1,Annz2,&Aimap2,Bnnz2,&Bimap2);

6250:   MatMergeEntries_Internal(mat,j1,j2,rowBegin1,rowMid1,rowBegin2,rowMid2,Ajmap1,Ajmap2,Aimap1,Aimap2,Ai,Aj);
6251:   MatMergeEntries_Internal(mat,j1,j2,rowMid1,  rowEnd1,rowMid2,  rowEnd2,Bjmap1,Bjmap2,Bimap1,Bimap2,Bi,Bj);
6252:   PetscFree3(rowBegin1,rowMid1,rowEnd1);
6253:   PetscFree3(rowBegin2,rowMid2,rowEnd2);
6254:   PetscFree3(i1,j1,perm1);
6255:   PetscFree3(i2,j2,perm2);

6257:   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6258:   PetscInt Annz = Ai[m];
6259:   PetscInt Bnnz = Bi[m];
6260:   if (Annz < Annz1 + Annz2) {
6261:     PetscInt *Aj_new;
6262:     PetscMalloc1(Annz,&Aj_new);
6263:     PetscArraycpy(Aj_new,Aj,Annz);
6264:     PetscFree(Aj);
6265:     Aj   = Aj_new;
6266:   }

6268:   if (Bnnz < Bnnz1 + Bnnz2) {
6269:     PetscInt *Bj_new;
6270:     PetscMalloc1(Bnnz,&Bj_new);
6271:     PetscArraycpy(Bj_new,Bj,Bnnz);
6272:     PetscFree(Bj);
6273:     Bj   = Bj_new;
6274:   }

6276:   /* --------------------------------------------------------------------------------*/
6277:   /* Create new submatrices for on-process and off-process coupling                  */
6278:   /* --------------------------------------------------------------------------------*/
6279:   PetscScalar   *Aa,*Ba;
6280:   MatType       rtype;
6281:   Mat_SeqAIJ    *a,*b;
6282:   PetscCalloc1(Annz,&Aa); /* Zero-initialized matrix values */
6283:   PetscCalloc1(Bnnz,&Ba);
6284:   /* make Aj[] local, i.e., based off the start column of the diagonal portion */
6285:   if (cstart) {for (k=0; k<Annz; k++) Aj[k] -= cstart;}
6286:   MatDestroy(&mpiaij->A);
6287:   MatDestroy(&mpiaij->B);
6288:   MatGetRootType_Private(mat,&rtype);
6289:   MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,Ai,Aj,Aa,&mpiaij->A);
6290:   MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,mat->cmap->N,Bi,Bj,Ba,&mpiaij->B);
6291:   MatSetUpMultiply_MPIAIJ(mat);

6293:   a = (Mat_SeqAIJ*)mpiaij->A->data;
6294:   b = (Mat_SeqAIJ*)mpiaij->B->data;
6295:   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6296:   a->free_a       = b->free_a       = PETSC_TRUE;
6297:   a->free_ij      = b->free_ij      = PETSC_TRUE;

6299:   /* conversion must happen AFTER multiply setup */
6300:   MatConvert(mpiaij->A,rtype,MAT_INPLACE_MATRIX,&mpiaij->A);
6301:   MatConvert(mpiaij->B,rtype,MAT_INPLACE_MATRIX,&mpiaij->B);
6302:   VecDestroy(&mpiaij->lvec);
6303:   MatCreateVecs(mpiaij->B,&mpiaij->lvec,NULL);
6304:   PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->lvec);

6306:   mpiaij->coo_n   = coo_n;
6307:   mpiaij->coo_sf  = sf2;
6308:   mpiaij->sendlen = nleaves;
6309:   mpiaij->recvlen = nroots;

6311:   mpiaij->Annz1   = Annz1;
6312:   mpiaij->Annz2   = Annz2;
6313:   mpiaij->Bnnz1   = Bnnz1;
6314:   mpiaij->Bnnz2   = Bnnz2;

6316:   mpiaij->Atot1   = Atot1;
6317:   mpiaij->Atot2   = Atot2;
6318:   mpiaij->Btot1   = Btot1;
6319:   mpiaij->Btot2   = Btot2;

6321:   mpiaij->Aimap1  = Aimap1;
6322:   mpiaij->Aimap2  = Aimap2;
6323:   mpiaij->Bimap1  = Bimap1;
6324:   mpiaij->Bimap2  = Bimap2;

6326:   mpiaij->Ajmap1  = Ajmap1;
6327:   mpiaij->Ajmap2  = Ajmap2;
6328:   mpiaij->Bjmap1  = Bjmap1;
6329:   mpiaij->Bjmap2  = Bjmap2;

6331:   mpiaij->Aperm1  = Aperm1;
6332:   mpiaij->Aperm2  = Aperm2;
6333:   mpiaij->Bperm1  = Bperm1;
6334:   mpiaij->Bperm2  = Bperm2;

6336:   mpiaij->Cperm1  = Cperm1;

6338:   /* Allocate the send/recv buffers at preallocation time; if they end up unused they cost nothing on the host */
6339:   PetscMalloc2(mpiaij->sendlen,&mpiaij->sendbuf,mpiaij->recvlen,&mpiaij->recvbuf);
6340:   return 0;
6341: }

6343: static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat,const PetscScalar v[],InsertMode imode)
6344: {
6345:   Mat_MPIAIJ           *mpiaij = (Mat_MPIAIJ*)mat->data;
6346:   Mat                  A = mpiaij->A,B = mpiaij->B;
6347:   PetscCount           Annz1 = mpiaij->Annz1,Annz2 = mpiaij->Annz2,Bnnz1 = mpiaij->Bnnz1,Bnnz2 = mpiaij->Bnnz2;
6348:   PetscScalar          *Aa,*Ba;
6349:   PetscScalar          *sendbuf = mpiaij->sendbuf;
6350:   PetscScalar          *recvbuf = mpiaij->recvbuf;
6351:   const PetscCount     *Ajmap1 = mpiaij->Ajmap1,*Ajmap2 = mpiaij->Ajmap2,*Aimap1 = mpiaij->Aimap1,*Aimap2 = mpiaij->Aimap2;
6352:   const PetscCount     *Bjmap1 = mpiaij->Bjmap1,*Bjmap2 = mpiaij->Bjmap2,*Bimap1 = mpiaij->Bimap1,*Bimap2 = mpiaij->Bimap2;
6353:   const PetscCount     *Aperm1 = mpiaij->Aperm1,*Aperm2 = mpiaij->Aperm2,*Bperm1 = mpiaij->Bperm1,*Bperm2 = mpiaij->Bperm2;
6354:   const PetscCount     *Cperm1 = mpiaij->Cperm1;

6356:   MatSeqAIJGetArray(A,&Aa); /* Might read and write matrix values */
6357:   MatSeqAIJGetArray(B,&Ba);
6358:   if (imode == INSERT_VALUES) {
6359:     PetscMemzero(Aa,((Mat_SeqAIJ*)A->data)->nz*sizeof(PetscScalar));
6360:     PetscMemzero(Ba,((Mat_SeqAIJ*)B->data)->nz*sizeof(PetscScalar));
6361:   }

6363:   /* Pack entries to be sent to remote */
6364:   for (PetscCount i=0; i<mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];

6366:   /* Send remote entries to their owner and overlap the communication with local computation */
6367:   PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf,MPIU_SCALAR,PETSC_MEMTYPE_HOST,sendbuf,PETSC_MEMTYPE_HOST,recvbuf,MPI_REPLACE);
6368:   /* Add local entries to A and B */
6369:   for (PetscCount i=0; i<Annz1; i++) {
6370:     for (PetscCount k=Ajmap1[i]; k<Ajmap1[i+1]; k++) Aa[Aimap1[i]] += v[Aperm1[k]];
6371:   }
6372:   for (PetscCount i=0; i<Bnnz1; i++) {
6373:     for (PetscCount k=Bjmap1[i]; k<Bjmap1[i+1]; k++) Ba[Bimap1[i]] += v[Bperm1[k]];
6374:   }
6375:   PetscSFReduceEnd(mpiaij->coo_sf,MPIU_SCALAR,sendbuf,recvbuf,MPI_REPLACE);

6377:   /* Add received remote entries to A and B */
6378:   for (PetscCount i=0; i<Annz2; i++) {
6379:     for (PetscCount k=Ajmap2[i]; k<Ajmap2[i+1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6380:   }
6381:   for (PetscCount i=0; i<Bnnz2; i++) {
6382:     for (PetscCount k=Bjmap2[i]; k<Bjmap2[i+1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6383:   }
6384:   MatSeqAIJRestoreArray(A,&Aa);
6385:   MatSeqAIJRestoreArray(B,&Ba);
6386:   return 0;
6387: }
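/* A minimal usage sketch of the COO assembly path implemented by the two routines above, through the
   public MatSetPreallocationCOO()/MatSetValuesCOO() interface; argument types may differ slightly
   between PETSc versions. Hypothetical example code, kept out of the build with '#if 0'. */
#if 0
static PetscErrorCode COOAssemblyExample(MPI_Comm comm)
{
  Mat         A;
  PetscMPIInt rank;
  PetscInt    coo_i[] = {0,0,1,1};           /* global row indices; repeats and off-process rows are allowed */
  PetscInt    coo_j[] = {0,1,0,1};           /* global column indices                                        */
  PetscScalar v[]     = {2.0,-1.0,-1.0,2.0}; /* one value per (i,j) pair                                     */

  MPI_Comm_rank(comm,&rank);
  MatCreate(comm,&A);
  MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,2,2);
  MatSetType(A,MATMPIAIJ);
  MatSetPreallocationCOO(A,rank ? 0 : 4,coo_i,coo_j); /* only rank 0 contributes entries here; builds the SF and the A/B patterns */
  MatSetValuesCOO(A,v,INSERT_VALUES);                 /* may be called repeatedly with new values */
  MatDestroy(&A);
  return 0;
}
#endif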

6389: /* ----------------------------------------------------------------*/

6391: /*MC
6392:    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

6394:    Options Database Keys:
6395: . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

6397:    Level: beginner

6399:    Notes:
6400:     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
6401:     in this case the values associated with the rows and columns one passes in are set to zero
6402:     in the matrix.

6404:     MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
6405:     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored

6407: .seealso: MatCreateAIJ()
6408: M*/
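/* A minimal creation sketch for an MPIAIJ matrix using the standard preallocation interface;
   hypothetical example code, kept out of the build with '#if 0'. */
#if 0
static PetscErrorCode MPIAIJCreationExample(MPI_Comm comm,Mat *A)
{
  MatCreate(comm,A);
  MatSetSizes(*A,PETSC_DECIDE,PETSC_DECIDE,100,100);
  MatSetType(*A,MATMPIAIJ);
  MatSetFromOptions(*A);                       /* honors -mat_type and related options                  */
  MatMPIAIJSetPreallocation(*A,5,NULL,2,NULL); /* at most 5 nonzeros/row in the diagonal block, 2 in the off-diagonal block */
  MatSeqAIJSetPreallocation(*A,5,NULL);        /* harmless no-op here since the type is MATMPIAIJ       */
  return 0;
}
#endif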

6410: PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6411: {
6412:   Mat_MPIAIJ     *b;
6413:   PetscMPIInt    size;

6415:   MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);

6417:   PetscNewLog(B,&b);
6418:   B->data       = (void*)b;
6419:   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
6420:   B->assembled  = PETSC_FALSE;
6421:   B->insertmode = NOT_SET_VALUES;
6422:   b->size       = size;

6424:   MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);

6426:   /* build cache for off array entries formed */
6427:   MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);

6429:   b->donotstash  = PETSC_FALSE;
6430:   b->colmap      = NULL;
6431:   b->garray      = NULL;
6432:   b->roworiented = PETSC_TRUE;

6434:   /* stuff used for matrix vector multiply */
6435:   b->lvec  = NULL;
6436:   b->Mvctx = NULL;

6438:   /* stuff for MatGetRow() */
6439:   b->rowindices   = NULL;
6440:   b->rowvalues    = NULL;
6441:   b->getrowactive = PETSC_FALSE;

6443:   /* flexible pointer used in CUSPARSE classes */
6444:   b->spptr = NULL;

6446:   PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);
6447:   PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);
6448:   PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);
6449:   PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);
6450:   PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);
6451:   PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);
6452:   PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);
6453:   PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);
6454:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);
6455:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);
6456: #if defined(PETSC_HAVE_CUDA)
6457:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE);
6458: #endif
6459: #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6460:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos);
6461: #endif
6462: #if defined(PETSC_HAVE_MKL_SPARSE)
6463:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);
6464: #endif
6465:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);
6466:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);
6467:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);
6468:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense);
6469: #if defined(PETSC_HAVE_ELEMENTAL)
6470:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);
6471: #endif
6472: #if defined(PETSC_HAVE_SCALAPACK)
6473:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);
6474: #endif
6475:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);
6476:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);
6477: #if defined(PETSC_HAVE_HYPRE)
6478:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);
6479:   PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);
6480: #endif
6481:   PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);
6482:   PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);
6483:   PetscObjectComposeFunction((PetscObject)B,"MatSetPreallocationCOO_C",MatSetPreallocationCOO_MPIAIJ);
6484:   PetscObjectComposeFunction((PetscObject)B,"MatSetValuesCOO_C",MatSetValuesCOO_MPIAIJ);
6485:   PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);
6486:   return 0;
6487: }

6489: /*@C
6490:      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
6491:          and "off-diagonal" parts of the matrix in CSR format.

6493:    Collective

6495:    Input Parameters:
6496: +  comm - MPI communicator
6497: .  m - number of local rows (Cannot be PETSC_DECIDE)
6498: .  n - This value should be the same as the local size used in creating the
6499:        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have it
6500:        calculated if N is given) For square matrices n is almost always m.
6501: .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
6502: .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
6503: .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6504: .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
6505: .   a - matrix values
6506: .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6507: .   oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
6508: -   oa - matrix values

6510:    Output Parameter:
6511: .   mat - the matrix

6513:    Level: advanced

6515:    Notes:
6516:        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6517:        must free the arrays once the matrix has been destroyed and not before.

6519:        The i and j indices are 0 based

6521:        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

6523:        This sets local rows and cannot be used to set off-processor values.

6525:        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6526:        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6527:        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6528:        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6529:        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6530:        communication if it is known that only local entries will be set.

6532: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6533:           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6534: @*/
6535: PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6536: {
6537:   Mat_MPIAIJ     *maij;

6542:   MatCreate(comm,mat);
6543:   MatSetSizes(*mat,m,n,M,N);
6544:   MatSetType(*mat,MATMPIAIJ);
6545:   maij = (Mat_MPIAIJ*) (*mat)->data;

6547:   (*mat)->preallocated = PETSC_TRUE;

6549:   PetscLayoutSetUp((*mat)->rmap);
6550:   PetscLayoutSetUp((*mat)->cmap);

6552:   MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);
6553:   MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);

6555:   MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);
6556:   MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
6557:   MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
6558:   MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);
6559:   MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
6560:   return 0;
6561: }
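/* An illustrative call, assuming exactly two MPI ranks, each owning 2 rows and 2 columns of a 4x4
   matrix; diagonal-block column indices are local, off-diagonal ones are global. Hypothetical
   example code, kept out of the build with '#if 0'. */
#if 0
static PetscErrorCode SplitArraysExample(MPI_Comm comm,Mat *A)
{
  PetscMPIInt        rank;
  static PetscInt    i[]  = {0,2,4}, j[] = {0,1,0,1}; /* 2x2 diagonal block, local column ids */
  static PetscScalar a[]  = {2,-1,-1,2};
  static PetscInt    oi[] = {0,1,2};                  /* one off-diagonal entry per row       */
  static PetscInt    oj[2];                           /* global column ids, filled below      */
  static PetscScalar oa[] = {-1,-1};

  MPI_Comm_rank(comm,&rank);
  oj[0] = (1-rank)*2; oj[1] = (1-rank)*2 + 1;         /* couple to the other rank's columns   */
  MatCreateMPIAIJWithSplitArrays(comm,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,A);
  /* i,j,a,oi,oj,oa are not copied and must outlive *A */
  return 0;
}
#endif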

6563: typedef struct {
6564:   Mat       *mp;    /* intermediate products */
6565:   PetscBool *mptmp; /* is the intermediate product temporary ? */
6566:   PetscInt  cp;     /* number of intermediate products */

6568:   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
6569:   PetscInt    *startsj_s,*startsj_r;
6570:   PetscScalar *bufa;
6571:   Mat         P_oth;

6573:   /* may take advantage of merging product->B */
6574:   Mat Bloc; /* B-local by merging diag and off-diag */

6576:   /* cusparse does not support splitting the symbolic and numeric phases.
6577:      When api_user is true, we do not need to update the numerical values
6578:      of the temporary storage */
6579:   PetscBool reusesym;

6581:   /* support for COO values insertion */
6582:   PetscScalar  *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
6583:   PetscInt     **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */
6584:   PetscInt     **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */
6585:   PetscBool    hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */
6586:   PetscSF      sf; /* used for non-local values insertion and memory malloc */
6587:   PetscMemType mtype;

6589:   /* customization */
6590:   PetscBool abmerge;
6591:   PetscBool P_oth_bind;
6592: } MatMatMPIAIJBACKEND;

6594: PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6595: {
6596:   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data;
6597:   PetscInt            i;

6599:   PetscFree2(mmdata->startsj_s,mmdata->startsj_r);
6600:   PetscFree(mmdata->bufa);
6601:   PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v);
6602:   PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w);
6603:   MatDestroy(&mmdata->P_oth);
6604:   MatDestroy(&mmdata->Bloc);
6605:   PetscSFDestroy(&mmdata->sf);
6606:   for (i = 0; i < mmdata->cp; i++) {
6607:     MatDestroy(&mmdata->mp[i]);
6608:   }
6609:   PetscFree2(mmdata->mp,mmdata->mptmp);
6610:   PetscFree(mmdata->own[0]);
6611:   PetscFree(mmdata->own);
6612:   PetscFree(mmdata->off[0]);
6613:   PetscFree(mmdata->off);
6614:   PetscFree(mmdata);
6615:   return 0;
6616: }

6618: /* Copy selected n entries with indices in idx[] of A to v[].
6619:    If idx is NULL, copy the whole data array of A to v[]
6620:  */
6621: static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
6622: {
6623:   PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]);

6625:   PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f);
6626:   if (f) {
6627:     (*f)(A,n,idx,v);
6628:   } else {
6629:     const PetscScalar *vv;

6631:     MatSeqAIJGetArrayRead(A,&vv);
6632:     if (n && idx) {
6633:       PetscScalar    *w = v;
6634:       const PetscInt *oi = idx;
6635:       PetscInt       j;

6637:       for (j = 0; j < n; j++) *w++ = vv[*oi++];
6638:     } else {
6639:       PetscArraycpy(v,vv,n);
6640:     }
6641:     MatSeqAIJRestoreArrayRead(A,&vv);
6642:   }
6643:   return 0;
6644: }

6646: static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
6647: {
6648:   MatMatMPIAIJBACKEND *mmdata;
6649:   PetscInt            i,n_d,n_o;

6651:   MatCheckProduct(C,1);
6653:   mmdata = (MatMatMPIAIJBACKEND*)C->product->data;
6654:   if (!mmdata->reusesym) { /* update temporary matrices */
6655:     if (mmdata->P_oth) {
6656:       MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);
6657:     }
6658:     if (mmdata->Bloc) {
6659:       MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc);
6660:     }
6661:   }
6662:   mmdata->reusesym = PETSC_FALSE;

6664:   for (i = 0; i < mmdata->cp; i++) {
6666:     (*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]);
6667:   }
6668:   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
6669:     PetscInt noff = mmdata->off[i+1] - mmdata->off[i];

6671:     if (mmdata->mptmp[i]) continue;
6672:     if (noff) {
6673:       PetscInt nown = mmdata->own[i+1] - mmdata->own[i];

6675:       MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o);
6676:       MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d);
6677:       n_o += noff;
6678:       n_d += nown;
6679:     } else {
6680:       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data;

6682:       MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d);
6683:       n_d += mm->nz;
6684:     }
6685:   }
6686:   if (mmdata->hasoffproc) { /* off-process insertion */
6687:     PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);
6688:     PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);
6689:   }
6690:   MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES);
6691:   return 0;
6692: }

6694: /* Support for Pt * A, A * P, or Pt * A * P */
6695: #define MAX_NUMBER_INTERMEDIATE 4
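/* A usage sketch of the three product types this backend handles, via the public product interface;
   hypothetical example code, kept out of the build with '#if 0'. */
#if 0
static PetscErrorCode BackendProductExamples(Mat A,Mat P)
{
  Mat AP,PtA,PtAP;

  MatMatMult(A,P,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AP);           /* MATPRODUCT_AB   */
  MatTransposeMatMult(P,A,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&PtA); /* MATPRODUCT_AtB  */
  MatPtAP(A,P,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&PtAP);            /* MATPRODUCT_PtAP */
  MatDestroy(&AP);
  MatDestroy(&PtA);
  MatDestroy(&PtAP);
  return 0;
}
#endif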
6696: PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
6697: {
6698:   Mat_Product            *product = C->product;
6699:   Mat                    A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
6700:   Mat_MPIAIJ             *a,*p;
6701:   MatMatMPIAIJBACKEND    *mmdata;
6702:   ISLocalToGlobalMapping P_oth_l2g = NULL;
6703:   IS                     glob = NULL;
6704:   const char             *prefix;
6705:   char                   pprefix[256];
6706:   const PetscInt         *globidx,*P_oth_idx;
6707:   PetscInt               i,j,cp,m,n,M,N,*coo_i,*coo_j;
6708:   PetscCount             ncoo,ncoo_d,ncoo_o,ncoo_oown;
6709:   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
6710:                                                                                         /* type-0: consecutive, start from 0; type-1: consecutive with */
6711:                                                                                         /* a base offset; type-2: sparse with a local to global map table */
6712:   const PetscInt         *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */

6714:   MatProductType         ptype;
6715:   PetscBool              mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk;
6716:   PetscMPIInt            size;
6717:   PetscErrorCode         ierr;

6719:   MatCheckProduct(C,1);
6721:   ptype = product->type;
6722:   if (product->A->symmetric && ptype == MATPRODUCT_AtB) {
6723:     ptype = MATPRODUCT_AB;
6724:     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
6725:   }
6726:   switch (ptype) {
6727:   case MATPRODUCT_AB:
6728:     A = product->A;
6729:     P = product->B;
6730:     m = A->rmap->n;
6731:     n = P->cmap->n;
6732:     M = A->rmap->N;
6733:     N = P->cmap->N;
6734:     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
6735:     break;
6736:   case MATPRODUCT_AtB:
6737:     P = product->A;
6738:     A = product->B;
6739:     m = P->cmap->n;
6740:     n = A->cmap->n;
6741:     M = P->cmap->N;
6742:     N = A->cmap->N;
6743:     hasoffproc = PETSC_TRUE;
6744:     break;
6745:   case MATPRODUCT_PtAP:
6746:     A = product->A;
6747:     P = product->B;
6748:     m = P->cmap->n;
6749:     n = P->cmap->n;
6750:     M = P->cmap->N;
6751:     N = P->cmap->N;
6752:     hasoffproc = PETSC_TRUE;
6753:     break;
6754:   default:
6755:     SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
6756:   }
6757:   MPI_Comm_size(PetscObjectComm((PetscObject)C),&size);
6758:   if (size == 1) hasoffproc = PETSC_FALSE;

6760:   /* defaults */
6761:   for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) {
6762:     mp[i]    = NULL;
6763:     mptmp[i] = PETSC_FALSE;
6764:     rmapt[i] = -1;
6765:     cmapt[i] = -1;
6766:     rmapa[i] = NULL;
6767:     cmapa[i] = NULL;
6768:   }

6770:   /* customization */
6771:   PetscNew(&mmdata);
6772:   mmdata->reusesym = product->api_user;
6773:   if (ptype == MATPRODUCT_AB) {
6774:     if (product->api_user) {
6775:       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");
6776:       PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);
6777:       PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);
6778:       PetscOptionsEnd();
6779:     } else {
6780:       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");
6781:       PetscOptionsBool("-mat_product_algorithm_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);
6782:       PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);
6783:       PetscOptionsEnd();
6784:     }
6785:   } else if (ptype == MATPRODUCT_PtAP) {
6786:     if (product->api_user) {
6787:       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");
6788:       PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);
6789:       PetscOptionsEnd();
6790:     } else {
6791:       PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");
6792:       PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);
6793:       PetscOptionsEnd();
6794:     }
6795:   }
6796:   a = (Mat_MPIAIJ*)A->data;
6797:   p = (Mat_MPIAIJ*)P->data;
6798:   MatSetSizes(C,m,n,M,N);
6799:   PetscLayoutSetUp(C->rmap);
6800:   PetscLayoutSetUp(C->cmap);
6801:   MatSetType(C,((PetscObject)A)->type_name);
6802:   MatGetOptionsPrefix(C,&prefix);

6804:   cp   = 0;
6805:   switch (ptype) {
6806:   case MATPRODUCT_AB: /* A * P */
6807:     MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);

6809:     /* A_diag * P_local (merged or not) */
6810:     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
6811:       /* P is product->B */
6812:       MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);
6813:       MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);
6814:       MatProductSetType(mp[cp],MATPRODUCT_AB);
6815:       MatProductSetFill(mp[cp],product->fill);
6816:       PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);
6817:       MatSetOptionsPrefix(mp[cp],prefix);
6818:       MatAppendOptionsPrefix(mp[cp],pprefix);
6819:       mp[cp]->product->api_user = product->api_user;
6820:       MatProductSetFromOptions(mp[cp]);
6822:       (*mp[cp]->ops->productsymbolic)(mp[cp]);
6823:       ISGetIndices(glob,&globidx);
6824:       rmapt[cp] = 1;
6825:       cmapt[cp] = 2;
6826:       cmapa[cp] = globidx;
6827:       mptmp[cp] = PETSC_FALSE;
6828:       cp++;
6829:     } else { /* A_diag * P_diag and A_diag * P_off */
6830:       MatProductCreate(a->A,p->A,NULL,&mp[cp]);
6831:       MatProductSetType(mp[cp],MATPRODUCT_AB);
6832:       MatProductSetFill(mp[cp],product->fill);
6833:       PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);
6834:       MatSetOptionsPrefix(mp[cp],prefix);
6835:       MatAppendOptionsPrefix(mp[cp],pprefix);
6836:       mp[cp]->product->api_user = product->api_user;
6837:       MatProductSetFromOptions(mp[cp]);
6839:       (*mp[cp]->ops->productsymbolic)(mp[cp]);
6840:       rmapt[cp] = 1;
6841:       cmapt[cp] = 1;
6842:       mptmp[cp] = PETSC_FALSE;
6843:       cp++;
6844:       MatProductCreate(a->A,p->B,NULL,&mp[cp]);
6845:       MatProductSetType(mp[cp],MATPRODUCT_AB);
6846:       MatProductSetFill(mp[cp],product->fill);
6847:       PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);
6848:       MatSetOptionsPrefix(mp[cp],prefix);
6849:       MatAppendOptionsPrefix(mp[cp],pprefix);
6850:       mp[cp]->product->api_user = product->api_user;
6851:       MatProductSetFromOptions(mp[cp]);
6853:       (*mp[cp]->ops->productsymbolic)(mp[cp]);
6854:       rmapt[cp] = 1;
6855:       cmapt[cp] = 2;
6856:       cmapa[cp] = p->garray;
6857:       mptmp[cp] = PETSC_FALSE;
6858:       cp++;
6859:     }

6861:     /* A_off * P_other */
6862:     if (mmdata->P_oth) {
6863:       MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g); /* make P_oth use local col ids */
6864:       ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);
6865:       MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);
6866:       MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);
6867:       MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);
6868:       MatProductSetType(mp[cp],MATPRODUCT_AB);
6869:       MatProductSetFill(mp[cp],product->fill);
6870:       PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);
6871:       MatSetOptionsPrefix(mp[cp],prefix);
6872:       MatAppendOptionsPrefix(mp[cp],pprefix);
6873:       mp[cp]->product->api_user = product->api_user;
6874:       MatProductSetFromOptions(mp[cp]);
6876:       (*mp[cp]->ops->productsymbolic)(mp[cp]);
6877:       rmapt[cp] = 1;
6878:       cmapt[cp] = 2;
6879:       cmapa[cp] = P_oth_idx;
6880:       mptmp[cp] = PETSC_FALSE;
6881:       cp++;
6882:     }
6883:     break;

6885:   case MATPRODUCT_AtB: /* (P^t * A): P_diag^t * A_loc + P_off^t * A_loc */
6886:     /* A is product->B */
6887:     MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);
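          /* mmdata->Bloc is the local (diagonal plus off-diagonal) part of A merged into a single SeqAIJ
             matrix; glob lists the global column indices of its columns */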
6888:     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
6889:       MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]);
6890:       MatProductSetType(mp[cp],MATPRODUCT_AtB);
6891:       MatProductSetFill(mp[cp],product->fill);
6892:       PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);
6893:       MatSetOptionsPrefix(mp[cp],prefix);
6894:       MatAppendOptionsPrefix(mp[cp],pprefix);
6895:       mp[cp]->product->api_user = product->api_user;
6896:       MatProductSetFromOptions(mp[cp]);
6898:       (*mp[cp]->ops->productsymbolic)(mp[cp]);
6899:       ISGetIndices(glob,&globidx);
6900:       rmapt[cp] = 2;
6901:       rmapa[cp] = globidx;
6902:       cmapt[cp] = 2;
6903:       cmapa[cp] = globidx;
6904:       mptmp[cp] = PETSC_FALSE;
6905:       cp++;
6906:     } else {
6907:       MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]);
6908:       MatProductSetType(mp[cp],MATPRODUCT_AtB);
6909:       MatProductSetFill(mp[cp],product->fill);
6910:       PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);
6911:       MatSetOptionsPrefix(mp[cp],prefix);
6912:       MatAppendOptionsPrefix(mp[cp],pprefix);
6913:       mp[cp]->product->api_user = product->api_user;
6914:       MatProductSetFromOptions(mp[cp]);
6916:       (*mp[cp]->ops->productsymbolic)(mp[cp]);
6917:       ISGetIndices(glob,&globidx);
6918:       rmapt[cp] = 1;
6919:       cmapt[cp] = 2;
6920:       cmapa[cp] = globidx;
6921:       mptmp[cp] = PETSC_FALSE;
6922:       cp++;
6923:       MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]);
6924:       MatProductSetType(mp[cp],MATPRODUCT_AtB);
6925:       MatProductSetFill(mp[cp],product->fill);
6926:       PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);
6927:       MatSetOptionsPrefix(mp[cp],prefix);
6928:       MatAppendOptionsPrefix(mp[cp],pprefix);
6929:       mp[cp]->product->api_user = product->api_user;
6930:       MatProductSetFromOptions(mp[cp]);
6932:       (*mp[cp]->ops->productsymbolic)(mp[cp]);
6933:       rmapt[cp] = 2;
6934:       rmapa[cp] = p->garray;
6935:       cmapt[cp] = 2;
6936:       cmapa[cp] = globidx;
6937:       mptmp[cp] = PETSC_FALSE;
6938:       cp++;
6939:     }
6940:     break;
6941:   case MATPRODUCT_PtAP:
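          /* C = P^t*A*P is assembled from P_loc^t * A_diag * P_loc plus, when A has off-process columns,
             P_loc^t * (A_off * P_oth); the factor A_off * P_oth is built below as the temporary product mp[1] */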
6942:     MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);
6943:     /* P is product->B */
6944:     MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);
6945:     MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);
6946:     MatProductSetType(mp[cp],MATPRODUCT_PtAP);
6947:     MatProductSetFill(mp[cp],product->fill);
6948:     PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);
6949:     MatSetOptionsPrefix(mp[cp],prefix);
6950:     MatAppendOptionsPrefix(mp[cp],pprefix);
6951:     mp[cp]->product->api_user = product->api_user;
6952:     MatProductSetFromOptions(mp[cp]);
6954:     (*mp[cp]->ops->productsymbolic)(mp[cp]);
6955:     ISGetIndices(glob,&globidx);
6956:     rmapt[cp] = 2;
6957:     rmapa[cp] = globidx;
6958:     cmapt[cp] = 2;
6959:     cmapa[cp] = globidx;
6960:     mptmp[cp] = PETSC_FALSE;
6961:     cp++;
6962:     if (mmdata->P_oth) {
6963:       MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);
6964:       ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);
6965:       MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);
6966:       MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);
6967:       MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);
6968:       MatProductSetType(mp[cp],MATPRODUCT_AB);
6969:       MatProductSetFill(mp[cp],product->fill);
6970:       PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);
6971:       MatSetOptionsPrefix(mp[cp],prefix);
6972:       MatAppendOptionsPrefix(mp[cp],pprefix);
6973:       mp[cp]->product->api_user = product->api_user;
6974:       MatProductSetFromOptions(mp[cp]);
6976:       (*mp[cp]->ops->productsymbolic)(mp[cp]);
6977:       mptmp[cp] = PETSC_TRUE;
6978:       cp++;
6979:       MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]);
6980:       MatProductSetType(mp[cp],MATPRODUCT_AtB);
6981:       MatProductSetFill(mp[cp],product->fill);
6982:       PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp);
6983:       MatSetOptionsPrefix(mp[cp],prefix);
6984:       MatAppendOptionsPrefix(mp[cp],pprefix);
6985:       mp[cp]->product->api_user = product->api_user;
6986:       MatProductSetFromOptions(mp[cp]);
6988:       (*mp[cp]->ops->productsymbolic)(mp[cp]);
6989:       rmapt[cp] = 2;
6990:       rmapa[cp] = globidx;
6991:       cmapt[cp] = 2;
6992:       cmapa[cp] = P_oth_idx;
6993:       mptmp[cp] = PETSC_FALSE;
6994:       cp++;
6995:     }
6996:     break;
6997:   default:
6998:     SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
6999:   }
7000:   /* sanity check */

7003:   PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp);
7004:   for (i = 0; i < cp; i++) {
7005:     mmdata->mp[i]    = mp[i];
7006:     mmdata->mptmp[i] = mptmp[i];
7007:   }
7008:   mmdata->cp = cp;
7009:   C->product->data       = mmdata;
7010:   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7011:   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;

7013:   /* memory type */
7014:   mmdata->mtype = PETSC_MEMTYPE_HOST;
7015:   PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");
7016:   PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"");
7017:   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7018:   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;

7020:   /* prepare coo coordinates for values insertion */

7022:   /* count the total nonzeros of the intermediate SeqAIJ matrices
7023:     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7024:     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted into remote procs
7025:     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7026:   */
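        /* Nonzeros of an intermediate matrix with rmapt[cp] == 2 may map to rows owned by other processes;
           the loop below classifies each of its rows by comparing the global row index against C's owned
           row range [rstart, rend) */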
7027:   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7028:     Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
7029:     if (mptmp[cp]) continue;
7030:     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */
7031:       const PetscInt *rmap = rmapa[cp];
7032:       const PetscInt mr = mp[cp]->rmap->n;
7033:       const PetscInt rs = C->rmap->rstart;
7034:       const PetscInt re = C->rmap->rend;
7035:       const PetscInt *ii  = mm->i;
7036:       for (i = 0; i < mr; i++) {
7037:         const PetscInt gr = rmap[i];
7038:         const PetscInt nz = ii[i+1] - ii[i];
7039:         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7040:         else ncoo_oown += nz; /* this row is local */
7041:       }
7042:     } else ncoo_d += mm->nz;
7043:   }

7045:   /*
7046:     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc

7048:     ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted into this proc by other procs.

7050:     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly for own[0].

7052:     off[p]: points to the segment for matrix mp[p], storing locations of the nonzeros that mp[p] will insert into other procs
7053:     own[p]: points to the segment for matrix mp[p], storing locations of the nonzeros that mp[p] will insert locally
7054:     so off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.

7056:     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7057:     Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of the nonzeros this proc will receive.
7058:   */
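        /* A purely illustrative example (numbers made up): with ncoo_d = 5, ncoo_oown = 3 and ncoo2 = 4
           entries received from other procs, ncoo = 12; the first 8 slots of coo_i/j/v hold the locally
           produced nonzeros and the last 4 hold the received ones */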
7059:   PetscCalloc1(mmdata->cp+1,&mmdata->off); /* +1 to make a csr-like data structure */
7060:   PetscCalloc1(mmdata->cp+1,&mmdata->own);

7062:   /* gather (i,j) of nonzeros inserted by remote procs */
7063:   if (hasoffproc) {
7064:     PetscSF  msf;
7065:     PetscInt ncoo2,*coo_i2,*coo_j2;

7067:     PetscMalloc1(ncoo_o,&mmdata->off[0]);
7068:     PetscMalloc1(ncoo_oown,&mmdata->own[0]);
7069:     PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j); /* to collect (i,j) of entries to be sent to others */

7071:     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7072:       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
7073:       PetscInt   *idxoff = mmdata->off[cp];
7074:       PetscInt   *idxown = mmdata->own[cp];
7075:       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7076:         const PetscInt *rmap = rmapa[cp];
7077:         const PetscInt *cmap = cmapa[cp];
7078:         const PetscInt *ii  = mm->i;
7079:         PetscInt       *coi = coo_i + ncoo_o;
7080:         PetscInt       *coj = coo_j + ncoo_o;
7081:         const PetscInt mr = mp[cp]->rmap->n;
7082:         const PetscInt rs = C->rmap->rstart;
7083:         const PetscInt re = C->rmap->rend;
7084:         const PetscInt cs = C->cmap->rstart;
7085:         for (i = 0; i < mr; i++) {
7086:           const PetscInt *jj = mm->j + ii[i];
7087:           const PetscInt gr  = rmap[i];
7088:           const PetscInt nz  = ii[i+1] - ii[i];
7089:           if (gr < rs || gr >= re) { /* this is an offproc row */
7090:             for (j = ii[i]; j < ii[i+1]; j++) {
7091:               *coi++ = gr;
7092:               *idxoff++ = j;
7093:             }
7094:             if (!cmapt[cp]) { /* already global */
7095:               for (j = 0; j < nz; j++) *coj++ = jj[j];
7096:             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7097:               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7098:             } else { /* type-2, local to global for sparse columns */
7099:               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7100:             }
7101:             ncoo_o += nz;
7102:           } else { /* this is a local row */
7103:             for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j;
7104:           }
7105:         }
7106:       }
7107:       mmdata->off[cp + 1] = idxoff;
7108:       mmdata->own[cp + 1] = idxown;
7109:     }
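          /* The star forest created below has one leaf per off-process entry, keyed by the global row stored
             in coo_i, with roots laid out by C->rmap; PetscSFGatherBegin/End then concatenates the incoming
             (i,j) pairs of the ncoo2 entries owned here after the locally produced ones */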

7111:     PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);
7112:     PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i);
7113:     PetscSFGetMultiSF(mmdata->sf,&msf);
7114:     PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL);
7115:     ncoo = ncoo_d + ncoo_oown + ncoo2;
7116:     PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2);
7117:     PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown); /* put (i,j) of remote nonzeros at back */
7118:     PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);
7119:     PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);
7120:     PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);
7121:     PetscFree2(coo_i,coo_j);
7122:     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7123:     PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w);
7124:     coo_i = coo_i2;
7125:     coo_j = coo_j2;
7126:   } else { /* no offproc values insertion */
7127:     ncoo = ncoo_d;
7128:     PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j);

7130:     PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);
7131:     PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER);
7132:     PetscSFSetUp(mmdata->sf);
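          /* the empty star forest above keeps mmdata->sf a valid object, e.g. for the PetscSFMalloc() call below */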
7133:   }
7134:   mmdata->hasoffproc = hasoffproc;

7136:   /* gather (i,j) of nonzeros inserted locally */
7137:   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7138:     Mat_SeqAIJ     *mm = (Mat_SeqAIJ*)mp[cp]->data;
7139:     PetscInt       *coi = coo_i + ncoo_d;
7140:     PetscInt       *coj = coo_j + ncoo_d;
7141:     const PetscInt *jj  = mm->j;
7142:     const PetscInt *ii  = mm->i;
7143:     const PetscInt *cmap = cmapa[cp];
7144:     const PetscInt *rmap = rmapa[cp];
7145:     const PetscInt mr = mp[cp]->rmap->n;
7146:     const PetscInt rs = C->rmap->rstart;
7147:     const PetscInt re = C->rmap->rend;
7148:     const PetscInt cs = C->cmap->rstart;

7150:     if (mptmp[cp]) continue;
7151:     if (rmapt[cp] == 1) { /* consecutive rows */
7152:       /* fill coo_i */
7153:       for (i = 0; i < mr; i++) {
7154:         const PetscInt gr = i + rs;
7155:         for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr;
7156:       }
7157:       /* fill coo_j */
7158:       if (!cmapt[cp]) { /* type-0, already global */
7159:         PetscArraycpy(coj,jj,mm->nz);
7160:       } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
7161:         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7162:       } else { /* type-2, local to global for sparse columns */
7163:         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7164:       }
7165:       ncoo_d += mm->nz;
7166:     } else if (rmapt[cp] == 2) { /* sparse rows */
7167:       for (i = 0; i < mr; i++) {
7168:         const PetscInt *jj = mm->j + ii[i];
7169:         const PetscInt gr  = rmap[i];
7170:         const PetscInt nz  = ii[i+1] - ii[i];
7171:         if (gr >= rs && gr < re) { /* local rows */
7172:           for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr;
7173:           if (!cmapt[cp]) { /* type-0, already global */
7174:             for (j = 0; j < nz; j++) *coj++ = jj[j];
7175:           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7176:             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7177:           } else { /* type-2, local to global for sparse columns */
7178:             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7179:           }
7180:           ncoo_d += nz;
7181:         }
7182:       }
7183:     }
7184:   }
7185:   if (glob) {
7186:     ISRestoreIndices(glob,&globidx);
7187:   }
7188:   ISDestroy(&glob);
7189:   if (P_oth_l2g) {
7190:     ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx);
7191:   }
7192:   ISLocalToGlobalMappingDestroy(&P_oth_l2g);
7193:   /* allocate an array to store the values of all nonzeros (inserted locally or remotely) belonging to this proc */
7194:   PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v);

7196:   /* preallocate with COO data */
7197:   MatSetPreallocationCOO(C,ncoo,coo_i,coo_j);
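        /* The numeric phase (MatProductNumeric_MPIAIJBACKEND) reuses off[]/own[] and mmdata->sf to collect
           the computed values into coo_v and then inserts them with MatSetValuesCOO() */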
7198:   PetscFree2(coo_i,coo_j);
7199:   return 0;
7200: }
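/* Use MatProductSymbolic_MPIAIJBACKEND above for AB, AtB and PtAP products. When PETSc is configured with
   device support, this only happens if A and B have the same type, neither is bound to the CPU, and the
   user has not requested the CPU path through one of the options registered below, e.g.
   -matmatmult_backend_cpu, -mattransposematmult_backend_cpu or -matptap_backend_cpu for the
   MatMatMult()/MatTransposeMatMult()/MatPtAP() interfaces. Otherwise the standard MPIAIJ routines are used. */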

7202: PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7203: {
7204:   Mat_Product *product = mat->product;
7205: #if defined(PETSC_HAVE_DEVICE)
7206:   PetscBool    match   = PETSC_FALSE;
7207:   PetscBool    usecpu  = PETSC_FALSE;
7208: #else
7209:   PetscBool    match   = PETSC_TRUE;
7210: #endif

7212:   MatCheckProduct(mat,1);
7213: #if defined(PETSC_HAVE_DEVICE)
7214:   if (!product->A->boundtocpu && !product->B->boundtocpu) {
7215:     PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match);
7216:   }
7217:   if (match) { /* we can always fall back to the CPU if requested */
7219:     switch (product->type) {
7220:     case MATPRODUCT_AB:
7221:       if (product->api_user) {
7222:         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");
7223:         PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);
7224:         PetscOptionsEnd();
7225:       } else {
7226:         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");
7227:         PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);
7228:         PetscOptionsEnd();
7229:       }
7230:       break;
7231:     case MATPRODUCT_AtB:
7232:       if (product->api_user) {
7233:         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");
7234:         PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);
7235:         PetscOptionsEnd();
7236:       } else {
7237:         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");
7238:         PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);
7239:         PetscOptionsEnd();
7240:       }
7241:       break;
7242:     case MATPRODUCT_PtAP:
7243:       if (product->api_user) {
7244:         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");
7245:         PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);
7246:         PetscOptionsEnd();
7247:       } else {
7248:         PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");
7249:         PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);
7250:         PetscOptionsEnd();
7251:       }
7252:       break;
7253:     default:
7254:       break;
7255:     }
7256:     match = (PetscBool)!usecpu;
7257:   }
7258: #endif
7259:   if (match) {
7260:     switch (product->type) {
7261:     case MATPRODUCT_AB:
7262:     case MATPRODUCT_AtB:
7263:     case MATPRODUCT_PtAP:
7264:       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7265:       break;
7266:     default:
7267:       break;
7268:     }
7269:   }
7270:   /* fall back to MPIAIJ ops */
7271:   if (!mat->ops->productsymbolic) MatProductSetFromOptions_MPIAIJ(mat);
7272:   return 0;
7273: }

7275: /*
7276:     Special version for direct calls from Fortran
7277: */
7278: #include <petsc/private/fortranimpl.h>

7280: /* Change these macros so they can be used in a void function */
7281: /* Identical to PetscCallVoid, except it assigns to *_ierr */
7282: #undef  PetscCall
7283: #define PetscCall(...) do {                                                                    \
7284:     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__;                                              \
7285:     if (PetscUnlikely(ierr_msv_mpiaij)) {                                                      \
7286:       *_ierr = PetscError(PETSC_COMM_SELF,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr_msv_mpiaij,PETSC_ERROR_REPEAT," "); \
7287:       return;                                                                                  \
7288:     }                                                                                          \
7289:   } while (0)

7291: #undef SETERRQ
7292: #define SETERRQ(comm,ierr,...) do {                                                            \
7293:     *_ierr = PetscError(comm,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr,PETSC_ERROR_INITIAL,__VA_ARGS__); \
7294:     return;                                                                                    \
7295:   } while (0)
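/* Both expansions above rely on the enclosing function providing a "PetscErrorCode *_ierr" argument,
   which matsetvaluesmpiaij_() below does */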

7297: #if defined(PETSC_HAVE_FORTRAN_CAPS)
7298: #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
7299: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
7300: #define matsetvaluesmpiaij_ matsetvaluesmpiaij
7301: #else
7302: #endif
7303: PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
7304: {
7305:   Mat          mat  = *mmat;
7306:   PetscInt     m    = *mm, n = *mn;
7307:   InsertMode   addv = *maddv;
7308:   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ*)mat->data;
7309:   PetscScalar  value;

7311:   MatCheckPreallocated(mat,1);
7312:   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
7314:   {
7315:     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
7316:     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
7317:     PetscBool roworiented = aij->roworiented;

7319:     /* Some Variables required in the macro */
7320:     Mat        A                    = aij->A;
7321:     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
7322:     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
7323:     MatScalar  *aa;
7324:     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
7325:     Mat        B                    = aij->B;
7326:     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
7327:     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
7328:     MatScalar  *ba;
7329:     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
7330:      * cannot use "#if defined" inside a macro. */
7331:     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

7333:     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
7334:     PetscInt  nonew = a->nonew;
7335:     MatScalar *ap1,*ap2;

7337:     MatSeqAIJGetArray(A,&aa);
7338:     MatSeqAIJGetArray(B,&ba);
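          /* Rows owned by this process go directly into the diagonal (A) or off-diagonal (B) block,
             depending on whether the column falls inside the owned column range; rows owned by other
             processes are stashed and communicated at assembly time (unless donotstash is set) */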
7339:     for (i=0; i<m; i++) {
7340:       if (im[i] < 0) continue;
7342:       if (im[i] >= rstart && im[i] < rend) {
7343:         row      = im[i] - rstart;
7344:         lastcol1 = -1;
7345:         rp1      = aj + ai[row];
7346:         ap1      = aa + ai[row];
7347:         rmax1    = aimax[row];
7348:         nrow1    = ailen[row];
7349:         low1     = 0;
7350:         high1    = nrow1;
7351:         lastcol2 = -1;
7352:         rp2      = bj + bi[row];
7353:         ap2      = ba + bi[row];
7354:         rmax2    = bimax[row];
7355:         nrow2    = bilen[row];
7356:         low2     = 0;
7357:         high2    = nrow2;

7359:         for (j=0; j<n; j++) {
7360:           if (roworiented) value = v[i*n+j];
7361:           else value = v[i+j*m];
7362:           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
7363:           if (in[j] >= cstart && in[j] < cend) {
7364:             col = in[j] - cstart;
7365:             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
7366:           } else if (in[j] < 0) continue;
7367:           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
7368:             /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */
7369:             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
7370:           } else {
7371:             if (mat->was_assembled) {
7372:               if (!aij->colmap) {
7373:                 MatCreateColmap_MPIAIJ_Private(mat);
7374:               }
7375: #if defined(PETSC_USE_CTABLE)
7376:               PetscTableFind(aij->colmap,in[j]+1,&col);
7377:               col--;
7378: #else
7379:               col = aij->colmap[in[j]] - 1;
7380: #endif
7381:               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
7382:                 MatDisAssemble_MPIAIJ(mat);
7383:                 col  =  in[j];
7384:                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
7385:                 B        = aij->B;
7386:                 b        = (Mat_SeqAIJ*)B->data;
7387:                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
7388:                 rp2      = bj + bi[row];
7389:                 ap2      = ba + bi[row];
7390:                 rmax2    = bimax[row];
7391:                 nrow2    = bilen[row];
7392:                 low2     = 0;
7393:                 high2    = nrow2;
7394:                 bm       = aij->B->rmap->n;
7395:                 ba       = b->a;
7396:                 inserted = PETSC_FALSE;
7397:               }
7398:             } else col = in[j];
7399:             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
7400:           }
7401:         }
7402:       } else if (!aij->donotstash) {
7403:         if (roworiented) {
7404:           MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
7405:         } else {
7406:           MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));
7407:         }
7408:       }
7409:     }
7410:     MatSeqAIJRestoreArray(A,&aa);
7411:     MatSeqAIJRestoreArray(B,&ba);
7412:   }
7413:   return;
7414: }
7415: /* Undefining these here since they were redefined from their original definition above! No
7416:  * other PETSc functions should be defined past this point, as it is impossible to recover the
7417:  * original definitions */
7418: #undef PetscCall
7419: #undef SETERRQ