Actual source code: mpibaij.c

  1: #include <../src/mat/impls/baij/mpi/mpibaij.h>

  3: #include <petsc/private/hashseti.h>
  4: #include <petscblaslapack.h>
  5: #include <petscsf.h>

  7: static PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
  8: {
  9:   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

 11:   PetscFunctionBegin;
 12:   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
 13:   PetscCall(MatStashDestroy_Private(&mat->stash));
 14:   PetscCall(MatStashDestroy_Private(&mat->bstash));
 15:   PetscCall(MatDestroy(&baij->A));
 16:   PetscCall(MatDestroy(&baij->B));
 17: #if defined(PETSC_USE_CTABLE)
 18:   PetscCall(PetscHMapIDestroy(&baij->colmap));
 19: #else
 20:   PetscCall(PetscFree(baij->colmap));
 21: #endif
 22:   PetscCall(PetscFree(baij->garray));
 23:   PetscCall(VecDestroy(&baij->lvec));
 24:   PetscCall(VecScatterDestroy(&baij->Mvctx));
 25:   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
 26:   PetscCall(PetscFree(baij->barray));
 27:   PetscCall(PetscFree2(baij->hd, baij->ht));
 28:   PetscCall(PetscFree(baij->rangebs));
 29:   PetscCall(PetscFree(mat->data));

 31:   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
 32:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
 33:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
 34:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL));
 35:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL));
 36:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
 37:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL));
 38:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL));
 39:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL));
 40:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL));
 41: #if defined(PETSC_HAVE_HYPRE)
 42:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL));
 43: #endif
 44:   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL));
 45:   PetscFunctionReturn(PETSC_SUCCESS);
 46: }

 48: /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
 49: #define TYPE BAIJ
 50: #include "../src/mat/impls/aij/mpi/mpihashmat.h"
 51: #undef TYPE

 53: #if defined(PETSC_HAVE_HYPRE)
 54: PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
 55: #endif

 57: static PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[])
 58: {
 59:   Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data;
 60:   PetscInt           i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs;
 61:   PetscScalar       *va, *vv;
 62:   Vec                vB, vA;
 63:   const PetscScalar *vb;

 65:   PetscFunctionBegin;
 66:   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
 67:   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));

 69:   PetscCall(VecGetArrayWrite(vA, &va));
 70:   if (idx) {
 71:     for (i = 0; i < m; i++) {
 72:       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
 73:     }
 74:   }

 76:   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
 77:   PetscCall(PetscMalloc1(m, &idxb));
 78:   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));

 80:   PetscCall(VecGetArrayWrite(v, &vv));
 81:   PetscCall(VecGetArrayRead(vB, &vb));
 82:   for (i = 0; i < m; i++) {
 83:     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
 84:       vv[i] = vb[i];
 85:       if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
 86:     } else {
 87:       vv[i] = va[i];
 88:       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
 89:     }
 90:   }
 91:   PetscCall(VecRestoreArrayWrite(v, &vv));
 92:   PetscCall(VecRestoreArrayWrite(vA, &va));
 93:   PetscCall(VecRestoreArrayRead(vB, &vb));
 94:   PetscCall(PetscFree(idxb));
 95:   PetscCall(VecDestroy(&vA));
 96:   PetscCall(VecDestroy(&vB));
 97:   PetscFunctionReturn(PETSC_SUCCESS);
 98: }

100: static PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
101: {
102:   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;

104:   PetscFunctionBegin;
105:   PetscCall(MatStoreValues(aij->A));
106:   PetscCall(MatStoreValues(aij->B));
107:   PetscFunctionReturn(PETSC_SUCCESS);
108: }

110: static PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
111: {
112:   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;

114:   PetscFunctionBegin;
115:   PetscCall(MatRetrieveValues(aij->A));
116:   PetscCall(MatRetrieveValues(aij->B));
117:   PetscFunctionReturn(PETSC_SUCCESS);
118: }

120: /*
121:      Local utility routine that creates a mapping from the global column
122:    number to the local number in the off-diagonal part of the local
123:    storage of the matrix.  This is done in a non-scalable way since the
124:    length of colmap equals the number of global block columns; see the example below.
125: */
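/*
   A sketch with hypothetical data: suppose bs = 2 and garray = {1, 4, 6}, i.e. the
   off-diagonal block B holds global block columns 1, 4 and 6 as its local block
   columns 0, 1 and 2.  The loop below then records

       colmap[1] = 0*bs + 1 = 1,   colmap[4] = 1*bs + 1 = 3,   colmap[6] = 2*bs + 1 = 5

   with every value shifted up by one so that 0 can mean "this block column is not
   present on this process".  A global point column c is later translated to a local
   point column in B with

       col = colmap[c / bs] - 1 + c % bs;

   which is the lookup performed (the CTABLE variant does the equivalent hash-map
   query) in MatSetValues_MPIBAIJ() further down in this file.
*/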
126: PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
127: {
128:   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
129:   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
130:   PetscInt     nbs = B->nbs, i, bs = mat->rmap->bs;

132:   PetscFunctionBegin;
133: #if defined(PETSC_USE_CTABLE)
134:   PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap));
135:   for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1));
136: #else
137:   PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap));
138:   for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1;
139: #endif
140:   PetscFunctionReturn(PETSC_SUCCESS);
141: }

143: #define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \
144:   do { \
145:     brow = row / bs; \
146:     rp   = aj + ai[brow]; \
147:     ap   = aa + bs2 * ai[brow]; \
148:     rmax = aimax[brow]; \
149:     nrow = ailen[brow]; \
150:     bcol = col / bs; \
151:     ridx = row % bs; \
152:     cidx = col % bs; \
153:     low  = 0; \
154:     high = nrow; \
155:     while (high - low > 3) { \
156:       t = (low + high) / 2; \
157:       if (rp[t] > bcol) high = t; \
158:       else low = t; \
159:     } \
160:     for (_i = low; _i < high; _i++) { \
161:       if (rp[_i] > bcol) break; \
162:       if (rp[_i] == bcol) { \
163:         bap = ap + bs2 * _i + bs * cidx + ridx; \
164:         if (addv == ADD_VALUES) *bap += value; \
165:         else *bap = value; \
166:         goto a_noinsert; \
167:       } \
168:     } \
169:     if (a->nonew == 1) goto a_noinsert; \
170:     PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
171:     MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
172:     N = nrow++ - 1; \
173:     /* shift up all the later entries in this row */ \
174:     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
175:     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
176:     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
177:     rp[_i]                          = bcol; \
178:     ap[bs2 * _i + bs * cidx + ridx] = value; \
179:   a_noinsert:; \
180:     ailen[brow] = nrow; \
181:   } while (0)
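/*
   Storage convention used by the macro above and the one below: each nonzero bs x bs
   block is stored contiguously in column-major order, so entry (ridx, cidx) of a
   block sits at offset bs*cidx + ridx inside that block.  As a small illustration
   with bs = 2, a block

       [ a  c ]
       [ b  d ]

   occupies ap[bs2*_i + 0..3] = {a, b, c, d}.  The same addressing reappears in the
   hash-table MatSetValues_MPIBAIJ_HT() below as (col % bs)*bs + (row % bs).
*/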

183: #define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
184:   do { \
185:     brow = row / bs; \
186:     rp   = bj + bi[brow]; \
187:     ap   = ba + bs2 * bi[brow]; \
188:     rmax = bimax[brow]; \
189:     nrow = bilen[brow]; \
190:     bcol = col / bs; \
191:     ridx = row % bs; \
192:     cidx = col % bs; \
193:     low  = 0; \
194:     high = nrow; \
195:     while (high - low > 3) { \
196:       t = (low + high) / 2; \
197:       if (rp[t] > bcol) high = t; \
198:       else low = t; \
199:     } \
200:     for (_i = low; _i < high; _i++) { \
201:       if (rp[_i] > bcol) break; \
202:       if (rp[_i] == bcol) { \
203:         bap = ap + bs2 * _i + bs * cidx + ridx; \
204:         if (addv == ADD_VALUES) *bap += value; \
205:         else *bap = value; \
206:         goto b_noinsert; \
207:       } \
208:     } \
209:     if (b->nonew == 1) goto b_noinsert; \
210:     PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
211:     MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
212:     N = nrow++ - 1; \
213:     /* shift up all the later entries in this row */ \
214:     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
215:     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
216:     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
217:     rp[_i]                          = bcol; \
218:     ap[bs2 * _i + bs * cidx + ridx] = value; \
219:   b_noinsert:; \
220:     bilen[brow] = nrow; \
221:   } while (0)

223: PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
224: {
225:   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
226:   MatScalar    value;
227:   PetscBool    roworiented = baij->roworiented;
228:   PetscInt     i, j, row, col;
229:   PetscInt     rstart_orig = mat->rmap->rstart;
230:   PetscInt     rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
231:   PetscInt     cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

233:   /* Some Variables required in the macro */
234:   Mat          A     = baij->A;
235:   Mat_SeqBAIJ *a     = (Mat_SeqBAIJ *)(A)->data;
236:   PetscInt    *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
237:   MatScalar   *aa = a->a;

239:   Mat          B     = baij->B;
240:   Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
241:   PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
242:   MatScalar   *ba = b->a;

244:   PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
245:   PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
246:   MatScalar *ap, *bap;

248:   PetscFunctionBegin;
249:   for (i = 0; i < m; i++) {
250:     if (im[i] < 0) continue;
251:     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
252:     if (im[i] >= rstart_orig && im[i] < rend_orig) {
253:       row = im[i] - rstart_orig;
254:       for (j = 0; j < n; j++) {
255:         if (in[j] >= cstart_orig && in[j] < cend_orig) {
256:           col = in[j] - cstart_orig;
257:           if (roworiented) value = v[i * n + j];
258:           else value = v[i + j * m];
259:           MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
260:         } else if (in[j] < 0) {
261:           continue;
262:         } else {
263:           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
264:           if (mat->was_assembled) {
265:             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
266: #if defined(PETSC_USE_CTABLE)
267:             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
268:             col = col - 1;
269: #else
270:             col = baij->colmap[in[j] / bs] - 1;
271: #endif
272:             if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
273:               PetscCall(MatDisAssemble_MPIBAIJ(mat));
274:               col = in[j];
275:               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
276:               B     = baij->B;
277:               b     = (Mat_SeqBAIJ *)(B)->data;
278:               bimax = b->imax;
279:               bi    = b->i;
280:               bilen = b->ilen;
281:               bj    = b->j;
282:               ba    = b->a;
283:             } else {
284:               PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
285:               col += in[j] % bs;
286:             }
287:           } else col = in[j];
288:           if (roworiented) value = v[i * n + j];
289:           else value = v[i + j * m];
290:           MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
291:           /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
292:         }
293:       }
294:     } else {
295:       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
296:       if (!baij->donotstash) {
297:         mat->assembled = PETSC_FALSE;
298:         if (roworiented) {
299:           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
300:         } else {
301:           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
302:         }
303:       }
304:     }
305:   }
306:   PetscFunctionReturn(PETSC_SUCCESS);
307: }

309: static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
310: {
311:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
312:   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
313:   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
314:   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
315:   PetscBool          roworiented = a->roworiented;
316:   const PetscScalar *value       = v;
317:   MatScalar         *ap, *aa = a->a, *bap;

319:   PetscFunctionBegin;
320:   rp    = aj + ai[row];
321:   ap    = aa + bs2 * ai[row];
322:   rmax  = imax[row];
323:   nrow  = ailen[row];
324:   value = v;
325:   low   = 0;
326:   high  = nrow;
327:   while (high - low > 7) {
328:     t = (low + high) / 2;
329:     if (rp[t] > col) high = t;
330:     else low = t;
331:   }
332:   for (i = low; i < high; i++) {
333:     if (rp[i] > col) break;
334:     if (rp[i] == col) {
335:       bap = ap + bs2 * i;
336:       if (roworiented) {
337:         if (is == ADD_VALUES) {
338:           for (ii = 0; ii < bs; ii++) {
339:             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
340:           }
341:         } else {
342:           for (ii = 0; ii < bs; ii++) {
343:             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
344:           }
345:         }
346:       } else {
347:         if (is == ADD_VALUES) {
348:           for (ii = 0; ii < bs; ii++, value += bs) {
349:             for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
350:             bap += bs;
351:           }
352:         } else {
353:           for (ii = 0; ii < bs; ii++, value += bs) {
354:             for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
355:             bap += bs;
356:           }
357:         }
358:       }
359:       goto noinsert2;
360:     }
361:   }
362:   if (nonew == 1) goto noinsert2;
363:   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
364:   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
365:   N = nrow++ - 1;
366:   high++;
367:   /* shift up all the later entries in this row */
368:   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
369:   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
370:   rp[i] = col;
371:   bap   = ap + bs2 * i;
372:   if (roworiented) {
373:     for (ii = 0; ii < bs; ii++) {
374:       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
375:     }
376:   } else {
377:     for (ii = 0; ii < bs; ii++) {
378:       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
379:     }
380:   }
381: noinsert2:;
382:   ailen[row] = nrow;
383:   PetscFunctionReturn(PETSC_SUCCESS);
384: }

386: /*
387:     This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
388:     by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
389: */
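/*
   Indexing sketch for the copy at "Here a copy is required" (row-oriented case):
   the caller passes the m*bs by n*bs values as one dense row-major array, so entry
   (ii, jj) of block (i, j) lives at

       v[(i*bs + ii) * (n*bs) + (j*bs + jj)]

   The code starts at value = v + (i*(stepval+bs) + j)*bs, which is exactly element
   (i*bs, j*bs); it copies bs contiguous entries (one row of the block) into barray
   and then advances value by bs + stepval = n*bs, i.e. to the next row of the same
   block.  The column-oriented case is the transpose of this picture, with
   stepval = (m-1)*bs.
*/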
390: static PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
391: {
392:   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ *)mat->data;
393:   const PetscScalar *value;
394:   MatScalar         *barray      = baij->barray;
395:   PetscBool          roworiented = baij->roworiented;
396:   PetscInt           i, j, ii, jj, row, col, rstart = baij->rstartbs;
397:   PetscInt           rend = baij->rendbs, cstart = baij->cstartbs, stepval;
398:   PetscInt           cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

400:   PetscFunctionBegin;
401:   if (!barray) {
402:     PetscCall(PetscMalloc1(bs2, &barray));
403:     baij->barray = barray;
404:   }

406:   if (roworiented) stepval = (n - 1) * bs;
407:   else stepval = (m - 1) * bs;

409:   for (i = 0; i < m; i++) {
410:     if (im[i] < 0) continue;
411:     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
412:     if (im[i] >= rstart && im[i] < rend) {
413:       row = im[i] - rstart;
414:       for (j = 0; j < n; j++) {
415:         /* If NumCol = 1 then a copy is not required */
416:         if ((roworiented) && (n == 1)) {
417:           barray = (MatScalar *)v + i * bs2;
418:         } else if ((!roworiented) && (m == 1)) {
419:           barray = (MatScalar *)v + j * bs2;
420:         } else { /* Here a copy is required */
421:           if (roworiented) {
422:             value = v + (i * (stepval + bs) + j) * bs;
423:           } else {
424:             value = v + (j * (stepval + bs) + i) * bs;
425:           }
426:           for (ii = 0; ii < bs; ii++, value += bs + stepval) {
427:             for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
428:             barray += bs;
429:           }
430:           barray -= bs2;
431:         }

433:         if (in[j] >= cstart && in[j] < cend) {
434:           col = in[j] - cstart;
435:           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
436:         } else if (in[j] < 0) {
437:           continue;
438:         } else {
439:           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
440:           if (mat->was_assembled) {
441:             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

443: #if defined(PETSC_USE_DEBUG)
444:   #if defined(PETSC_USE_CTABLE)
445:             {
446:               PetscInt data;
447:               PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
448:               PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
449:             }
450:   #else
451:             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
452:   #endif
453: #endif
454: #if defined(PETSC_USE_CTABLE)
455:             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
456:             col = (col - 1) / bs;
457: #else
458:             col = (baij->colmap[in[j]] - 1) / bs;
459: #endif
460:             if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
461:               PetscCall(MatDisAssemble_MPIBAIJ(mat));
462:               col = in[j];
463:             } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
464:           } else col = in[j];
465:           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
466:         }
467:       }
468:     } else {
469:       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
470:       if (!baij->donotstash) {
471:         if (roworiented) {
472:           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
473:         } else {
474:           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
475:         }
476:       }
477:     }
478:   }
479:   PetscFunctionReturn(PETSC_SUCCESS);
480: }

482: #define HASH_KEY             0.6180339887
483: #define HASH(size, key, tmp) (tmp = (key)*HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
484: /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
485: /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
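/*
   HASH() is multiplicative hashing with the golden-ratio constant: the key is scaled
   by HASH_KEY, the integer part is discarded, and the fractional part is scaled up to
   a slot index in [0, size).  With purely illustrative numbers, size = 100 and
   key = 10:

       tmp = 10 * 0.6180339887 = 6.180339887
       h1  = (PetscInt)(100 * 0.180339887) = 18

   Collisions are resolved by the linear probes in the routines below and in
   MatCreateHashTable_MPIBAIJ_Private().
*/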
486: static PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
487: {
488:   Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
489:   PetscBool    roworiented = baij->roworiented;
490:   PetscInt     i, j, row, col;
491:   PetscInt     rstart_orig = mat->rmap->rstart;
492:   PetscInt     rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
493:   PetscInt     h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
494:   PetscReal    tmp;
495:   MatScalar  **HD       = baij->hd, value;
496:   PetscInt     total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

498:   PetscFunctionBegin;
499:   for (i = 0; i < m; i++) {
500:     if (PetscDefined(USE_DEBUG)) {
501:       PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
502:       PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
503:     }
504:     row = im[i];
505:     if (row >= rstart_orig && row < rend_orig) {
506:       for (j = 0; j < n; j++) {
507:         col = in[j];
508:         if (roworiented) value = v[i * n + j];
509:         else value = v[i + j * m];
510:         /* Look up into the Hash Table */
511:         key = (row / bs) * Nbs + (col / bs) + 1;
512:         h1  = HASH(size, key, tmp);

514:         idx = h1;
515:         if (PetscDefined(USE_DEBUG)) {
516:           insert_ct++;
517:           total_ct++;
518:           if (HT[idx] != key) {
519:             for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
520:               ;
521:             if (idx == size) {
522:               for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
523:                 ;
524:               PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
525:             }
526:           }
527:         } else if (HT[idx] != key) {
528:           for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
529:             ;
530:           if (idx == size) {
531:             for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
532:               ;
533:             PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
534:           }
535:         }
536:         /* A HASH table entry is found, so insert the values at the correct address */
537:         if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
538:         else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
539:       }
540:     } else if (!baij->donotstash) {
541:       if (roworiented) {
542:         PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
543:       } else {
544:         PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
545:       }
546:     }
547:   }
548:   if (PetscDefined(USE_DEBUG)) {
549:     baij->ht_total_ct += total_ct;
550:     baij->ht_insert_ct += insert_ct;
551:   }
552:   PetscFunctionReturn(PETSC_SUCCESS);
553: }

555: static PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
556: {
557:   Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
558:   PetscBool          roworiented = baij->roworiented;
559:   PetscInt           i, j, ii, jj, row, col;
560:   PetscInt           rstart = baij->rstartbs;
561:   PetscInt           rend = baij->rendbs, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2;
562:   PetscInt           h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
563:   PetscReal          tmp;
564:   MatScalar        **HD = baij->hd, *baij_a;
565:   const PetscScalar *v_t, *value;
566:   PetscInt           total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

568:   PetscFunctionBegin;
569:   if (roworiented) stepval = (n - 1) * bs;
570:   else stepval = (m - 1) * bs;

572:   for (i = 0; i < m; i++) {
573:     if (PetscDefined(USE_DEBUG)) {
574:       PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
575:       PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
576:     }
577:     row = im[i];
578:     v_t = v + i * nbs2;
579:     if (row >= rstart && row < rend) {
580:       for (j = 0; j < n; j++) {
581:         col = in[j];

583:         /* Look up into the Hash Table */
584:         key = row * Nbs + col + 1;
585:         h1  = HASH(size, key, tmp);

587:         idx = h1;
588:         if (PetscDefined(USE_DEBUG)) {
589:           total_ct++;
590:           insert_ct++;
591:           if (HT[idx] != key) {
592:             for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
593:               ;
594:             if (idx == size) {
595:               for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
596:                 ;
597:               PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
598:             }
599:           }
600:         } else if (HT[idx] != key) {
601:           for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
602:             ;
603:           if (idx == size) {
604:             for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
605:               ;
606:             PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
607:           }
608:         }
609:         baij_a = HD[idx];
610:         if (roworiented) {
611:           /*value = v + i*(stepval+bs)*bs + j*bs;*/
612:           /* value = v + (i*(stepval+bs)+j)*bs; */
613:           value = v_t;
614:           v_t += bs;
615:           if (addv == ADD_VALUES) {
616:             for (ii = 0; ii < bs; ii++, value += stepval) {
617:               for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
618:             }
619:           } else {
620:             for (ii = 0; ii < bs; ii++, value += stepval) {
621:               for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
622:             }
623:           }
624:         } else {
625:           value = v + j * (stepval + bs) * bs + i * bs;
626:           if (addv == ADD_VALUES) {
627:             for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
628:               for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
629:             }
630:           } else {
631:             for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
632:               for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
633:             }
634:           }
635:         }
636:       }
637:     } else {
638:       if (!baij->donotstash) {
639:         if (roworiented) {
640:           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
641:         } else {
642:           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
643:         }
644:       }
645:     }
646:   }
647:   if (PetscDefined(USE_DEBUG)) {
648:     baij->ht_total_ct += total_ct;
649:     baij->ht_insert_ct += insert_ct;
650:   }
651:   PetscFunctionReturn(PETSC_SUCCESS);
652: }

654: static PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
655: {
656:   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
657:   PetscInt     bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
658:   PetscInt     bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

660:   PetscFunctionBegin;
661:   for (i = 0; i < m; i++) {
662:     if (idxm[i] < 0) continue; /* negative row */
663:     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
664:     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
665:       row = idxm[i] - bsrstart;
666:       for (j = 0; j < n; j++) {
667:         if (idxn[j] < 0) continue; /* negative column */
668:         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
669:         if (idxn[j] >= bscstart && idxn[j] < bscend) {
670:           col = idxn[j] - bscstart;
671:           PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
672:         } else {
673:           if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
674: #if defined(PETSC_USE_CTABLE)
675:           PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
676:           data--;
677: #else
678:           data = baij->colmap[idxn[j] / bs] - 1;
679: #endif
680:           if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
681:           else {
682:             col = data + idxn[j] % bs;
683:             PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
684:           }
685:         }
686:       }
687:     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
688:   }
689:   PetscFunctionReturn(PETSC_SUCCESS);
690: }
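/*
   The three norms computed below are, for the global matrix entries a_ij,

       NORM_FROBENIUS:  sqrt( sum_ij |a_ij|^2 )
       NORM_1:          max_j sum_i |a_ij|   (maximum column sum)
       NORM_INFINITY:   max_i sum_j |a_ij|   (maximum row sum)

   where each process sums over its diagonal (A) and off-diagonal (B) blocks and the
   partial results are combined with MPIU_Allreduce().
*/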

692: static PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm)
693: {
694:   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
695:   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
696:   PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
697:   PetscReal    sum = 0.0;
698:   MatScalar   *v;

700:   PetscFunctionBegin;
701:   if (baij->size == 1) {
702:     PetscCall(MatNorm(baij->A, type, nrm));
703:   } else {
704:     if (type == NORM_FROBENIUS) {
705:       v  = amat->a;
706:       nz = amat->nz * bs2;
707:       for (i = 0; i < nz; i++) {
708:         sum += PetscRealPart(PetscConj(*v) * (*v));
709:         v++;
710:       }
711:       v  = bmat->a;
712:       nz = bmat->nz * bs2;
713:       for (i = 0; i < nz; i++) {
714:         sum += PetscRealPart(PetscConj(*v) * (*v));
715:         v++;
716:       }
717:       PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
718:       *nrm = PetscSqrtReal(*nrm);
719:     } else if (type == NORM_1) { /* max column sum */
720:       PetscReal *tmp, *tmp2;
721:       PetscInt  *jj, *garray = baij->garray, cstart = baij->rstartbs;
722:       PetscCall(PetscCalloc1(mat->cmap->N, &tmp));
723:       PetscCall(PetscMalloc1(mat->cmap->N, &tmp2));
724:       v  = amat->a;
725:       jj = amat->j;
726:       for (i = 0; i < amat->nz; i++) {
727:         for (j = 0; j < bs; j++) {
728:           col = bs * (cstart + *jj) + j; /* column index */
729:           for (row = 0; row < bs; row++) {
730:             tmp[col] += PetscAbsScalar(*v);
731:             v++;
732:           }
733:         }
734:         jj++;
735:       }
736:       v  = bmat->a;
737:       jj = bmat->j;
738:       for (i = 0; i < bmat->nz; i++) {
739:         for (j = 0; j < bs; j++) {
740:           col = bs * garray[*jj] + j;
741:           for (row = 0; row < bs; row++) {
742:             tmp[col] += PetscAbsScalar(*v);
743:             v++;
744:           }
745:         }
746:         jj++;
747:       }
748:       PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
749:       *nrm = 0.0;
750:       for (j = 0; j < mat->cmap->N; j++) {
751:         if (tmp2[j] > *nrm) *nrm = tmp2[j];
752:       }
753:       PetscCall(PetscFree(tmp));
754:       PetscCall(PetscFree(tmp2));
755:     } else if (type == NORM_INFINITY) { /* max row sum */
756:       PetscReal *sums;
757:       PetscCall(PetscMalloc1(bs, &sums));
758:       sum = 0.0;
759:       for (j = 0; j < amat->mbs; j++) {
760:         for (row = 0; row < bs; row++) sums[row] = 0.0;
761:         v  = amat->a + bs2 * amat->i[j];
762:         nz = amat->i[j + 1] - amat->i[j];
763:         for (i = 0; i < nz; i++) {
764:           for (col = 0; col < bs; col++) {
765:             for (row = 0; row < bs; row++) {
766:               sums[row] += PetscAbsScalar(*v);
767:               v++;
768:             }
769:           }
770:         }
771:         v  = bmat->a + bs2 * bmat->i[j];
772:         nz = bmat->i[j + 1] - bmat->i[j];
773:         for (i = 0; i < nz; i++) {
774:           for (col = 0; col < bs; col++) {
775:             for (row = 0; row < bs; row++) {
776:               sums[row] += PetscAbsScalar(*v);
777:               v++;
778:             }
779:           }
780:         }
781:         for (row = 0; row < bs; row++) {
782:           if (sums[row] > sum) sum = sums[row];
783:         }
784:       }
785:       PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
786:       PetscCall(PetscFree(sums));
787:     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
788:   }
789:   PetscFunctionReturn(PETSC_SUCCESS);
790: }

792: /*
793:   Creates the hash table and fills in its entries.
794:   This table is created only once.
795:   If new entries need to be added to the matrix
796:   then the hash table has to be destroyed and
797:   recreated.
798: */
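/*
   Layout sketch: two parallel arrays of length ht_size are built,

       HT[idx]  -- the key row*Nbs + col + 1 (block row/column indices; the +1 lets
                   0 mark an empty slot)
       HD[idx]  -- the address of the corresponding bs2-sized block of values inside
                   a->a or b->a

   Each block is first hashed to HASH(ht_size, key, tmp) and, on collision, placed in
   the next free slot found by linear probing (wrapping modulo ht_size).  The _HT
   MatSetValues routines above walk the same probe sequence to locate a block.
*/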
799: static PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor)
800: {
801:   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
802:   Mat          A = baij->A, B = baij->B;
803:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
804:   PetscInt     i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
805:   PetscInt     ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
806:   PetscInt     cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
807:   PetscInt    *HT, key;
808:   MatScalar  **HD;
809:   PetscReal    tmp;
810: #if defined(PETSC_USE_INFO)
811:   PetscInt ct = 0, max = 0;
812: #endif

814:   PetscFunctionBegin;
815:   if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS);

817:   baij->ht_size = (PetscInt)(factor * nz);
818:   ht_size       = baij->ht_size;

820:   /* Allocate Memory for Hash Table */
821:   PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht));
822:   HD = baij->hd;
823:   HT = baij->ht;

825:   /* Loop Over A */
826:   for (i = 0; i < a->mbs; i++) {
827:     for (j = ai[i]; j < ai[i + 1]; j++) {
828:       row = i + rstart;
829:       col = aj[j] + cstart;

831:       key = row * Nbs + col + 1;
832:       h1  = HASH(ht_size, key, tmp);
833:       for (k = 0; k < ht_size; k++) {
834:         if (!HT[(h1 + k) % ht_size]) {
835:           HT[(h1 + k) % ht_size] = key;
836:           HD[(h1 + k) % ht_size] = a->a + j * bs2;
837:           break;
838: #if defined(PETSC_USE_INFO)
839:         } else {
840:           ct++;
841: #endif
842:         }
843:       }
844: #if defined(PETSC_USE_INFO)
845:       if (k > max) max = k;
846: #endif
847:     }
848:   }
849:   /* Loop Over B */
850:   for (i = 0; i < b->mbs; i++) {
851:     for (j = bi[i]; j < bi[i + 1]; j++) {
852:       row = i + rstart;
853:       col = garray[bj[j]];
854:       key = row * Nbs + col + 1;
855:       h1  = HASH(ht_size, key, tmp);
856:       for (k = 0; k < ht_size; k++) {
857:         if (!HT[(h1 + k) % ht_size]) {
858:           HT[(h1 + k) % ht_size] = key;
859:           HD[(h1 + k) % ht_size] = b->a + j * bs2;
860:           break;
861: #if defined(PETSC_USE_INFO)
862:         } else {
863:           ct++;
864: #endif
865:         }
866:       }
867: #if defined(PETSC_USE_INFO)
868:       if (k > max) max = k;
869: #endif
870:     }
871:   }

873:   /* Print Summary */
874: #if defined(PETSC_USE_INFO)
875:   for (i = 0, j = 0; i < ht_size; i++) {
876:     if (HT[i]) j++;
877:   }
878:   PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? (double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max));
879: #endif
880:   PetscFunctionReturn(PETSC_SUCCESS);
881: }

883: static PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode)
884: {
885:   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
886:   PetscInt     nstash, reallocs;

888:   PetscFunctionBegin;
889:   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

891:   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
892:   PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
893:   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
894:   PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
895:   PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
896:   PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
897:   PetscFunctionReturn(PETSC_SUCCESS);
898: }

900: static PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode)
901: {
902:   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
903:   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
904:   PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
905:   PetscInt    *row, *col;
906:   PetscBool    r1, r2, r3, other_disassembled;
907:   MatScalar   *val;
908:   PetscMPIInt  n;

910:   PetscFunctionBegin;
911:   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
912:   if (!baij->donotstash && !mat->nooffprocentries) {
913:     while (1) {
914:       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
915:       if (!flg) break;

917:       for (i = 0; i < n;) {
918:         /* Now identify the consecutive vals belonging to the same row */
919:         for (j = i, rstart = row[j]; j < n; j++) {
920:           if (row[j] != rstart) break;
921:         }
922:         if (j < n) ncols = j - i;
923:         else ncols = n - i;
924:         /* Now assemble all these values with a single function call */
925:         PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
926:         i = j;
927:       }
928:     }
929:     PetscCall(MatStashScatterEnd_Private(&mat->stash));
930:     /* Now process the block-stash. Since the values are stashed column-oriented,
931:        set the roworiented flag to column oriented, and after MatSetValues()
932:        restore the original flags */
933:     r1 = baij->roworiented;
934:     r2 = a->roworiented;
935:     r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

937:     baij->roworiented = PETSC_FALSE;
938:     a->roworiented    = PETSC_FALSE;

940:     (((Mat_SeqBAIJ *)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
941:     while (1) {
942:       PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
943:       if (!flg) break;

945:       for (i = 0; i < n;) {
946:         /* Now identify the consecutive vals belonging to the same row */
947:         for (j = i, rstart = row[j]; j < n; j++) {
948:           if (row[j] != rstart) break;
949:         }
950:         if (j < n) ncols = j - i;
951:         else ncols = n - i;
952:         PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
953:         i = j;
954:       }
955:     }
956:     PetscCall(MatStashScatterEnd_Private(&mat->bstash));

958:     baij->roworiented = r1;
959:     a->roworiented    = r2;

961:     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
962:   }

964:   PetscCall(MatAssemblyBegin(baij->A, mode));
965:   PetscCall(MatAssemblyEnd(baij->A, mode));

967:   /* determine if any processor has disassembled, if so we must
968:      also disassemble ourselves, in order that we may reassemble. */
969:   /*
970:      if nonzero structure of submatrix B cannot change then we know that
971:      no processor disassembled thus we can skip this stuff
972:   */
973:   if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
974:     PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
975:     if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
976:   }

978:   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
979:   PetscCall(MatAssemblyBegin(baij->B, mode));
980:   PetscCall(MatAssemblyEnd(baij->B, mode));

982: #if defined(PETSC_USE_INFO)
983:   if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
984:     PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));

986:     baij->ht_total_ct  = 0;
987:     baij->ht_insert_ct = 0;
988:   }
989: #endif
990:   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
991:     PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));

993:     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
994:     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
995:   }

997:   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

999:   baij->rowvalues = NULL;

1001:   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
1002:   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
1003:     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
1004:     PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
1005:   }
1006:   PetscFunctionReturn(PETSC_SUCCESS);
1007: }

1009: extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer);
1010: #include <petscdraw.h>
1011: static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
1012: {
1013:   Mat_MPIBAIJ      *baij = (Mat_MPIBAIJ *)mat->data;
1014:   PetscMPIInt       rank = baij->rank;
1015:   PetscInt          bs   = mat->rmap->bs;
1016:   PetscBool         iascii, isdraw;
1017:   PetscViewer       sviewer;
1018:   PetscViewerFormat format;

1020:   PetscFunctionBegin;
1021:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1022:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1023:   if (iascii) {
1024:     PetscCall(PetscViewerGetFormat(viewer, &format));
1025:     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1026:       MatInfo info;
1027:       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1028:       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
1029:       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1030:       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1031:                                                    mat->rmap->bs, (double)info.memory));
1032:       PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
1033:       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1034:       PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
1035:       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1036:       PetscCall(PetscViewerFlush(viewer));
1037:       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1038:       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
1039:       PetscCall(VecScatterView(baij->Mvctx, viewer));
1040:       PetscFunctionReturn(PETSC_SUCCESS);
1041:     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1042:       PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
1043:       PetscFunctionReturn(PETSC_SUCCESS);
1044:     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1045:       PetscFunctionReturn(PETSC_SUCCESS);
1046:     }
1047:   }

1049:   if (isdraw) {
1050:     PetscDraw draw;
1051:     PetscBool isnull;
1052:     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
1053:     PetscCall(PetscDrawIsNull(draw, &isnull));
1054:     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
1055:   }

1057:   {
1058:     /* assemble the entire matrix onto first processor. */
1059:     Mat          A;
1060:     Mat_SeqBAIJ *Aloc;
1061:     PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
1062:     MatScalar   *a;
1063:     const char  *matname;

1065:     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1066:     /* Perhaps this should be the type of mat? */
1067:     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
1068:     if (rank == 0) {
1069:       PetscCall(MatSetSizes(A, M, N, M, N));
1070:     } else {
1071:       PetscCall(MatSetSizes(A, 0, 0, M, N));
1072:     }
1073:     PetscCall(MatSetType(A, MATMPIBAIJ));
1074:     PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
1075:     PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

1077:     /* copy over the A part */
1078:     Aloc = (Mat_SeqBAIJ *)baij->A->data;
1079:     ai   = Aloc->i;
1080:     aj   = Aloc->j;
1081:     a    = Aloc->a;
1082:     PetscCall(PetscMalloc1(bs, &rvals));

1084:     for (i = 0; i < mbs; i++) {
1085:       rvals[0] = bs * (baij->rstartbs + i);
1086:       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1087:       for (j = ai[i]; j < ai[i + 1]; j++) {
1088:         col = (baij->cstartbs + aj[j]) * bs;
1089:         for (k = 0; k < bs; k++) {
1090:           PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
1091:           col++;
1092:           a += bs;
1093:         }
1094:       }
1095:     }
1096:     /* copy over the B part */
1097:     Aloc = (Mat_SeqBAIJ *)baij->B->data;
1098:     ai   = Aloc->i;
1099:     aj   = Aloc->j;
1100:     a    = Aloc->a;
1101:     for (i = 0; i < mbs; i++) {
1102:       rvals[0] = bs * (baij->rstartbs + i);
1103:       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1104:       for (j = ai[i]; j < ai[i + 1]; j++) {
1105:         col = baij->garray[aj[j]] * bs;
1106:         for (k = 0; k < bs; k++) {
1107:           PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
1108:           col++;
1109:           a += bs;
1110:         }
1111:       }
1112:     }
1113:     PetscCall(PetscFree(rvals));
1114:     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1115:     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1116:     /*
1117:        Everyone has to call to draw the matrix since the graphics waits are
1118:        synchronized across all processors that share the PetscDraw object
1119:     */
1120:     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1121:     if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
1122:     if (rank == 0) {
1123:       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)(A->data))->A, matname));
1124:       PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)(A->data))->A, sviewer));
1125:     }
1126:     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1127:     PetscCall(PetscViewerFlush(viewer));
1128:     PetscCall(MatDestroy(&A));
1129:   }
1130:   PetscFunctionReturn(PETSC_SUCCESS);
1131: }

1133: /* Used for both MPIBAIJ and MPISBAIJ matrices */
1134: PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
1135: {
1136:   Mat_MPIBAIJ    *aij    = (Mat_MPIBAIJ *)mat->data;
1137:   Mat_SeqBAIJ    *A      = (Mat_SeqBAIJ *)aij->A->data;
1138:   Mat_SeqBAIJ    *B      = (Mat_SeqBAIJ *)aij->B->data;
1139:   const PetscInt *garray = aij->garray;
1140:   PetscInt        header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l;
1141:   PetscInt64      nz, hnz;
1142:   PetscInt       *rowlens, *colidxs;
1143:   PetscScalar    *matvals;
1144:   PetscMPIInt     rank;

1146:   PetscFunctionBegin;
1147:   PetscCall(PetscViewerSetUp(viewer));

1149:   M  = mat->rmap->N;
1150:   N  = mat->cmap->N;
1151:   m  = mat->rmap->n;
1152:   rs = mat->rmap->rstart;
1153:   cs = mat->cmap->rstart;
1154:   bs = mat->rmap->bs;
1155:   nz = bs * bs * (A->nz + B->nz);

1157:   /* write matrix header */
1158:   header[0] = MAT_FILE_CLASSID;
1159:   header[1] = M;
1160:   header[2] = N;
1161:   PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1162:   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1163:   if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
1164:   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

1166:   /* fill in and store row lengths */
1167:   PetscCall(PetscMalloc1(m, &rowlens));
1168:   for (cnt = 0, i = 0; i < A->mbs; i++)
1169:     for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
1170:   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1171:   PetscCall(PetscFree(rowlens));

1173:   /* fill in and store column indices */
1174:   PetscCall(PetscMalloc1(nz, &colidxs));
1175:   for (cnt = 0, i = 0; i < A->mbs; i++) {
1176:     for (k = 0; k < bs; k++) {
1177:       for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1178:         if (garray[B->j[jb]] > cs / bs) break;
1179:         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
1180:       }
1181:       for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
1182:         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
1183:       for (; jb < B->i[i + 1]; jb++)
1184:         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
1185:     }
1186:   }
1187:   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1188:   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
1189:   PetscCall(PetscFree(colidxs));

1191:   /* fill in and store nonzero values */
1192:   PetscCall(PetscMalloc1(nz, &matvals));
1193:   for (cnt = 0, i = 0; i < A->mbs; i++) {
1194:     for (k = 0; k < bs; k++) {
1195:       for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1196:         if (garray[B->j[jb]] > cs / bs) break;
1197:         for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
1198:       }
1199:       for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
1200:         for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
1201:       for (; jb < B->i[i + 1]; jb++)
1202:         for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
1203:     }
1204:   }
1205:   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
1206:   PetscCall(PetscFree(matvals));

1208:   /* write block size option to the viewer's .info file */
1209:   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1210:   PetscFunctionReturn(PETSC_SUCCESS);
1211: }
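/*
   Summary of the binary layout written above (the standard PETSc binary matrix
   format, counted by point entries, bs*bs per block):

       header[0] = MAT_FILE_CLASSID
       header[1] = M  (global number of rows)
       header[2] = N  (global number of columns)
       header[3] = total number of nonzeros

   followed by the M row lengths, then the column indices of all nonzeros (rows in
   ascending order, with the off-diagonal and diagonal parts interleaved so columns
   appear in ascending global order within each row), and finally the matching
   values.  The block size itself is only recorded in the viewer's .info file by
   MatView_Binary_BlockSizes().
*/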

1213: PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer)
1214: {
1215:   PetscBool iascii, isdraw, issocket, isbinary;

1217:   PetscFunctionBegin;
1218:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1219:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1220:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1221:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1222:   if (iascii || isdraw || issocket) {
1223:     PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
1224:   } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
1225:   PetscFunctionReturn(PETSC_SUCCESS);
1226: }

1228: static PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy)
1229: {
1230:   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1231:   PetscInt     nt;

1233:   PetscFunctionBegin;
1234:   PetscCall(VecGetLocalSize(xx, &nt));
1235:   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx");
1236:   PetscCall(VecGetLocalSize(yy, &nt));
1237:   PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy");
1238:   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1239:   PetscCall((*a->A->ops->mult)(a->A, xx, yy));
1240:   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1241:   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
1242:   PetscFunctionReturn(PETSC_SUCCESS);
1243: }

1245: static PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1246: {
1247:   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

1249:   PetscFunctionBegin;
1250:   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1251:   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1252:   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1253:   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1254:   PetscFunctionReturn(PETSC_SUCCESS);
1255: }

1257: static PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy)
1258: {
1259:   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

1261:   PetscFunctionBegin;
1262:   /* do nondiagonal part */
1263:   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1264:   /* do local part */
1265:   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1266:   /* add partial results together */
1267:   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1268:   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1269:   PetscFunctionReturn(PETSC_SUCCESS);
1270: }

1272: static PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1273: {
1274:   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

1276:   PetscFunctionBegin;
1277:   /* do nondiagonal part */
1278:   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1279:   /* do local part */
1280:   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1281:   /* add partial results together */
1282:   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1283:   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1284:   PetscFunctionReturn(PETSC_SUCCESS);
1285: }

1287: /*
1288:   This only works correctly for square matrices where the subblock A->A is the
1289:    diagonal block
1290: */
1291: static PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v)
1292: {
1293:   PetscFunctionBegin;
1294:   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1295:   PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
1296:   PetscFunctionReturn(PETSC_SUCCESS);
1297: }
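
/*
   A minimal usage sketch, assuming A is a square, assembled MATMPIBAIJ matrix (the check above
   rejects rectangular matrices): the diagonal is obtained through the public MatGetDiagonal().

     Vec d;
     PetscCall(MatCreateVecs(A, NULL, &d));   // a vector conforming to the row layout of A
     PetscCall(MatGetDiagonal(A, d));         // d[i] = A[i][i], taken from the local diagonal block
     PetscCall(VecDestroy(&d));
*/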

1299: static PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa)
1300: {
1301:   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

1303:   PetscFunctionBegin;
1304:   PetscCall(MatScale(a->A, aa));
1305:   PetscCall(MatScale(a->B, aa));
1306:   PetscFunctionReturn(PETSC_SUCCESS);
1307: }

1309: PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1310: {
1311:   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
1312:   PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1313:   PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
1314:   PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
1315:   PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;

1317:   PetscFunctionBegin;
1318:   PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
1319:   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1320:   mat->getrowactive = PETSC_TRUE;

1322:   if (!mat->rowvalues && (idx || v)) {
1323:     /*
1324:         allocate enough space to hold information from the longest row.
1325:     */
1326:     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
1327:     PetscInt     max = 1, mbs = mat->mbs, tmp;
1328:     for (i = 0; i < mbs; i++) {
1329:       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
1330:       if (max < tmp) max = tmp;
1331:     }
1332:     PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
1333:   }
1334:   lrow = row - brstart;

1336:   pvA = &vworkA;
1337:   pcA = &cworkA;
1338:   pvB = &vworkB;
1339:   pcB = &cworkB;
1340:   if (!v) {
1341:     pvA = NULL;
1342:     pvB = NULL;
1343:   }
1344:   if (!idx) {
1345:     pcA = NULL;
1346:     if (!v) pcB = NULL;
1347:   }
1348:   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1349:   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1350:   nztot = nzA + nzB;

1352:   cmap = mat->garray;
1353:   if (v || idx) {
1354:     if (nztot) {
1355:       /* Sort by increasing column numbers, assuming A and B already sorted */
1356:       PetscInt imark = -1;
1357:       if (v) {
1358:         *v = v_p = mat->rowvalues;
1359:         for (i = 0; i < nzB; i++) {
1360:           if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
1361:           else break;
1362:         }
1363:         imark = i;
1364:         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1365:         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1366:       }
1367:       if (idx) {
1368:         *idx = idx_p = mat->rowindices;
1369:         if (imark > -1) {
1370:           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1371:         } else {
1372:           for (i = 0; i < nzB; i++) {
1373:             if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1374:             else break;
1375:           }
1376:           imark = i;
1377:         }
1378:         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
1379:         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1380:       }
1381:     } else {
1382:       if (idx) *idx = NULL;
1383:       if (v) *v = NULL;
1384:     }
1385:   }
1386:   *nz = nztot;
1387:   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1388:   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1389:   PetscFunctionReturn(PETSC_SUCCESS);
1390: }

1392: PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1393: {
1394:   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

1396:   PetscFunctionBegin;
1397:   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
1398:   baij->getrowactive = PETSC_FALSE;
1399:   PetscFunctionReturn(PETSC_SUCCESS);
1400: }
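
/*
   A minimal usage sketch, assuming A is an assembled MATMPIBAIJ matrix: MatGetRow() may only be
   asked for locally owned rows (enforced above) and must be paired with MatRestoreRow() before
   the next row is requested.

     PetscInt           rstart, rend, ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
     for (PetscInt row = rstart; row < rend; row++) {
       PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
       // ... inspect ncols, cols[], vals[] (read-only, valid until MatRestoreRow) ...
       PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
     }
*/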

1402: static PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1403: {
1404:   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;

1406:   PetscFunctionBegin;
1407:   PetscCall(MatZeroEntries(l->A));
1408:   PetscCall(MatZeroEntries(l->B));
1409:   PetscFunctionReturn(PETSC_SUCCESS);
1410: }

1412: static PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1413: {
1414:   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ *)matin->data;
1415:   Mat            A = a->A, B = a->B;
1416:   PetscLogDouble isend[5], irecv[5];

1418:   PetscFunctionBegin;
1419:   info->block_size = (PetscReal)matin->rmap->bs;

1421:   PetscCall(MatGetInfo(A, MAT_LOCAL, info));

1423:   isend[0] = info->nz_used;
1424:   isend[1] = info->nz_allocated;
1425:   isend[2] = info->nz_unneeded;
1426:   isend[3] = info->memory;
1427:   isend[4] = info->mallocs;

1429:   PetscCall(MatGetInfo(B, MAT_LOCAL, info));

1431:   isend[0] += info->nz_used;
1432:   isend[1] += info->nz_allocated;
1433:   isend[2] += info->nz_unneeded;
1434:   isend[3] += info->memory;
1435:   isend[4] += info->mallocs;

1437:   if (flag == MAT_LOCAL) {
1438:     info->nz_used      = isend[0];
1439:     info->nz_allocated = isend[1];
1440:     info->nz_unneeded  = isend[2];
1441:     info->memory       = isend[3];
1442:     info->mallocs      = isend[4];
1443:   } else if (flag == MAT_GLOBAL_MAX) {
1444:     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));

1446:     info->nz_used      = irecv[0];
1447:     info->nz_allocated = irecv[1];
1448:     info->nz_unneeded  = irecv[2];
1449:     info->memory       = irecv[3];
1450:     info->mallocs      = irecv[4];
1451:   } else if (flag == MAT_GLOBAL_SUM) {
1452:     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));

1454:     info->nz_used      = irecv[0];
1455:     info->nz_allocated = irecv[1];
1456:     info->nz_unneeded  = irecv[2];
1457:     info->memory       = irecv[3];
1458:     info->mallocs      = irecv[4];
1459:   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1460:   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1461:   info->fill_ratio_needed = 0;
1462:   info->factor_mallocs    = 0;
1463:   PetscFunctionReturn(PETSC_SUCCESS);
1464: }
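
/*
   A minimal usage sketch, assuming A is an assembled MATMPIBAIJ matrix: with MAT_GLOBAL_SUM
   (or MAT_GLOBAL_MAX) the routine above reduces the per-process counts over the communicator.

     MatInfo info;
     PetscCall(MatGetInfo(A, MAT_GLOBAL_SUM, &info));
     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "nonzeros used %g, allocated %g\n", info.nz_used, info.nz_allocated));
*/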

1466: static PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg)
1467: {
1468:   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

1470:   PetscFunctionBegin;
1471:   switch (op) {
1472:   case MAT_NEW_NONZERO_LOCATIONS:
1473:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1474:   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1475:   case MAT_KEEP_NONZERO_PATTERN:
1476:   case MAT_NEW_NONZERO_LOCATION_ERR:
1477:     MatCheckPreallocated(A, 1);
1478:     PetscCall(MatSetOption(a->A, op, flg));
1479:     PetscCall(MatSetOption(a->B, op, flg));
1480:     break;
1481:   case MAT_ROW_ORIENTED:
1482:     MatCheckPreallocated(A, 1);
1483:     a->roworiented = flg;

1485:     PetscCall(MatSetOption(a->A, op, flg));
1486:     PetscCall(MatSetOption(a->B, op, flg));
1487:     break;
1488:   case MAT_FORCE_DIAGONAL_ENTRIES:
1489:   case MAT_SORTED_FULL:
1490:     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1491:     break;
1492:   case MAT_IGNORE_OFF_PROC_ENTRIES:
1493:     a->donotstash = flg;
1494:     break;
1495:   case MAT_USE_HASH_TABLE:
1496:     a->ht_flag = flg;
1497:     a->ht_fact = 1.39;
1498:     break;
1499:   case MAT_SYMMETRIC:
1500:   case MAT_STRUCTURALLY_SYMMETRIC:
1501:   case MAT_HERMITIAN:
1502:   case MAT_SUBMAT_SINGLEIS:
1503:   case MAT_SYMMETRY_ETERNAL:
1504:   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1505:   case MAT_SPD_ETERNAL:
1506:     /* if the diagonal block is square it inherits some of the properties above */
1507:     break;
1508:   default:
1509:     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op);
1510:   }
1511:   PetscFunctionReturn(PETSC_SUCCESS);
1512: }
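
/*
   A minimal usage sketch, assuming A is a MATMPIBAIJ matrix and the options are set before
   assembly: the cases handled above are reached through the public MatSetOption().

     PetscCall(MatSetOption(A, MAT_USE_HASH_TABLE, PETSC_TRUE));          // enable the hash-table insertion path (ht_fact set above)
     PetscCall(MatSetOption(A, MAT_IGNORE_OFF_PROC_ENTRIES, PETSC_TRUE)); // drop entries destined for other ranks instead of stashing them
*/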

1514: static PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout)
1515: {
1516:   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
1517:   Mat_SeqBAIJ *Aloc;
1518:   Mat          B;
1519:   PetscInt     M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col;
1520:   PetscInt     bs = A->rmap->bs, mbs = baij->mbs;
1521:   MatScalar   *a;

1523:   PetscFunctionBegin;
1524:   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1525:   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1526:     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1527:     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1528:     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1529:     /* Do not know preallocation information, but must set block size */
1530:     PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL));
1531:   } else {
1532:     B = *matout;
1533:   }

1535:   /* copy over the A part */
1536:   Aloc = (Mat_SeqBAIJ *)baij->A->data;
1537:   ai   = Aloc->i;
1538:   aj   = Aloc->j;
1539:   a    = Aloc->a;
1540:   PetscCall(PetscMalloc1(bs, &rvals));

1542:   for (i = 0; i < mbs; i++) {
1543:     rvals[0] = bs * (baij->rstartbs + i);
1544:     for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1545:     for (j = ai[i]; j < ai[i + 1]; j++) {
1546:       col = (baij->cstartbs + aj[j]) * bs;
1547:       for (k = 0; k < bs; k++) {
1548:         PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));

1550:         col++;
1551:         a += bs;
1552:       }
1553:     }
1554:   }
1555:   /* copy over the B part */
1556:   Aloc = (Mat_SeqBAIJ *)baij->B->data;
1557:   ai   = Aloc->i;
1558:   aj   = Aloc->j;
1559:   a    = Aloc->a;
1560:   for (i = 0; i < mbs; i++) {
1561:     rvals[0] = bs * (baij->rstartbs + i);
1562:     for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1563:     for (j = ai[i]; j < ai[i + 1]; j++) {
1564:       col = baij->garray[aj[j]] * bs;
1565:       for (k = 0; k < bs; k++) {
1566:         PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
1567:         col++;
1568:         a += bs;
1569:       }
1570:     }
1571:   }
1572:   PetscCall(PetscFree(rvals));
1573:   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1574:   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

1576:   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
1577:   else PetscCall(MatHeaderMerge(A, &B));
1578:   PetscFunctionReturn(PETSC_SUCCESS);
1579: }

1581: static PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr)
1582: {
1583:   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1584:   Mat          a = baij->A, b = baij->B;
1585:   PetscInt     s1, s2, s3;

1587:   PetscFunctionBegin;
1588:   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1589:   if (rr) {
1590:     PetscCall(VecGetLocalSize(rr, &s1));
1591:     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1592:     /* Overlap communication with computation. */
1593:     PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1594:   }
1595:   if (ll) {
1596:     PetscCall(VecGetLocalSize(ll, &s1));
1597:     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1598:     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1599:   }
1600:   /* scale the diagonal block */
1601:   PetscUseTypeMethod(a, diagonalscale, ll, rr);

1603:   if (rr) {
1604:     /* Do a scatter end and then right scale the off-diagonal block */
1605:     PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1606:     PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
1607:   }
1608:   PetscFunctionReturn(PETSC_SUCCESS);
1609: }

1611: static PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
1612: {
1613:   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1614:   PetscInt    *lrows;
1615:   PetscInt     r, len;
1616:   PetscBool    cong;

1618:   PetscFunctionBegin;
1619:   /* get locally owned rows */
1620:   PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
1621:   /* fix right hand side if needed */
1622:   if (x && b) {
1623:     const PetscScalar *xx;
1624:     PetscScalar       *bb;

1626:     PetscCall(VecGetArrayRead(x, &xx));
1627:     PetscCall(VecGetArray(b, &bb));
1628:     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
1629:     PetscCall(VecRestoreArrayRead(x, &xx));
1630:     PetscCall(VecRestoreArray(b, &bb));
1631:   }

1633:   /* actually zap the local rows */
1634:   /*
1635:         Zero the required rows. If the "diagonal block" of the matrix
1636:      is square and the user wishes to set the diagonal we use separate
1637:      code so that MatSetValues() is not called for each diagonal entry, which would
1638:      allocate new memory and trigger many mallocs, slowing things down.

1640:   */
1641:   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1642:   PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL));
1643:   PetscCall(MatHasCongruentLayouts(A, &cong));
1644:   if ((diag != 0.0) && cong) {
1645:     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL));
1646:   } else if (diag != 0.0) {
1647:     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
1648:     PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1649:        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1650:     for (r = 0; r < len; ++r) {
1651:       const PetscInt row = lrows[r] + A->rmap->rstart;
1652:       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
1653:     }
1654:     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1655:     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1656:   } else {
1657:     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
1658:   }
1659:   PetscCall(PetscFree(lrows));

1661:   /* only change matrix nonzero state if pattern was allowed to be changed */
1662:   if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) {
1663:     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1664:     PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1665:   }
1666:   PetscFunctionReturn(PETSC_SUCCESS);
1667: }
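
/*
   A minimal usage sketch, assuming A is an assembled, square MATMPIBAIJ matrix and x, b are the
   solution and right-hand-side vectors of A x = b; the listed global rows are mapped to local
   rows above by MatZeroRowsMapLocal_Private().

     PetscInt rows[] = {0, 7};                       // global row indices to eliminate
     PetscCall(MatZeroRows(A, 2, rows, 1.0, x, b));  // zero the rows, put 1.0 on the diagonal,
                                                     // and set b[row] = 1.0 * x[row]
*/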

1669: static PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
1670: {
1671:   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ *)A->data;
1672:   PetscMPIInt        n = A->rmap->n, p = 0;
1673:   PetscInt           i, j, k, r, len = 0, row, col, count;
1674:   PetscInt          *lrows, *owners = A->rmap->range;
1675:   PetscSFNode       *rrows;
1676:   PetscSF            sf;
1677:   const PetscScalar *xx;
1678:   PetscScalar       *bb, *mask;
1679:   Vec                xmask, lmask;
1680:   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)l->B->data;
1681:   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
1682:   PetscScalar       *aa;

1684:   PetscFunctionBegin;
1685:   /* Create SF where leaves are input rows and roots are owned rows */
1686:   PetscCall(PetscMalloc1(n, &lrows));
1687:   for (r = 0; r < n; ++r) lrows[r] = -1;
1688:   PetscCall(PetscMalloc1(N, &rrows));
1689:   for (r = 0; r < N; ++r) {
1690:     const PetscInt idx = rows[r];
1691:     PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
1692:     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
1693:       PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
1694:     }
1695:     rrows[r].rank  = p;
1696:     rrows[r].index = rows[r] - owners[p];
1697:   }
1698:   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1699:   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
1700:   /* Collect flags for rows to be zeroed */
1701:   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
1702:   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
1703:   PetscCall(PetscSFDestroy(&sf));
1704:   /* Compress and put in row numbers */
1705:   for (r = 0; r < n; ++r)
1706:     if (lrows[r] >= 0) lrows[len++] = r;
1707:   /* zero diagonal part of matrix */
1708:   PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
1709:   /* handle off-diagonal part of matrix */
1710:   PetscCall(MatCreateVecs(A, &xmask, NULL));
1711:   PetscCall(VecDuplicate(l->lvec, &lmask));
1712:   PetscCall(VecGetArray(xmask, &bb));
1713:   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
1714:   PetscCall(VecRestoreArray(xmask, &bb));
1715:   PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
1716:   PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
1717:   PetscCall(VecDestroy(&xmask));
1718:   if (x) {
1719:     PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
1720:     PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
1721:     PetscCall(VecGetArrayRead(l->lvec, &xx));
1722:     PetscCall(VecGetArray(b, &bb));
1723:   }
1724:   PetscCall(VecGetArray(lmask, &mask));
1725:   /* remove zeroed rows of off-diagonal matrix */
1726:   for (i = 0; i < len; ++i) {
1727:     row   = lrows[i];
1728:     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
1729:     aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
1730:     for (k = 0; k < count; ++k) {
1731:       aa[0] = 0.0;
1732:       aa += bs;
1733:     }
1734:   }
1735:   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1736:   for (i = 0; i < l->B->rmap->N; ++i) {
1737:     row = i / bs;
1738:     for (j = baij->i[row]; j < baij->i[row + 1]; ++j) {
1739:       for (k = 0; k < bs; ++k) {
1740:         col = bs * baij->j[j] + k;
1741:         if (PetscAbsScalar(mask[col])) {
1742:           aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k;
1743:           if (x) bb[i] -= aa[0] * xx[col];
1744:           aa[0] = 0.0;
1745:         }
1746:       }
1747:     }
1748:   }
1749:   if (x) {
1750:     PetscCall(VecRestoreArray(b, &bb));
1751:     PetscCall(VecRestoreArrayRead(l->lvec, &xx));
1752:   }
1753:   PetscCall(VecRestoreArray(lmask, &mask));
1754:   PetscCall(VecDestroy(&lmask));
1755:   PetscCall(PetscFree(lrows));

1757:   /* only change matrix nonzero state if pattern was allowed to be changed */
1758:   if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) {
1759:     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1760:     PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1761:   }
1762:   PetscFunctionReturn(PETSC_SUCCESS);
1763: }

1765: static PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1766: {
1767:   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

1769:   PetscFunctionBegin;
1770:   PetscCall(MatSetUnfactored(a->A));
1771:   PetscFunctionReturn(PETSC_SUCCESS);
1772: }

1774: static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *);

1776: static PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag)
1777: {
1778:   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data;
1779:   Mat          a, b, c, d;
1780:   PetscBool    flg;

1782:   PetscFunctionBegin;
1783:   a = matA->A;
1784:   b = matA->B;
1785:   c = matB->A;
1786:   d = matB->B;

1788:   PetscCall(MatEqual(a, c, &flg));
1789:   if (flg) PetscCall(MatEqual(b, d, &flg));
1790:   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1791:   PetscFunctionReturn(PETSC_SUCCESS);
1792: }

1794: static PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str)
1795: {
1796:   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1797:   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;

1799:   PetscFunctionBegin;
1800:   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1801:   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1802:     PetscCall(MatCopy_Basic(A, B, str));
1803:   } else {
1804:     PetscCall(MatCopy(a->A, b->A, str));
1805:     PetscCall(MatCopy(a->B, b->B, str));
1806:   }
1807:   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1808:   PetscFunctionReturn(PETSC_SUCCESS);
1809: }

1811: PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
1812: {
1813:   PetscInt     bs = Y->rmap->bs, m = Y->rmap->N / bs;
1814:   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
1815:   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;

1817:   PetscFunctionBegin;
1818:   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
1819:   PetscFunctionReturn(PETSC_SUCCESS);
1820: }

1822: static PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
1823: {
1824:   Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data;
1825:   PetscBLASInt bnz, one                         = 1;
1826:   Mat_SeqBAIJ *x, *y;
1827:   PetscInt     bs2 = Y->rmap->bs * Y->rmap->bs;

1829:   PetscFunctionBegin;
1830:   if (str == SAME_NONZERO_PATTERN) {
1831:     PetscScalar alpha = a;
1832:     x                 = (Mat_SeqBAIJ *)xx->A->data;
1833:     y                 = (Mat_SeqBAIJ *)yy->A->data;
1834:     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
1835:     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
1836:     x = (Mat_SeqBAIJ *)xx->B->data;
1837:     y = (Mat_SeqBAIJ *)yy->B->data;
1838:     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
1839:     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
1840:     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1841:   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1842:     PetscCall(MatAXPY_Basic(Y, a, X, str));
1843:   } else {
1844:     Mat       B;
1845:     PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
1846:     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
1847:     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
1848:     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
1849:     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
1850:     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
1851:     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
1852:     PetscCall(MatSetType(B, MATMPIBAIJ));
1853:     PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d));
1854:     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
1855:     PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
1856:     /* MatAXPY_BasicWithPreallocation() for BAIJ matrices is much slower than for AIJ, even for bs=1! */
1857:     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
1858:     PetscCall(MatHeaderMerge(Y, &B));
1859:     PetscCall(PetscFree(nnz_d));
1860:     PetscCall(PetscFree(nnz_o));
1861:   }
1862:   PetscFunctionReturn(PETSC_SUCCESS);
1863: }
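
/*
   A minimal usage sketch, assuming X and Y are assembled MATMPIBAIJ matrices with the same sizes
   and block size: the MatStructure flag selects one of the three branches above.

     PetscCall(MatAXPY(Y, 2.0, X, SAME_NONZERO_PATTERN));      // fastest: raw BLAS axpy on the stored blocks
     PetscCall(MatAXPY(Y, 2.0, X, SUBSET_NONZERO_PATTERN));    // nonzeros of X are a subset of those of Y
     PetscCall(MatAXPY(Y, 2.0, X, DIFFERENT_NONZERO_PATTERN)); // general case: Y is re-preallocated and rebuilt
*/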

1865: static PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
1866: {
1867:   PetscFunctionBegin;
1868:   if (PetscDefined(USE_COMPLEX)) {
1869:     Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data;

1871:     PetscCall(MatConjugate_SeqBAIJ(a->A));
1872:     PetscCall(MatConjugate_SeqBAIJ(a->B));
1873:   }
1874:   PetscFunctionReturn(PETSC_SUCCESS);
1875: }

1877: static PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1878: {
1879:   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

1881:   PetscFunctionBegin;
1882:   PetscCall(MatRealPart(a->A));
1883:   PetscCall(MatRealPart(a->B));
1884:   PetscFunctionReturn(PETSC_SUCCESS);
1885: }

1887: static PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1888: {
1889:   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

1891:   PetscFunctionBegin;
1892:   PetscCall(MatImaginaryPart(a->A));
1893:   PetscCall(MatImaginaryPart(a->B));
1894:   PetscFunctionReturn(PETSC_SUCCESS);
1895: }

1897: static PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1898: {
1899:   IS       iscol_local;
1900:   PetscInt csize;

1902:   PetscFunctionBegin;
1903:   PetscCall(ISGetLocalSize(iscol, &csize));
1904:   if (call == MAT_REUSE_MATRIX) {
1905:     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1906:     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1907:   } else {
1908:     PetscCall(ISAllGather(iscol, &iscol_local));
1909:   }
1910:   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat, PETSC_FALSE));
1911:   if (call == MAT_INITIAL_MATRIX) {
1912:     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1913:     PetscCall(ISDestroy(&iscol_local));
1914:   }
1915:   PetscFunctionReturn(PETSC_SUCCESS);
1916: }
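
/*
   A minimal usage sketch, assuming A is an assembled MATMPIBAIJ matrix on PETSC_COMM_WORLD and the
   index sets cover whole blocks; here the local rows are simply re-selected. The gathered column IS
   is cached on the submatrix (as "ISAllGather" above) so a later MAT_REUSE_MATRIX call can find it.

     IS       isrow, iscol;
     Mat      S;
     PetscInt rstart, rend;
     PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
     PetscCall(ISCreateStride(PETSC_COMM_WORLD, rend - rstart, rstart, 1, &isrow));
     PetscCall(ISCreateStride(PETSC_COMM_WORLD, rend - rstart, rstart, 1, &iscol));
     PetscCall(MatCreateSubMatrix(A, isrow, iscol, MAT_INITIAL_MATRIX, &S));
     PetscCall(MatCreateSubMatrix(A, isrow, iscol, MAT_REUSE_MATRIX, &S));   // reuses the cached gathered IS
     PetscCall(ISDestroy(&isrow));
     PetscCall(ISDestroy(&iscol));
     PetscCall(MatDestroy(&S));
*/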

1918: /*
1919:   Not great since it makes two copies of the submatrix: first a SeqBAIJ matrix
1920:   on each process, and then the end result by concatenating the local matrices.
1921:   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
1922:   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
1923: */
1924: PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat, PetscBool sym)
1925: {
1926:   PetscMPIInt  rank, size;
1927:   PetscInt     i, m, n, rstart, row, rend, nz, *cwork, j, bs;
1928:   PetscInt    *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
1929:   Mat          M, Mreuse;
1930:   MatScalar   *vwork, *aa;
1931:   MPI_Comm     comm;
1932:   IS           isrow_new, iscol_new;
1933:   Mat_SeqBAIJ *aij;

1935:   PetscFunctionBegin;
1936:   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
1937:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1938:   PetscCallMPI(MPI_Comm_size(comm, &size));
1939:   /* The compression and expansion should be avoided. It does not report
1940:      errors and might change the indices, hence buggy */
1941:   PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new));
1942:   if (isrow == iscol) {
1943:     iscol_new = isrow_new;
1944:     PetscCall(PetscObjectReference((PetscObject)iscol_new));
1945:   } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new));

1947:   if (call == MAT_REUSE_MATRIX) {
1948:     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
1949:     PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1950:     PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse, sym));
1951:   } else {
1952:     PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse, sym));
1953:   }
1954:   PetscCall(ISDestroy(&isrow_new));
1955:   PetscCall(ISDestroy(&iscol_new));
1956:   /*
1957:       m - number of local rows
1958:       n - number of columns (same on all processors)
1959:       rstart - first row in new global matrix generated
1960:   */
1961:   PetscCall(MatGetBlockSize(mat, &bs));
1962:   PetscCall(MatGetSize(Mreuse, &m, &n));
1963:   m = m / bs;
1964:   n = n / bs;

1966:   if (call == MAT_INITIAL_MATRIX) {
1967:     aij = (Mat_SeqBAIJ *)(Mreuse)->data;
1968:     ii  = aij->i;
1969:     jj  = aij->j;

1971:     /*
1972:         Determine the number of non-zeros in the diagonal and off-diagonal
1973:         portions of the matrix in order to do correct preallocation
1974:     */

1976:     /* first get start and end of "diagonal" columns */
1977:     if (csize == PETSC_DECIDE) {
1978:       PetscCall(ISGetSize(isrow, &mglobal));
1979:       if (mglobal == n * bs) { /* square matrix */
1980:         nlocal = m;
1981:       } else {
1982:         nlocal = n / size + ((n % size) > rank);
1983:       }
1984:     } else {
1985:       nlocal = csize / bs;
1986:     }
1987:     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
1988:     rstart = rend - nlocal;
1989:     PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

1991:     /* next, compute all the lengths */
1992:     PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens));
1993:     for (i = 0; i < m; i++) {
1994:       jend = ii[i + 1] - ii[i];
1995:       olen = 0;
1996:       dlen = 0;
1997:       for (j = 0; j < jend; j++) {
1998:         if (*jj < rstart || *jj >= rend) olen++;
1999:         else dlen++;
2000:         jj++;
2001:       }
2002:       olens[i] = olen;
2003:       dlens[i] = dlen;
2004:     }
2005:     PetscCall(MatCreate(comm, &M));
2006:     PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n));
2007:     PetscCall(MatSetType(M, sym ? ((PetscObject)mat)->type_name : MATMPIBAIJ));
2008:     PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
2009:     PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
2010:     PetscCall(PetscFree2(dlens, olens));
2011:   } else {
2012:     PetscInt ml, nl;

2014:     M = *newmat;
2015:     PetscCall(MatGetLocalSize(M, &ml, &nl));
2016:     PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
2017:     PetscCall(MatZeroEntries(M));
2018:     /*
2019:          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2020:        rather than the slower MatSetValues().
2021:     */
2022:     M->was_assembled = PETSC_TRUE;
2023:     M->assembled     = PETSC_FALSE;
2024:   }
2025:   PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE));
2026:   PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
2027:   aij = (Mat_SeqBAIJ *)(Mreuse)->data;
2028:   ii  = aij->i;
2029:   jj  = aij->j;
2030:   aa  = aij->a;
2031:   for (i = 0; i < m; i++) {
2032:     row   = rstart / bs + i;
2033:     nz    = ii[i + 1] - ii[i];
2034:     cwork = jj;
2035:     jj += nz;
2036:     vwork = aa;
2037:     aa += nz * bs * bs;
2038:     PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
2039:   }

2041:   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
2042:   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
2043:   *newmat = M;

2045:   /* save submatrix used in processor for next request */
2046:   if (call == MAT_INITIAL_MATRIX) {
2047:     PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
2048:     PetscCall(PetscObjectDereference((PetscObject)Mreuse));
2049:   }
2050:   PetscFunctionReturn(PETSC_SUCCESS);
2051: }

2053: static PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B)
2054: {
2055:   MPI_Comm        comm, pcomm;
2056:   PetscInt        clocal_size, nrows;
2057:   const PetscInt *rows;
2058:   PetscMPIInt     size;
2059:   IS              crowp, lcolp;

2061:   PetscFunctionBegin;
2062:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2063:   /* make a collective version of 'rowp' */
2064:   PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm));
2065:   if (pcomm == comm) {
2066:     crowp = rowp;
2067:   } else {
2068:     PetscCall(ISGetSize(rowp, &nrows));
2069:     PetscCall(ISGetIndices(rowp, &rows));
2070:     PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp));
2071:     PetscCall(ISRestoreIndices(rowp, &rows));
2072:   }
2073:   PetscCall(ISSetPermutation(crowp));
2074:   /* make a local version of 'colp' */
2075:   PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm));
2076:   PetscCallMPI(MPI_Comm_size(pcomm, &size));
2077:   if (size == 1) {
2078:     lcolp = colp;
2079:   } else {
2080:     PetscCall(ISAllGather(colp, &lcolp));
2081:   }
2082:   PetscCall(ISSetPermutation(lcolp));
2083:   /* now we just get the submatrix */
2084:   PetscCall(MatGetLocalSize(A, NULL, &clocal_size));
2085:   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B, PETSC_FALSE));
2086:   /* clean up */
2087:   if (pcomm != comm) PetscCall(ISDestroy(&crowp));
2088:   if (size > 1) PetscCall(ISDestroy(&lcolp));
2089:   PetscFunctionReturn(PETSC_SUCCESS);
2090: }

2092: static PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
2093: {
2094:   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
2095:   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;

2097:   PetscFunctionBegin;
2098:   if (nghosts) *nghosts = B->nbs;
2099:   if (ghosts) *ghosts = baij->garray;
2100:   PetscFunctionReturn(PETSC_SUCCESS);
2101: }
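
/*
   A minimal usage sketch, assuming A is an assembled MATMPIBAIJ matrix: the ghosts returned here
   are the global block-column indices (garray) needed by the off-diagonal part, one entry per
   ghost block rather than per scalar column.

     PetscInt        nghost;
     const PetscInt *ghosts;
     PetscCall(MatGetGhosts(A, &nghost, &ghosts));   // ghosts[] is owned by A; do not free or modify it
*/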

2103: static PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat)
2104: {
2105:   Mat          B;
2106:   Mat_MPIBAIJ *a  = (Mat_MPIBAIJ *)A->data;
2107:   Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data;
2108:   Mat_SeqAIJ  *b;
2109:   PetscMPIInt  size, rank, *recvcounts = NULL, *displs = NULL;
2110:   PetscInt     sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs;
2111:   PetscInt     m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf;

2113:   PetscFunctionBegin;
2114:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
2115:   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));

2117:   /*   Tell every processor the number of nonzeros per row  */
2118:   PetscCall(PetscMalloc1(A->rmap->N / bs, &lens));
2119:   for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs];
2120:   PetscCall(PetscMalloc1(2 * size, &recvcounts));
2121:   displs = recvcounts + size;
2122:   for (i = 0; i < size; i++) {
2123:     recvcounts[i] = A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs;
2124:     displs[i]     = A->rmap->range[i] / bs;
2125:   }
2126:   PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
2127:   /* Create the sequential matrix of the same type as the local block diagonal  */
2128:   PetscCall(MatCreate(PETSC_COMM_SELF, &B));
2129:   PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE));
2130:   PetscCall(MatSetType(B, MATSEQAIJ));
2131:   PetscCall(MatSeqAIJSetPreallocation(B, 0, lens));
2132:   b = (Mat_SeqAIJ *)B->data;

2134:   /*     Copy my part of matrix column indices over  */
2135:   sendcount  = ad->nz + bd->nz;
2136:   jsendbuf   = b->j + b->i[rstarts[rank] / bs];
2137:   a_jsendbuf = ad->j;
2138:   b_jsendbuf = bd->j;
2139:   n          = A->rmap->rend / bs - A->rmap->rstart / bs;
2140:   cnt        = 0;
2141:   for (i = 0; i < n; i++) {
2142:     /* put in lower diagonal portion */
2143:     m = bd->i[i + 1] - bd->i[i];
2144:     while (m > 0) {
2145:       /* is it above diagonal (in bd (compressed) numbering) */
2146:       if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break;
2147:       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2148:       m--;
2149:     }

2151:     /* put in diagonal portion */
2152:     for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++;

2154:     /* put in upper diagonal portion */
2155:     while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++];
2156:   }
2157:   PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt);

2159:   /*  Gather all column indices to all processors  */
2160:   for (i = 0; i < size; i++) {
2161:     recvcounts[i] = 0;
2162:     for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j];
2163:   }
2164:   displs[0] = 0;
2165:   for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1];
2166:   PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
2167:   /*  Assemble the matrix into usable form (note that the numerical values are not yet set)  */
2168:   /* set the b->ilen (length of each row) values */
2169:   PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs));
2170:   /* set the b->i indices */
2171:   b->i[0] = 0;
2172:   for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1];
2173:   PetscCall(PetscFree(lens));
2174:   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2175:   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2176:   PetscCall(PetscFree(recvcounts));

2178:   PetscCall(MatPropagateSymmetryOptions(A, B));
2179:   *newmat = B;
2180:   PetscFunctionReturn(PETSC_SUCCESS);
2181: }

2183: static PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
2184: {
2185:   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
2186:   Vec          bb1 = NULL;

2188:   PetscFunctionBegin;
2189:   if (flag == SOR_APPLY_UPPER) {
2190:     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2191:     PetscFunctionReturn(PETSC_SUCCESS);
2192:   }

2194:   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1));

2196:   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2197:     if (flag & SOR_ZERO_INITIAL_GUESS) {
2198:       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2199:       its--;
2200:     }

2202:     while (its--) {
2203:       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2204:       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

2206:       /* update rhs: bb1 = bb - B*x */
2207:       PetscCall(VecScale(mat->lvec, -1.0));
2208:       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

2210:       /* local sweep */
2211:       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
2212:     }
2213:   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2214:     if (flag & SOR_ZERO_INITIAL_GUESS) {
2215:       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2216:       its--;
2217:     }
2218:     while (its--) {
2219:       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2220:       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

2222:       /* update rhs: bb1 = bb - B*x */
2223:       PetscCall(VecScale(mat->lvec, -1.0));
2224:       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

2226:       /* local sweep */
2227:       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
2228:     }
2229:   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2230:     if (flag & SOR_ZERO_INITIAL_GUESS) {
2231:       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2232:       its--;
2233:     }
2234:     while (its--) {
2235:       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2236:       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

2238:       /* update rhs: bb1 = bb - B*x */
2239:       PetscCall(VecScale(mat->lvec, -1.0));
2240:       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

2242:       /* local sweep */
2243:       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
2244:     }
2245:   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested is not supported");

2247:   PetscCall(VecDestroy(&bb1));
2248:   PetscFunctionReturn(PETSC_SUCCESS);
2249: }

2251: static PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions)
2252: {
2253:   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data;
2254:   PetscInt     m, N, i, *garray = aij->garray;
2255:   PetscInt     ib, jb, bs = A->rmap->bs;
2256:   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data;
2257:   MatScalar   *a_val = a_aij->a;
2258:   Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data;
2259:   MatScalar   *b_val = b_aij->a;
2260:   PetscReal   *work;

2262:   PetscFunctionBegin;
2263:   PetscCall(MatGetSize(A, &m, &N));
2264:   PetscCall(PetscCalloc1(N, &work));
2265:   if (type == NORM_2) {
2266:     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2267:       for (jb = 0; jb < bs; jb++) {
2268:         for (ib = 0; ib < bs; ib++) {
2269:           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2270:           a_val++;
2271:         }
2272:       }
2273:     }
2274:     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2275:       for (jb = 0; jb < bs; jb++) {
2276:         for (ib = 0; ib < bs; ib++) {
2277:           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2278:           b_val++;
2279:         }
2280:       }
2281:     }
2282:   } else if (type == NORM_1) {
2283:     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2284:       for (jb = 0; jb < bs; jb++) {
2285:         for (ib = 0; ib < bs; ib++) {
2286:           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2287:           a_val++;
2288:         }
2289:       }
2290:     }
2291:     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2292:       for (jb = 0; jb < bs; jb++) {
2293:         for (ib = 0; ib < bs; ib++) {
2294:           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2295:           b_val++;
2296:         }
2297:       }
2298:     }
2299:   } else if (type == NORM_INFINITY) {
2300:     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2301:       for (jb = 0; jb < bs; jb++) {
2302:         for (ib = 0; ib < bs; ib++) {
2303:           PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2304:           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2305:           a_val++;
2306:         }
2307:       }
2308:     }
2309:     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2310:       for (jb = 0; jb < bs; jb++) {
2311:         for (ib = 0; ib < bs; ib++) {
2312:           PetscInt col = garray[b_aij->j[i]] * bs + jb;
2313:           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2314:           b_val++;
2315:         }
2316:       }
2317:     }
2318:   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
2319:     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2320:       for (jb = 0; jb < bs; jb++) {
2321:         for (ib = 0; ib < bs; ib++) {
2322:           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
2323:           a_val++;
2324:         }
2325:       }
2326:     }
2327:     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2328:       for (jb = 0; jb < bs; jb++) {
2329:         for (ib = 0; ib < bs; ib++) {
2330:           work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val);
2331:           b_val++;
2332:         }
2333:       }
2334:     }
2335:   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2336:     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2337:       for (jb = 0; jb < bs; jb++) {
2338:         for (ib = 0; ib < bs; ib++) {
2339:           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
2340:           a_val++;
2341:         }
2342:       }
2343:     }
2344:     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2345:       for (jb = 0; jb < bs; jb++) {
2346:         for (ib = 0; ib < bs; ib++) {
2347:           work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val);
2348:           b_val++;
2349:         }
2350:       }
2351:     }
2352:   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
2353:   if (type == NORM_INFINITY) {
2354:     PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
2355:   } else {
2356:     PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
2357:   }
2358:   PetscCall(PetscFree(work));
2359:   if (type == NORM_2) {
2360:     for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
2361:   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2362:     for (i = 0; i < N; i++) reductions[i] /= m;
2363:   }
2364:   PetscFunctionReturn(PETSC_SUCCESS);
2365: }

2367: static PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values)
2368: {
2369:   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

2371:   PetscFunctionBegin;
2372:   PetscCall(MatInvertBlockDiagonal(a->A, values));
2373:   A->factorerrortype             = a->A->factorerrortype;
2374:   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2375:   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2376:   PetscFunctionReturn(PETSC_SUCCESS);
2377: }

2379: static PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a)
2380: {
2381:   Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data;
2382:   Mat_SeqBAIJ *aij  = (Mat_SeqBAIJ *)maij->A->data;

2384:   PetscFunctionBegin;
2385:   if (!Y->preallocated) {
2386:     PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
2387:   } else if (!aij->nz) {
2388:     PetscInt nonew = aij->nonew;
2389:     PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
2390:     aij->nonew = nonew;
2391:   }
2392:   PetscCall(MatShift_Basic(Y, a));
2393:   PetscFunctionReturn(PETSC_SUCCESS);
2394: }

2396: static PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d)
2397: {
2398:   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

2400:   PetscFunctionBegin;
2401:   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2402:   PetscCall(MatMissingDiagonal(a->A, missing, d));
2403:   if (d) {
2404:     PetscInt rstart;
2405:     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2406:     *d += rstart / A->rmap->bs;
2407:   }
2408:   PetscFunctionReturn(PETSC_SUCCESS);
2409: }

2411: static PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a)
2412: {
2413:   PetscFunctionBegin;
2414:   *a = ((Mat_MPIBAIJ *)A->data)->A;
2415:   PetscFunctionReturn(PETSC_SUCCESS);
2416: }

2418: static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep)
2419: {
2420:   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

2422:   PetscFunctionBegin;
2423:   PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2424:   PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2425:   PetscFunctionReturn(PETSC_SUCCESS);
2426: }

2428: static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2429:                                        MatGetRow_MPIBAIJ,
2430:                                        MatRestoreRow_MPIBAIJ,
2431:                                        MatMult_MPIBAIJ,
2432:                                        /* 4*/ MatMultAdd_MPIBAIJ,
2433:                                        MatMultTranspose_MPIBAIJ,
2434:                                        MatMultTransposeAdd_MPIBAIJ,
2435:                                        NULL,
2436:                                        NULL,
2437:                                        NULL,
2438:                                        /*10*/ NULL,
2439:                                        NULL,
2440:                                        NULL,
2441:                                        MatSOR_MPIBAIJ,
2442:                                        MatTranspose_MPIBAIJ,
2443:                                        /*15*/ MatGetInfo_MPIBAIJ,
2444:                                        MatEqual_MPIBAIJ,
2445:                                        MatGetDiagonal_MPIBAIJ,
2446:                                        MatDiagonalScale_MPIBAIJ,
2447:                                        MatNorm_MPIBAIJ,
2448:                                        /*20*/ MatAssemblyBegin_MPIBAIJ,
2449:                                        MatAssemblyEnd_MPIBAIJ,
2450:                                        MatSetOption_MPIBAIJ,
2451:                                        MatZeroEntries_MPIBAIJ,
2452:                                        /*24*/ MatZeroRows_MPIBAIJ,
2453:                                        NULL,
2454:                                        NULL,
2455:                                        NULL,
2456:                                        NULL,
2457:                                        /*29*/ MatSetUp_MPI_Hash,
2458:                                        NULL,
2459:                                        NULL,
2460:                                        MatGetDiagonalBlock_MPIBAIJ,
2461:                                        NULL,
2462:                                        /*34*/ MatDuplicate_MPIBAIJ,
2463:                                        NULL,
2464:                                        NULL,
2465:                                        NULL,
2466:                                        NULL,
2467:                                        /*39*/ MatAXPY_MPIBAIJ,
2468:                                        MatCreateSubMatrices_MPIBAIJ,
2469:                                        MatIncreaseOverlap_MPIBAIJ,
2470:                                        MatGetValues_MPIBAIJ,
2471:                                        MatCopy_MPIBAIJ,
2472:                                        /*44*/ NULL,
2473:                                        MatScale_MPIBAIJ,
2474:                                        MatShift_MPIBAIJ,
2475:                                        NULL,
2476:                                        MatZeroRowsColumns_MPIBAIJ,
2477:                                        /*49*/ NULL,
2478:                                        NULL,
2479:                                        NULL,
2480:                                        NULL,
2481:                                        NULL,
2482:                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2483:                                        NULL,
2484:                                        MatSetUnfactored_MPIBAIJ,
2485:                                        MatPermute_MPIBAIJ,
2486:                                        MatSetValuesBlocked_MPIBAIJ,
2487:                                        /*59*/ MatCreateSubMatrix_MPIBAIJ,
2488:                                        MatDestroy_MPIBAIJ,
2489:                                        MatView_MPIBAIJ,
2490:                                        NULL,
2491:                                        NULL,
2492:                                        /*64*/ NULL,
2493:                                        NULL,
2494:                                        NULL,
2495:                                        NULL,
2496:                                        NULL,
2497:                                        /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2498:                                        NULL,
2499:                                        NULL,
2500:                                        NULL,
2501:                                        NULL,
2502:                                        /*74*/ NULL,
2503:                                        MatFDColoringApply_BAIJ,
2504:                                        NULL,
2505:                                        NULL,
2506:                                        NULL,
2507:                                        /*79*/ NULL,
2508:                                        NULL,
2509:                                        NULL,
2510:                                        NULL,
2511:                                        MatLoad_MPIBAIJ,
2512:                                        /*84*/ NULL,
2513:                                        NULL,
2514:                                        NULL,
2515:                                        NULL,
2516:                                        NULL,
2517:                                        /*89*/ NULL,
2518:                                        NULL,
2519:                                        NULL,
2520:                                        NULL,
2521:                                        NULL,
2522:                                        /*94*/ NULL,
2523:                                        NULL,
2524:                                        NULL,
2525:                                        NULL,
2526:                                        NULL,
2527:                                        /*99*/ NULL,
2528:                                        NULL,
2529:                                        NULL,
2530:                                        MatConjugate_MPIBAIJ,
2531:                                        NULL,
2532:                                        /*104*/ NULL,
2533:                                        MatRealPart_MPIBAIJ,
2534:                                        MatImaginaryPart_MPIBAIJ,
2535:                                        NULL,
2536:                                        NULL,
2537:                                        /*109*/ NULL,
2538:                                        NULL,
2539:                                        NULL,
2540:                                        NULL,
2541:                                        MatMissingDiagonal_MPIBAIJ,
2542:                                        /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ,
2543:                                        NULL,
2544:                                        MatGetGhosts_MPIBAIJ,
2545:                                        NULL,
2546:                                        NULL,
2547:                                        /*119*/ NULL,
2548:                                        NULL,
2549:                                        NULL,
2550:                                        NULL,
2551:                                        MatGetMultiProcBlock_MPIBAIJ,
2552:                                        /*124*/ NULL,
2553:                                        MatGetColumnReductions_MPIBAIJ,
2554:                                        MatInvertBlockDiagonal_MPIBAIJ,
2555:                                        NULL,
2556:                                        NULL,
2557:                                        /*129*/ NULL,
2558:                                        NULL,
2559:                                        NULL,
2560:                                        NULL,
2561:                                        NULL,
2562:                                        /*134*/ NULL,
2563:                                        NULL,
2564:                                        NULL,
2565:                                        NULL,
2566:                                        NULL,
2567:                                        /*139*/ MatSetBlockSizes_Default,
2568:                                        NULL,
2569:                                        NULL,
2570:                                        MatFDColoringSetUp_MPIXAIJ,
2571:                                        NULL,
2572:                                        /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
2573:                                        NULL,
2574:                                        NULL,
2575:                                        NULL,
2576:                                        NULL,
2577:                                        NULL,
2578:                                        /*150*/ NULL,
2579:                                        MatEliminateZeros_MPIBAIJ};

2581: PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
2582: PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);

2584: static PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
2585: {
2586:   PetscInt        m, rstart, cstart, cend;
2587:   PetscInt        i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
2588:   const PetscInt *JJ          = NULL;
2589:   PetscScalar    *values      = NULL;
2590:   PetscBool       roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented;
2591:   PetscBool       nooffprocentries;

2593:   PetscFunctionBegin;
2594:   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
2595:   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
2596:   PetscCall(PetscLayoutSetUp(B->rmap));
2597:   PetscCall(PetscLayoutSetUp(B->cmap));
2598:   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2599:   m      = B->rmap->n / bs;
2600:   rstart = B->rmap->rstart / bs;
2601:   cstart = B->cmap->rstart / bs;
2602:   cend   = B->cmap->rend / bs;

2604:   PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
2605:   PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
2606:   for (i = 0; i < m; i++) {
2607:     nz = ii[i + 1] - ii[i];
2608:     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
2609:     nz_max = PetscMax(nz_max, nz);
2610:     dlen   = 0;
2611:     olen   = 0;
2612:     JJ     = jj + ii[i];
2613:     for (j = 0; j < nz; j++) {
2614:       if (*JJ < cstart || *JJ >= cend) olen++;
2615:       else dlen++;
2616:       JJ++;
2617:     }
2618:     d_nnz[i] = dlen;
2619:     o_nnz[i] = olen;
2620:   }
2621:   PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
2622:   PetscCall(PetscFree2(d_nnz, o_nnz));

2624:   values = (PetscScalar *)V;
2625:   if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
2626:   for (i = 0; i < m; i++) {
2627:     PetscInt        row   = i + rstart;
2628:     PetscInt        ncols = ii[i + 1] - ii[i];
2629:     const PetscInt *icols = jj + ii[i];
2630:     if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2631:       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
2632:       PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
2633:     } else { /* block ordering does not match so we can only insert one block at a time. */
2634:       PetscInt j;
2635:       for (j = 0; j < ncols; j++) {
2636:         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
2637:         PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
2638:       }
2639:     }
2640:   }

2642:   if (!V) PetscCall(PetscFree(values));
2643:   nooffprocentries    = B->nooffprocentries;
2644:   B->nooffprocentries = PETSC_TRUE;
2645:   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2646:   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2647:   B->nooffprocentries = nooffprocentries;

2649:   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2650:   PetscFunctionReturn(PETSC_SUCCESS);
2651: }

2653: /*@C
2654:   MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values

2656:   Collective

2658:   Input Parameters:
2659: + B  - the matrix
2660: . bs - the block size
2661: . i  - the indices into `j` for the start of each local row (starts with zero)
2662: . j  - the column indices for each local row (starts with zero); these must be sorted for each row
2663: - v  - optional values in the matrix

2665:   Level: advanced

2667:   Notes:
2668:   The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
2669:   may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
2670:   over rows within a block and the last index is over columns within a block.  Fortran programs will likely set
2671:   `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2672:   block and the second index is over columns within a block.

2674:   Though this routine has Preallocation() in the name, it also sets the exact nonzero locations of the matrix entries, and usually the numerical values as well.

2676: .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ`
2677: @*/
2678: PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
2679: {
2680:   PetscFunctionBegin;
2684:   PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
2685:   PetscFunctionReturn(PETSC_SUCCESS);
2686: }
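
/*
   Illustrative usage sketch (editor's addition, not part of the PETSc source): one way to drive
   MatMPIBAIJSetPreallocationCSR() as documented above.  The block-CSR arrays below are hypothetical
   and describe a rank that owns two block rows with bs = 2; `ii` has one entry per local block row
   plus one, `jj` holds sorted global block-column indices, and `vals` stores bs*bs numerical values
   per block.

     Mat               A;
     const PetscInt    bs   = 2;
     const PetscInt    ii[] = {0, 2, 3};  // two local block rows containing 2 and 1 blocks
     const PetscInt    jj[] = {0, 1, 1};  // global block-column indices, sorted within each row
     const PetscScalar vals[3 * 2 * 2] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; // bs*bs values per block

     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetSizes(A, 2 * bs, 2 * bs, PETSC_DETERMINE, PETSC_DETERMINE));
     PetscCall(MatSetType(A, MATMPIBAIJ));
     PetscCall(MatMPIBAIJSetPreallocationCSR(A, bs, ii, jj, vals)); // preallocates, inserts the values, and assembles
     PetscCall(MatDestroy(&A));
*/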

2688: PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
2689: {
2690:   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
2691:   PetscInt     i;
2692:   PetscMPIInt  size;

2694:   PetscFunctionBegin;
2695:   if (B->hash_active) {
2696:     B->ops[0]      = b->cops;
2697:     B->hash_active = PETSC_FALSE;
2698:   }
2699:   if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
2700:   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
2701:   PetscCall(PetscLayoutSetUp(B->rmap));
2702:   PetscCall(PetscLayoutSetUp(B->cmap));
2703:   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));

2705:   if (d_nnz) {
2706:     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]);
2707:   }
2708:   if (o_nnz) {
2709:     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]);
2710:   }

2712:   b->bs2 = bs * bs;
2713:   b->mbs = B->rmap->n / bs;
2714:   b->nbs = B->cmap->n / bs;
2715:   b->Mbs = B->rmap->N / bs;
2716:   b->Nbs = B->cmap->N / bs;

2718:   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
2719:   b->rstartbs = B->rmap->rstart / bs;
2720:   b->rendbs   = B->rmap->rend / bs;
2721:   b->cstartbs = B->cmap->rstart / bs;
2722:   b->cendbs   = B->cmap->rend / bs;

2724: #if defined(PETSC_USE_CTABLE)
2725:   PetscCall(PetscHMapIDestroy(&b->colmap));
2726: #else
2727:   PetscCall(PetscFree(b->colmap));
2728: #endif
2729:   PetscCall(PetscFree(b->garray));
2730:   PetscCall(VecDestroy(&b->lvec));
2731:   PetscCall(VecScatterDestroy(&b->Mvctx));

2733:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2734:   PetscCall(MatDestroy(&b->B));
2735:   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2736:   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2737:   PetscCall(MatSetType(b->B, MATSEQBAIJ));

2739:   PetscCall(MatDestroy(&b->A));
2740:   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2741:   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2742:   PetscCall(MatSetType(b->A, MATSEQBAIJ));

2744:   PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
2745:   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
2746:   B->preallocated  = PETSC_TRUE;
2747:   B->was_assembled = PETSC_FALSE;
2748:   B->assembled     = PETSC_FALSE;
2749:   PetscFunctionReturn(PETSC_SUCCESS);
2750: }

2752: extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec);
2753: extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal);

2755: PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj)
2756: {
2757:   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
2758:   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data;
2759:   PetscInt        M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs;
2760:   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

2762:   PetscFunctionBegin;
2763:   PetscCall(PetscMalloc1(M + 1, &ii));
2764:   ii[0] = 0;
2765:   for (i = 0; i < M; i++) {
2766:     PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]);
2767:     PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]);
2768:     ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i];
2769:     /* remove one from the count if this row contains a diagonal entry, since the diagonal is not stored in the adjacency matrix */
2770:     for (j = id[i]; j < id[i + 1]; j++) {
2771:       if (jd[j] == i) {
2772:         ii[i + 1]--;
2773:         break;
2774:       }
2775:     }
2776:   }
2777:   PetscCall(PetscMalloc1(ii[M], &jj));
2778:   cnt = 0;
2779:   for (i = 0; i < M; i++) {
2780:     for (j = io[i]; j < io[i + 1]; j++) {
2781:       if (garray[jo[j]] > rstart) break;
2782:       jj[cnt++] = garray[jo[j]];
2783:     }
2784:     for (k = id[i]; k < id[i + 1]; k++) {
2785:       if (jd[k] != i) jj[cnt++] = rstart + jd[k];
2786:     }
2787:     for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]];
2788:   }
2789:   PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj));
2790:   PetscFunctionReturn(PETSC_SUCCESS);
2791: }

2793: #include <../src/mat/impls/aij/mpi/mpiaij.h>

2795: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);

2797: PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
2798: {
2799:   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2800:   Mat_MPIAIJ  *b;
2801:   Mat          B;

2803:   PetscFunctionBegin;
2804:   PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");

2806:   if (reuse == MAT_REUSE_MATRIX) {
2807:     B = *newmat;
2808:   } else {
2809:     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
2810:     PetscCall(MatSetType(B, MATMPIAIJ));
2811:     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
2812:     PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
2813:     PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
2814:     PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL));
2815:   }
2816:   b = (Mat_MPIAIJ *)B->data;

2818:   if (reuse == MAT_REUSE_MATRIX) {
2819:     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
2820:     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
2821:   } else {
2822:     PetscInt   *garray = a->garray;
2823:     Mat_SeqAIJ *bB;
2824:     PetscInt    bs, nnz;
2825:     PetscCall(MatDestroy(&b->A));
2826:     PetscCall(MatDestroy(&b->B));
2827:     /* just clear out the data structure */
2828:     PetscCall(MatDisAssemble_MPIAIJ(B));
2829:     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
2830:     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));

2832:     /* Global numbering for b->B columns */
2833:     bB  = (Mat_SeqAIJ *)b->B->data;
2834:     bs  = A->rmap->bs;
2835:     nnz = bB->i[A->rmap->n];
2836:     for (PetscInt k = 0; k < nnz; k++) {
2837:       PetscInt bj = bB->j[k] / bs;
2838:       PetscInt br = bB->j[k] % bs;
2839:       bB->j[k]    = garray[bj] * bs + br;
2840:     }
2841:   }
2842:   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2843:   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

2845:   if (reuse == MAT_INPLACE_MATRIX) {
2846:     PetscCall(MatHeaderReplace(A, &B));
2847:   } else {
2848:     *newmat = B;
2849:   }
2850:   PetscFunctionReturn(PETSC_SUCCESS);
2851: }

2853: /*MC
2854:    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.

2856:    Options Database Keys:
2857: + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()`
2858: . -mat_block_size <bs> - set the blocksize used to store the matrix
2859: . -mat_baij_mult_version <version> - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
2860: - -mat_use_hash_table <fact> - set hash table factor

2862:    Level: beginner

2864:    Note:
2865:     `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. In this case no
2866:     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored.

2868: .seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ()`
2869: M*/
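
/*
   Illustrative sketch (editor's addition, not part of the PETSc source): selecting this matrix
   type from the options database and using the MAT_STRUCTURE_ONLY option mentioned in the manual
   page above.  The sizes are hypothetical; run with -mat_type mpibaij -mat_block_size 2.

     Mat A;

     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 100, 100));
     PetscCall(MatSetFromOptions(A));                            // picks up -mat_type mpibaij etc.
     PetscCall(MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)); // keep only the nonzero pattern
     PetscCall(MatSetUp(A));
     // ... MatSetValuesBlocked() calls; with MAT_STRUCTURE_ONLY the numerical values are ignored ...
     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
     PetscCall(MatDestroy(&A));
*/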

2871: PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *);

2873: PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2874: {
2875:   Mat_MPIBAIJ *b;
2876:   PetscBool    flg = PETSC_FALSE;

2878:   PetscFunctionBegin;
2879:   PetscCall(PetscNew(&b));
2880:   B->data      = (void *)b;
2881:   B->ops[0]    = MatOps_Values;
2882:   B->assembled = PETSC_FALSE;

2884:   B->insertmode = NOT_SET_VALUES;
2885:   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
2886:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));

2888:   /* build local table of row and column ownerships */
2889:   PetscCall(PetscMalloc1(b->size + 1, &b->rangebs));

2891:   /* build cache (stash) for off-process entries formed during assembly */
2892:   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

2894:   b->donotstash  = PETSC_FALSE;
2895:   b->colmap      = NULL;
2896:   b->garray      = NULL;
2897:   b->roworiented = PETSC_TRUE;

2899:   /* stuff used in block assembly */
2900:   b->barray = NULL;

2902:   /* stuff used for matrix vector multiply */
2903:   b->lvec  = NULL;
2904:   b->Mvctx = NULL;

2906:   /* stuff for MatGetRow() */
2907:   b->rowindices   = NULL;
2908:   b->rowvalues    = NULL;
2909:   b->getrowactive = PETSC_FALSE;

2911:   /* hash table stuff */
2912:   b->ht           = NULL;
2913:   b->hd           = NULL;
2914:   b->ht_size      = 0;
2915:   b->ht_flag      = PETSC_FALSE;
2916:   b->ht_fact      = 0;
2917:   b->ht_total_ct  = 0;
2918:   b->ht_insert_ct = 0;

2920:   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2921:   b->ijonly = PETSC_FALSE;

2923:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj));
2924:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ));
2925:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ));
2926: #if defined(PETSC_HAVE_HYPRE)
2927:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE));
2928: #endif
2929:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ));
2930:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ));
2931:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ));
2932:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
2933:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ));
2934:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ));
2935:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS));
2936:   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ));

2938:   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat");
2939:   PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg));
2940:   if (flg) {
2941:     PetscReal fact = 1.39;
2942:     PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
2943:     PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
2944:     if (fact <= 1.0) fact = 1.39;
2945:     PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
2946:     PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
2947:   }
2948:   PetscOptionsEnd();
2949:   PetscFunctionReturn(PETSC_SUCCESS);
2950: }

2952: // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2953: /*MC
2954:    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.

2956:    This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator,
2957:    and `MATMPIBAIJ` otherwise.

2959:    Options Database Keys:
2960: . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()`

2962:   Level: beginner

2964: .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
2965: M*/

2967: /*@C
2968:   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format
2969:   (block compressed row).

2971:   Collective

2973:   Input Parameters:
2974: + B     - the matrix
2975: . bs    - size of block; the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize, but the row
2976:           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2977: . d_nz  - number of block nonzeros per block row in diagonal portion of local
2978:            submatrix  (same for all local rows)
2979: . d_nnz - array containing the number of block nonzeros in the various block rows
2980:            of the diagonal portion of the local submatrix (possibly different for each block
2981:            row) or `NULL`.  If you plan to factor the matrix you must leave room for the diagonal entry and
2982:            set it even if it is zero.
2983: . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2984:            submatrix (same for all local rows).
2985: - o_nnz - array containing the number of block nonzeros in the various block rows of the
2986:            off-diagonal portion of the local submatrix (possibly different for
2987:            each block row) or `NULL`.

2989:    If the *_nnz parameter is given then the *_nz parameter is ignored

2991:   Options Database Keys:
2992: + -mat_block_size            - size of the blocks to use
2993: - -mat_use_hash_table <fact> - set hash table factor

2995:   Level: intermediate

2997:   Notes:
2998:   For good matrix assembly performance
2999:   the user should preallocate the matrix storage by setting the parameters
3000:   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
3001:   performance can be increased by more than a factor of 50.

3003:   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
3004:   then it must be used on all processors that share the object for that argument.

3006:   Storage Information:
3007:   For a square global matrix we define each processor's diagonal portion
3008:   to be its local rows and the corresponding columns (a square submatrix);
3009:   each processor's off-diagonal portion encompasses the remainder of the
3010:   local matrix (a rectangular submatrix).

3012:   The user can specify preallocated storage for the diagonal part of
3013:   the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
3014:   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3015:   memory allocation.  Likewise, specify preallocated storage for the
3016:   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).

3018:   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3019:   the figure below we depict these three local rows and all columns (0-11).

3021: .vb
3022:            0 1 2 3 4 5 6 7 8 9 10 11
3023:           --------------------------
3024:    row 3  |o o o d d d o o o o  o  o
3025:    row 4  |o o o d d d o o o o  o  o
3026:    row 5  |o o o d d d o o o o  o  o
3027:           --------------------------
3028: .ve

3030:   Thus, any entries in the d locations are stored in the d (diagonal)
3031:   submatrix, and any entries in the o locations are stored in the
3032:   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3033:   stored simply in the `MATSEQBAIJ` format for compressed row storage.

3035:   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3036:   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3037:   In general, for PDE problems in which most nonzeros are near the diagonal,
3038:   one expects `d_nz` >> `o_nz`.

3040:   You can call `MatGetInfo()` to get information on how effective the preallocation was;
3041:   for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3042:   You can also run with the option `-info` and look for messages with the string
3043:   malloc in them to see if additional memory allocation was needed.

3045: .seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
3046: @*/
3047: PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
3048: {
3049:   PetscFunctionBegin;
3053:   PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
3054:   PetscFunctionReturn(PETSC_SUCCESS);
3055: }
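
/*
   Illustrative sketch (editor's addition, not part of the PETSc source): preallocating a
   MATMPIBAIJ matrix with per-block-row counts, following the d/o (diagonal/off-diagonal)
   splitting described in the manual page above.  The block size, local sizes, and counts
   are hypothetical.

     Mat            A;
     const PetscInt bs      = 2;
     const PetscInt mbs     = 3;            // locally owned block rows
     const PetscInt d_nnz[] = {3, 3, 2};    // block nonzeros per block row in the diagonal part
     const PetscInt o_nnz[] = {1, 0, 2};    // block nonzeros per block row in the off-diagonal part

     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetSizes(A, mbs * bs, mbs * bs, PETSC_DETERMINE, PETSC_DETERMINE));
     PetscCall(MatSetType(A, MATMPIBAIJ));
     PetscCall(MatMPIBAIJSetPreallocation(A, bs, 0, d_nnz, 0, o_nnz));
     // ... insert bs x bs blocks with MatSetValuesBlocked(), then assemble and use A ...
     PetscCall(MatDestroy(&A));
*/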

3057: // PetscClangLinter pragma disable: -fdoc-section-header-unknown
3058: /*@C
3059:   MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
3060:   (block compressed row).

3062:   Collective

3064:   Input Parameters:
3065: + comm  - MPI communicator
3066: . bs    - size of block; the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize, but the row
3067:           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3068: . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
3069:            This value should be the same as the local size used in creating the
3070:            y vector for the matrix-vector product y = Ax.
3071: . n     - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
3072:            This value should be the same as the local size used in creating the
3073:            x vector for the matrix-vector product y = Ax.
3074: . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
3075: . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
3076: . d_nz  - number of nonzero blocks per block row in diagonal portion of local
3077:            submatrix  (same for all local rows)
3078: . d_nnz - array containing the number of nonzero blocks in the various block rows
3079:            of the diagonal portion of the local submatrix (possibly different for each block
3080:            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3081:            and set it even if it is zero.
3082: . o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3083:            submatrix (same for all local rows).
3084: - o_nnz - array containing the number of nonzero blocks in the various block rows of the
3085:            off-diagonal portion of the local submatrix (possibly different for
3086:            each block row) or NULL.

3088:   Output Parameter:
3089: . A - the matrix

3091:   Options Database Keys:
3092: + -mat_block_size            - size of the blocks to use
3093: - -mat_use_hash_table <fact> - set hash table factor

3095:   Level: intermediate

3097:   Notes:
3098:   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3099:   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3100:   [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]

3102:   For good matrix assembly performance
3103:   the user should preallocate the matrix storage by setting the parameters
3104:   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
3105:   performance can be increased by more than a factor of 50.

3107:   If the *_nnz parameter is given then the *_nz parameter is ignored

3109:   A nonzero block is any block that has 1 or more nonzeros in it

3111:   The user MUST specify either the local or global matrix dimensions
3112:   (possibly both).

3114:   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
3115:   then it must be used on all processors that share the object for that argument.

3117:   Storage Information:
3118:   For a square global matrix we define each processor's diagonal portion
3119:   to be its local rows and the corresponding columns (a square submatrix);
3120:   each processor's off-diagonal portion encompasses the remainder of the
3121:   local matrix (a rectangular submatrix).

3123:   The user can specify preallocated storage for the diagonal part of
3124:   the local submatrix with either d_nz or d_nnz (not both).  Set
3125:   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3126:   memory allocation.  Likewise, specify preallocated storage for the
3127:   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).

3129:   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3130:   the figure below we depict these three local rows and all columns (0-11).

3132: .vb
3133:            0 1 2 3 4 5 6 7 8 9 10 11
3134:           --------------------------
3135:    row 3  |o o o d d d o o o o  o  o
3136:    row 4  |o o o d d d o o o o  o  o
3137:    row 5  |o o o d d d o o o o  o  o
3138:           --------------------------
3139: .ve

3141:   Thus, any entries in the d locations are stored in the d (diagonal)
3142:   submatrix, and any entries in the o locations are stored in the
3143:   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3144:   stored simply in the `MATSEQBAIJ` format for compressed row storage.

3146:   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3147:   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3148:   In general, for PDE problems in which most nonzeros are near the diagonal,
3149:   one expects `d_nz` >> `o_nz`.

3151: .seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
3152: @*/
3153: PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
3154: {
3155:   PetscMPIInt size;

3157:   PetscFunctionBegin;
3158:   PetscCall(MatCreate(comm, A));
3159:   PetscCall(MatSetSizes(*A, m, n, M, N));
3160:   PetscCallMPI(MPI_Comm_size(comm, &size));
3161:   if (size > 1) {
3162:     PetscCall(MatSetType(*A, MATMPIBAIJ));
3163:     PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
3164:   } else {
3165:     PetscCall(MatSetType(*A, MATSEQBAIJ));
3166:     PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
3167:   }
3168:   PetscFunctionReturn(PETSC_SUCCESS);
3169: }
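
/*
   Illustrative sketch (editor's addition, not part of the PETSc source): the one-call creation
   path documented above, using a constant number of block nonzeros per block row so that the
   d_nnz/o_nnz arrays can be passed as NULL.  The block size and sizes are hypothetical.

     Mat            A;
     const PetscInt bs = 4, mbs = 10;       // 10 local block rows of size 4

     PetscCall(MatCreateBAIJ(PETSC_COMM_WORLD, bs, mbs * bs, mbs * bs, PETSC_DETERMINE, PETSC_DETERMINE, 5, NULL, 2, NULL, &A));
     // on a one-process communicator this creates a MATSEQBAIJ, otherwise a MATMPIBAIJ
     PetscCall(MatDestroy(&A));
*/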

3171: static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
3172: {
3173:   Mat          mat;
3174:   Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
3175:   PetscInt     len = 0;

3177:   PetscFunctionBegin;
3178:   *newmat = NULL;
3179:   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
3180:   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
3181:   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));

3183:   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
3184:   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
3185:   if (matin->hash_active) {
3186:     PetscCall(MatSetUp(mat));
3187:   } else {
3188:     mat->factortype   = matin->factortype;
3189:     mat->preallocated = PETSC_TRUE;
3190:     mat->assembled    = PETSC_TRUE;
3191:     mat->insertmode   = NOT_SET_VALUES;

3193:     a             = (Mat_MPIBAIJ *)mat->data;
3194:     mat->rmap->bs = matin->rmap->bs;
3195:     a->bs2        = oldmat->bs2;
3196:     a->mbs        = oldmat->mbs;
3197:     a->nbs        = oldmat->nbs;
3198:     a->Mbs        = oldmat->Mbs;
3199:     a->Nbs        = oldmat->Nbs;

3201:     a->size         = oldmat->size;
3202:     a->rank         = oldmat->rank;
3203:     a->donotstash   = oldmat->donotstash;
3204:     a->roworiented  = oldmat->roworiented;
3205:     a->rowindices   = NULL;
3206:     a->rowvalues    = NULL;
3207:     a->getrowactive = PETSC_FALSE;
3208:     a->barray       = NULL;
3209:     a->rstartbs     = oldmat->rstartbs;
3210:     a->rendbs       = oldmat->rendbs;
3211:     a->cstartbs     = oldmat->cstartbs;
3212:     a->cendbs       = oldmat->cendbs;

3214:     /* hash table stuff */
3215:     a->ht           = NULL;
3216:     a->hd           = NULL;
3217:     a->ht_size      = 0;
3218:     a->ht_flag      = oldmat->ht_flag;
3219:     a->ht_fact      = oldmat->ht_fact;
3220:     a->ht_total_ct  = 0;
3221:     a->ht_insert_ct = 0;

3223:     PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1));
3224:     if (oldmat->colmap) {
3225: #if defined(PETSC_USE_CTABLE)
3226:       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
3227: #else
3228:       PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
3229:       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
3230: #endif
3231:     } else a->colmap = NULL;

3233:     if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
3234:       PetscCall(PetscMalloc1(len, &a->garray));
3235:       PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
3236:     } else a->garray = NULL;

3238:     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
3239:     PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3240:     PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));

3242:     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3243:     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3244:   }
3245:   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3246:   *newmat = mat;
3247:   PetscFunctionReturn(PETSC_SUCCESS);
3248: }

3250: /* Used for both MPIBAIJ and MPISBAIJ matrices */
3251: PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
3252: {
3253:   PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3254:   PetscInt    *rowidxs, *colidxs, rs, cs, ce;
3255:   PetscScalar *matvals;

3257:   PetscFunctionBegin;
3258:   PetscCall(PetscViewerSetUp(viewer));

3260:   /* read in matrix header */
3261:   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3262:   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3263:   M  = header[1];
3264:   N  = header[2];
3265:   nz = header[3];
3266:   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3267:   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3268:   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ");

3270:   /* set block sizes from the viewer's .info file */
3271:   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3272:   /* set local sizes if not set already */
3273:   if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
3274:   if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
3275:   /* set global sizes if not set already */
3276:   if (mat->rmap->N < 0) mat->rmap->N = M;
3277:   if (mat->cmap->N < 0) mat->cmap->N = N;
3278:   PetscCall(PetscLayoutSetUp(mat->rmap));
3279:   PetscCall(PetscLayoutSetUp(mat->cmap));

3281:   /* check if the matrix sizes are correct */
3282:   PetscCall(MatGetSize(mat, &rows, &cols));
3283:   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3284:   PetscCall(MatGetBlockSize(mat, &bs));
3285:   PetscCall(MatGetLocalSize(mat, &m, &n));
3286:   PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL));
3287:   PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce));
3288:   mbs = m / bs;
3289:   nbs = n / bs;

3291:   /* read in row lengths and build row indices */
3292:   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3293:   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3294:   rowidxs[0] = 0;
3295:   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3296:   PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3297:   PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);

3299:   /* read in column indices and matrix values */
3300:   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3301:   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3302:   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));

3304:   {                /* preallocate matrix storage */
3305:     PetscBT    bt; /* helper bit set to count diagonal nonzeros */
3306:     PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
3307:     PetscBool  sbaij, done;
3308:     PetscInt  *d_nnz, *o_nnz;

3310:     PetscCall(PetscBTCreate(nbs, &bt));
3311:     PetscCall(PetscHSetICreate(&ht));
3312:     PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz));
3313:     PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij));
3314:     for (i = 0; i < mbs; i++) {
3315:       PetscCall(PetscBTMemzero(nbs, bt));
3316:       PetscCall(PetscHSetIClear(ht));
3317:       for (k = 0; k < bs; k++) {
3318:         PetscInt row = bs * i + k;
3319:         for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3320:           PetscInt col = colidxs[j];
3321:           if (!sbaij || col >= row) {
3322:             if (col >= cs && col < ce) {
3323:               if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++;
3324:             } else {
3325:               PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done));
3326:               if (done) o_nnz[i]++;
3327:             }
3328:           }
3329:         }
3330:       }
3331:     }
3332:     PetscCall(PetscBTDestroy(&bt));
3333:     PetscCall(PetscHSetIDestroy(&ht));
3334:     PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
3335:     PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
3336:     PetscCall(PetscFree2(d_nnz, o_nnz));
3337:   }

3339:   /* store matrix values */
3340:   for (i = 0; i < m; i++) {
3341:     PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1];
3342:     PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES));
3343:   }

3345:   PetscCall(PetscFree(rowidxs));
3346:   PetscCall(PetscFree2(colidxs, matvals));
3347:   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
3348:   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
3349:   PetscFunctionReturn(PETSC_SUCCESS);
3350: }

3352: PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer)
3353: {
3354:   PetscBool isbinary;

3356:   PetscFunctionBegin;
3357:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3358:   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3359:   PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer));
3360:   PetscFunctionReturn(PETSC_SUCCESS);
3361: }

3363: /*@
3364:   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrix's hash table

3366:   Input Parameters:
3367: + mat  - the matrix
3368: - fact - factor

3370:   Options Database Key:
3371: . -mat_use_hash_table <fact> - provide the factor

3373:   Level: advanced

3375: .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()`
3376: @*/
3377: PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact)
3378: {
3379:   PetscFunctionBegin;
3380:   PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact));
3381:   PetscFunctionReturn(PETSC_SUCCESS);
3382: }
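
/*
   Illustrative sketch (editor's addition, not part of the PETSc source): enabling the hash-table
   assembly path on an existing MATMPIBAIJ matrix A and setting its sizing factor, equivalent to
   the -mat_use_hash_table <fact> option; the factor 1.6 is hypothetical.

     PetscCall(MatSetOption(A, MAT_USE_HASH_TABLE, PETSC_TRUE));
     PetscCall(MatMPIBAIJSetHashTableFactor(A, 1.6));
*/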

3384: PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact)
3385: {
3386:   Mat_MPIBAIJ *baij;

3388:   PetscFunctionBegin;
3389:   baij          = (Mat_MPIBAIJ *)mat->data;
3390:   baij->ht_fact = fact;
3391:   PetscFunctionReturn(PETSC_SUCCESS);
3392: }

3394: PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
3395: {
3396:   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3397:   PetscBool    flg;

3399:   PetscFunctionBegin;
3400:   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg));
3401:   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input");
3402:   if (Ad) *Ad = a->A;
3403:   if (Ao) *Ao = a->B;
3404:   if (colmap) *colmap = a->garray;
3405:   PetscFunctionReturn(PETSC_SUCCESS);
3406: }

3408: /*
3409:     Special version for direct calls from Fortran (to eliminate two function call overheads)
3410: */
3411: #if defined(PETSC_HAVE_FORTRAN_CAPS)
3412:   #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3413: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3414:   #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3415: #endif

3417: // PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name
3418: /*@C
3419:   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()`

3421:   Collective

3423:   Input Parameters:
3424: + matin  - the matrix
3425: . min    - number of input rows
3426: . im     - input rows
3427: . nin    - number of input columns
3428: . in     - input columns
3429: . v      - numerical values input
3430: - addvin - `INSERT_VALUES` or `ADD_VALUES`

3432:   Level: advanced

3434:   Developer Notes:
3435:   This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.

3437: .seealso: `Mat`, `MatSetValuesBlocked()`
3438: @*/
3439: PETSC_EXTERN PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin)
3440: {
3441:   /* convert input arguments to C version */
3442:   Mat        mat = *matin;
3443:   PetscInt   m = *min, n = *nin;
3444:   InsertMode addv = *addvin;

3446:   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ *)mat->data;
3447:   const MatScalar *value;
3448:   MatScalar       *barray      = baij->barray;
3449:   PetscBool        roworiented = baij->roworiented;
3450:   PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
3451:   PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
3452:   PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

3454:   PetscFunctionBegin;
3455:   /* tasks normally handled by MatSetValuesBlocked() */
3456:   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3457:   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
3458:   PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
3459:   if (mat->assembled) {
3460:     mat->was_assembled = PETSC_TRUE;
3461:     mat->assembled     = PETSC_FALSE;
3462:   }
3463:   PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0));

3465:   if (!barray) {
3466:     PetscCall(PetscMalloc1(bs2, &barray));
3467:     baij->barray = barray;
3468:   }

3470:   if (roworiented) stepval = (n - 1) * bs;
3471:   else stepval = (m - 1) * bs;

3473:   for (i = 0; i < m; i++) {
3474:     if (im[i] < 0) continue;
3475:     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
3476:     if (im[i] >= rstart && im[i] < rend) {
3477:       row = im[i] - rstart;
3478:       for (j = 0; j < n; j++) {
3479:         /* If NumCol = 1 then a copy is not required */
3480:         if ((roworiented) && (n == 1)) {
3481:           barray = (MatScalar *)v + i * bs2;
3482:         } else if ((!roworiented) && (m == 1)) {
3483:           barray = (MatScalar *)v + j * bs2;
3484:         } else { /* Here a copy is required */
3485:           if (roworiented) {
3486:             value = v + i * (stepval + bs) * bs + j * bs;
3487:           } else {
3488:             value = v + j * (stepval + bs) * bs + i * bs;
3489:           }
3490:           for (ii = 0; ii < bs; ii++, value += stepval) {
3491:             for (jj = 0; jj < bs; jj++) *barray++ = *value++;
3492:           }
3493:           barray -= bs2;
3494:         }

3496:         if (in[j] >= cstart && in[j] < cend) {
3497:           col = in[j] - cstart;
3498:           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
3499:         } else if (in[j] < 0) {
3500:           continue;
3501:         } else {
3502:           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
3503:           if (mat->was_assembled) {
3504:             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

3506: #if defined(PETSC_USE_DEBUG)
3507:   #if defined(PETSC_USE_CTABLE)
3508:             {
3509:               PetscInt data;
3510:               PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
3511:               PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
3512:             }
3513:   #else
3514:             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
3515:   #endif
3516: #endif
3517: #if defined(PETSC_USE_CTABLE)
3518:             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
3519:             col = (col - 1) / bs;
3520: #else
3521:             col = (baij->colmap[in[j]] - 1) / bs;
3522: #endif
3523:             if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) {
3524:               PetscCall(MatDisAssemble_MPIBAIJ(mat));
3525:               col = in[j];
3526:             }
3527:           } else col = in[j];
3528:           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
3529:         }
3530:       }
3531:     } else {
3532:       if (!baij->donotstash) {
3533:         if (roworiented) {
3534:           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
3535:         } else {
3536:           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
3537:         }
3538:       }
3539:     }
3540:   }

3542:   /* task normally handled by MatSetValuesBlocked() */
3543:   PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
3544:   PetscFunctionReturn(PETSC_SUCCESS);
3545: }

3547: /*@
3548:   MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain the local rows in standard block
3549:   CSR format.

3551:   Collective

3553:   Input Parameters:
3554: + comm - MPI communicator
3555: . bs   - the block size, only a block size of 1 is supported
3556: . m    - number of local rows (Cannot be `PETSC_DECIDE`)
3557: . n    - This value should be the same as the local size used in creating the
3558:        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
3559:        calculated if N is given) For square matrices n is almost always m.
3560: . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
3561: . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
3562: . i    - row indices; that is i[0] = 0 and i[row] = i[row-1] + the number of blocks in block row row-1 of the matrix
3563: . j    - column indices
3564: - a    - matrix values

3566:   Output Parameter:
3567: . mat - the matrix

3569:   Level: intermediate

3571:   Notes:
3572:   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
3573:   thus you CANNOT change the matrix entries by changing the values of a[] after you have
3574:   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

3576:   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3577:   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3578:   block, followed by the second column of the first block, and so on.  That is, the blocks are contiguous in memory
3579:   with column-major ordering within blocks.

3581:   The `i` and `j` indices are 0 based, and the `i` entries are offsets into the local `j` array.

3583: .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
3584:           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
3585: @*/
3586: PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
3587: {
3588:   PetscFunctionBegin;
3589:   PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3590:   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
3591:   PetscCall(MatCreate(comm, mat));
3592:   PetscCall(MatSetSizes(*mat, m, n, M, N));
3593:   PetscCall(MatSetType(*mat, MATMPIBAIJ));
3594:   PetscCall(MatSetBlockSize(*mat, bs));
3595:   PetscCall(MatSetUp(*mat));
3596:   PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE));
3597:   PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a));
3598:   PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE));
3599:   PetscFunctionReturn(PETSC_SUCCESS);
3600: }
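
/*
   Illustrative sketch (editor's addition, not part of the PETSc source): building a matrix from
   local CSR arrays with MatCreateMPIBAIJWithArrays().  Since only bs = 1 is supported, the arrays
   below are ordinary (non-blocked) CSR for two hypothetical local rows of a matrix with three
   global columns; the arrays are copied, so they may be freed or reused afterwards.

     Mat               A;
     const PetscInt    i[] = {0, 2, 4};                 // row offsets for 2 local rows
     const PetscInt    j[] = {0, 1, 1, 2};              // global column indices
     const PetscScalar a[] = {2.0, -1.0, -1.0, 2.0};

     PetscCall(MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD, 1, 2, PETSC_DECIDE, PETSC_DETERMINE, 3, i, j, a, &A));
     PetscCall(MatDestroy(&A));
*/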

3602: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3603: {
3604:   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
3605:   PetscInt    *indx;
3606:   PetscScalar *values;

3608:   PetscFunctionBegin;
3609:   PetscCall(MatGetSize(inmat, &m, &N));
3610:   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3611:     Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data;
3612:     PetscInt    *dnz, *onz, mbs, Nbs, nbs;
3613:     PetscInt    *bindx, rmax = a->rmax, j;
3614:     PetscMPIInt  rank, size;

3616:     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3617:     mbs = m / bs;
3618:     Nbs = N / cbs;
3619:     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
3620:     nbs = n / cbs;

3622:     PetscCall(PetscMalloc1(rmax, &bindx));
3623:     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */

3625:     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3626:     PetscCallMPI(MPI_Comm_size(comm, &size));
3627:     if (rank == size - 1) {
3628:       /* Check sum(nbs) = Nbs */
3629:       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
3630:     }

3632:     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
3633:     for (i = 0; i < mbs; i++) {
3634:       PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
3635:       nnz = nnz / bs;
3636:       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
3637:       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
3638:       PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL));
3639:     }
3640:     PetscCall(PetscFree(bindx));

3642:     PetscCall(MatCreate(comm, outmat));
3643:     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
3644:     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
3645:     PetscCall(MatSetType(*outmat, MATBAIJ));
3646:     PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz));
3647:     PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
3648:     MatPreallocateEnd(dnz, onz);
3649:     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3650:   }

3652:   /* numeric phase */
3653:   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3654:   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));

3656:   for (i = 0; i < m; i++) {
3657:     PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3658:     Ii = i + rstart;
3659:     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
3660:     PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3661:   }
3662:   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
3663:   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
3664:   PetscFunctionReturn(PETSC_SUCCESS);
3665: }