Actual source code: blockmat.c
1: /*
2: This provides a matrix that consists of Mats
3: */
5: #include <petsc/private/matimpl.h>
6: #include <../src/mat/impls/baij/seq/baij.h>
8: typedef struct {
9: SEQAIJHEADER(Mat);
10: SEQBAIJHEADER;
11: Mat *diags;
13: Vec left, right, middle, workb; /* dummy vectors to perform local parts of product */
14: } Mat_BlockMat;
16: static PetscErrorCode MatSOR_BlockMat_Symmetric(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
17: {
18: Mat_BlockMat *a = (Mat_BlockMat *)A->data;
19: PetscScalar *x;
20: const Mat *v;
21: const PetscScalar *b;
22: PetscInt n = A->cmap->n, i, mbs = n / A->rmap->bs, j, bs = A->rmap->bs;
23: const PetscInt *idx;
24: IS row, col;
25: MatFactorInfo info;
26: Vec left = a->left, right = a->right, middle = a->middle;
27: Mat *diag;
29: PetscFunctionBegin;
30: its = its * lits;
31: PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
32: PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
33: PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for omega not equal to 1.0");
34: PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for fshift");
35: PetscCheck(!((flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) && !(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)), PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot do backward sweep without forward sweep");
37: if (!a->diags) {
38: PetscCall(PetscMalloc1(mbs, &a->diags));
39: PetscCall(MatFactorInfoInitialize(&info));
40: for (i = 0; i < mbs; i++) {
41: PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND, &row, &col));
42: PetscCall(MatCholeskyFactorSymbolic(a->diags[i], a->a[a->diag[i]], row, &info));
43: PetscCall(MatCholeskyFactorNumeric(a->diags[i], a->a[a->diag[i]], &info));
44: PetscCall(ISDestroy(&row));
45: PetscCall(ISDestroy(&col));
46: }
47: PetscCall(VecDuplicate(bb, &a->workb));
48: }
49: diag = a->diags;
51: PetscCall(VecSet(xx, 0.0));
52: PetscCall(VecGetArray(xx, &x));
53: /* copy right-hand side because it must be modified during iteration */
54: PetscCall(VecCopy(bb, a->workb));
55: PetscCall(VecGetArrayRead(a->workb, &b));
57: /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
58: while (its--) {
59: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
60: for (i = 0; i < mbs; i++) {
61: n = a->i[i + 1] - a->i[i] - 1;
62: idx = a->j + a->i[i] + 1;
63: v = a->a + a->i[i] + 1;
65: PetscCall(VecSet(left, 0.0));
66: for (j = 0; j < n; j++) {
67: PetscCall(VecPlaceArray(right, x + idx[j] * bs));
68: PetscCall(MatMultAdd(v[j], right, left, left));
69: PetscCall(VecResetArray(right));
70: }
71: PetscCall(VecPlaceArray(right, b + i * bs));
72: PetscCall(VecAYPX(left, -1.0, right));
73: PetscCall(VecResetArray(right));
75: PetscCall(VecPlaceArray(right, x + i * bs));
76: PetscCall(MatSolve(diag[i], left, right));
78: /* now adjust right-hand side, see MatSOR_SeqSBAIJ */
79: for (j = 0; j < n; j++) {
80: PetscCall(MatMultTranspose(v[j], right, left));
81: PetscCall(VecPlaceArray(middle, b + idx[j] * bs));
82: PetscCall(VecAXPY(middle, -1.0, left));
83: PetscCall(VecResetArray(middle));
84: }
85: PetscCall(VecResetArray(right));
86: }
87: }
88: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
89: for (i = mbs - 1; i >= 0; i--) {
90: n = a->i[i + 1] - a->i[i] - 1;
91: idx = a->j + a->i[i] + 1;
92: v = a->a + a->i[i] + 1;
94: PetscCall(VecSet(left, 0.0));
95: for (j = 0; j < n; j++) {
96: PetscCall(VecPlaceArray(right, x + idx[j] * bs));
97: PetscCall(MatMultAdd(v[j], right, left, left));
98: PetscCall(VecResetArray(right));
99: }
100: PetscCall(VecPlaceArray(right, b + i * bs));
101: PetscCall(VecAYPX(left, -1.0, right));
102: PetscCall(VecResetArray(right));
104: PetscCall(VecPlaceArray(right, x + i * bs));
105: PetscCall(MatSolve(diag[i], left, right));
106: PetscCall(VecResetArray(right));
107: }
108: }
109: }
110: PetscCall(VecRestoreArray(xx, &x));
111: PetscCall(VecRestoreArrayRead(a->workb, &b));
112: PetscFunctionReturn(PETSC_SUCCESS);
113: }
115: static PetscErrorCode MatSOR_BlockMat(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
116: {
117: Mat_BlockMat *a = (Mat_BlockMat *)A->data;
118: PetscScalar *x;
119: const Mat *v;
120: const PetscScalar *b;
121: PetscInt n = A->cmap->n, i, mbs = n / A->rmap->bs, j, bs = A->rmap->bs;
122: const PetscInt *idx;
123: IS row, col;
124: MatFactorInfo info;
125: Vec left = a->left, right = a->right;
126: Mat *diag;
128: PetscFunctionBegin;
129: its = its * lits;
130: PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
131: PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
132: PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for omega not equal to 1.0");
133: PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for fshift");
135: if (!a->diags) {
136: PetscCall(PetscMalloc1(mbs, &a->diags));
137: PetscCall(MatFactorInfoInitialize(&info));
138: for (i = 0; i < mbs; i++) {
139: PetscCall(MatGetOrdering(a->a[a->diag[i]], MATORDERINGND, &row, &col));
140: PetscCall(MatLUFactorSymbolic(a->diags[i], a->a[a->diag[i]], row, col, &info));
141: PetscCall(MatLUFactorNumeric(a->diags[i], a->a[a->diag[i]], &info));
142: PetscCall(ISDestroy(&row));
143: PetscCall(ISDestroy(&col));
144: }
145: }
146: diag = a->diags;
148: PetscCall(VecSet(xx, 0.0));
149: PetscCall(VecGetArray(xx, &x));
150: PetscCall(VecGetArrayRead(bb, &b));
152: /* need to add code for when initial guess is zero, see MatSOR_SeqAIJ */
153: while (its--) {
154: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
155: for (i = 0; i < mbs; i++) {
156: n = a->i[i + 1] - a->i[i];
157: idx = a->j + a->i[i];
158: v = a->a + a->i[i];
160: PetscCall(VecSet(left, 0.0));
161: for (j = 0; j < n; j++) {
162: if (idx[j] != i) {
163: PetscCall(VecPlaceArray(right, x + idx[j] * bs));
164: PetscCall(MatMultAdd(v[j], right, left, left));
165: PetscCall(VecResetArray(right));
166: }
167: }
168: PetscCall(VecPlaceArray(right, b + i * bs));
169: PetscCall(VecAYPX(left, -1.0, right));
170: PetscCall(VecResetArray(right));
172: PetscCall(VecPlaceArray(right, x + i * bs));
173: PetscCall(MatSolve(diag[i], left, right));
174: PetscCall(VecResetArray(right));
175: }
176: }
177: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
178: for (i = mbs - 1; i >= 0; i--) {
179: n = a->i[i + 1] - a->i[i];
180: idx = a->j + a->i[i];
181: v = a->a + a->i[i];
183: PetscCall(VecSet(left, 0.0));
184: for (j = 0; j < n; j++) {
185: if (idx[j] != i) {
186: PetscCall(VecPlaceArray(right, x + idx[j] * bs));
187: PetscCall(MatMultAdd(v[j], right, left, left));
188: PetscCall(VecResetArray(right));
189: }
190: }
191: PetscCall(VecPlaceArray(right, b + i * bs));
192: PetscCall(VecAYPX(left, -1.0, right));
193: PetscCall(VecResetArray(right));
195: PetscCall(VecPlaceArray(right, x + i * bs));
196: PetscCall(MatSolve(diag[i], left, right));
197: PetscCall(VecResetArray(right));
198: }
199: }
200: }
201: PetscCall(VecRestoreArray(xx, &x));
202: PetscCall(VecRestoreArrayRead(bb, &b));
203: PetscFunctionReturn(PETSC_SUCCESS);
204: }
206: static PetscErrorCode MatSetValues_BlockMat(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
207: {
208: Mat_BlockMat *a = (Mat_BlockMat *)A->data;
209: PetscInt *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1;
210: PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen;
211: PetscInt *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol;
212: PetscInt ridx, cidx;
213: PetscBool roworiented = a->roworiented;
214: MatScalar value;
215: Mat *ap, *aa = a->a;
217: PetscFunctionBegin;
218: for (k = 0; k < m; k++) { /* loop over added rows */
219: row = im[k];
220: brow = row / bs;
221: if (row < 0) continue;
222: PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1);
223: rp = aj + ai[brow];
224: ap = aa + ai[brow];
225: rmax = imax[brow];
226: nrow = ailen[brow];
227: low = 0;
228: high = nrow;
229: for (l = 0; l < n; l++) { /* loop over added columns */
230: if (in[l] < 0) continue;
231: PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1);
232: col = in[l];
233: bcol = col / bs;
234: if (A->symmetric == PETSC_BOOL3_TRUE && brow > bcol) continue;
235: ridx = row % bs;
236: cidx = col % bs;
237: if (roworiented) value = v[l + k * n];
238: else value = v[k + l * m];
240: if (col <= lastcol) low = 0;
241: else high = nrow;
242: lastcol = col;
243: while (high - low > 7) {
244: t = (low + high) / 2;
245: if (rp[t] > bcol) high = t;
246: else low = t;
247: }
248: for (i = low; i < high; i++) {
249: if (rp[i] > bcol) break;
250: if (rp[i] == bcol) goto noinsert1;
251: }
252: if (nonew == 1) goto noinsert1;
253: PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
254: MatSeqXAIJReallocateAIJ(A, a->mbs, 1, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, Mat);
255: N = nrow++ - 1;
256: high++;
257: /* shift up all the later entries in this row */
258: for (ii = N; ii >= i; ii--) {
259: rp[ii + 1] = rp[ii];
260: ap[ii + 1] = ap[ii];
261: }
262: if (N >= i) ap[i] = NULL;
263: rp[i] = bcol;
264: a->nz++;
265: noinsert1:;
266: if (!*(ap + i)) PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, bs, bs, 0, NULL, ap + i));
267: PetscCall(MatSetValues(ap[i], 1, &ridx, 1, &cidx, &value, is));
268: low = i;
269: }
270: ailen[brow] = nrow;
271: }
272: PetscFunctionReturn(PETSC_SUCCESS);
273: }
275: static PetscErrorCode MatLoad_BlockMat(Mat newmat, PetscViewer viewer)
276: {
277: Mat tmpA;
278: PetscInt i, j, m, n, bs = 1, ncols, *lens, currentcol, mbs, **ii, *ilens, nextcol, *llens, cnt = 0;
279: const PetscInt *cols;
280: const PetscScalar *values;
281: PetscBool flg = PETSC_FALSE, notdone;
282: Mat_SeqAIJ *a;
283: Mat_BlockMat *amat;
285: PetscFunctionBegin;
286: /* force binary viewer to load .info file if it has not yet done so */
287: PetscCall(PetscViewerSetUp(viewer));
288: PetscCall(MatCreate(PETSC_COMM_SELF, &tmpA));
289: PetscCall(MatSetType(tmpA, MATSEQAIJ));
290: PetscCall(MatLoad_SeqAIJ(tmpA, viewer));
292: PetscCall(MatGetLocalSize(tmpA, &m, &n));
293: PetscOptionsBegin(PETSC_COMM_SELF, NULL, "Options for loading BlockMat matrix 1", "Mat");
294: PetscCall(PetscOptionsInt("-matload_block_size", "Set the blocksize used to store the matrix", "MatLoad", bs, &bs, NULL));
295: PetscCall(PetscOptionsBool("-matload_symmetric", "Store the matrix as symmetric", "MatLoad", flg, &flg, NULL));
296: PetscOptionsEnd();
298: /* Determine number of nonzero blocks for each block row */
299: a = (Mat_SeqAIJ *)tmpA->data;
300: mbs = m / bs;
301: PetscCall(PetscMalloc3(mbs, &lens, bs, &ii, bs, &ilens));
302: PetscCall(PetscArrayzero(lens, mbs));
304: for (i = 0; i < mbs; i++) {
305: for (j = 0; j < bs; j++) {
306: ii[j] = a->j + a->i[i * bs + j];
307: ilens[j] = a->i[i * bs + j + 1] - a->i[i * bs + j];
308: }
310: currentcol = -1;
311: while (PETSC_TRUE) {
312: notdone = PETSC_FALSE;
313: nextcol = 1000000000;
314: for (j = 0; j < bs; j++) {
315: while (ilens[j] > 0 && ii[j][0] / bs <= currentcol) {
316: ii[j]++;
317: ilens[j]--;
318: }
319: if (ilens[j] > 0) {
320: notdone = PETSC_TRUE;
321: nextcol = PetscMin(nextcol, ii[j][0] / bs);
322: }
323: }
324: if (!notdone) break;
325: if (!flg || (nextcol >= i)) lens[i]++;
326: currentcol = nextcol;
327: }
328: }
330: if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) PetscCall(MatSetSizes(newmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
331: PetscCall(MatBlockMatSetPreallocation(newmat, bs, 0, lens));
332: if (flg) PetscCall(MatSetOption(newmat, MAT_SYMMETRIC, PETSC_TRUE));
333: amat = (Mat_BlockMat *)(newmat)->data;
335: /* preallocate the submatrices */
336: PetscCall(PetscMalloc1(bs, &llens));
337: for (i = 0; i < mbs; i++) { /* loops for block rows */
338: for (j = 0; j < bs; j++) {
339: ii[j] = a->j + a->i[i * bs + j];
340: ilens[j] = a->i[i * bs + j + 1] - a->i[i * bs + j];
341: }
343: currentcol = 1000000000;
344: for (j = 0; j < bs; j++) { /* loop over rows in block finding first nonzero block */
345: if (ilens[j] > 0) currentcol = PetscMin(currentcol, ii[j][0] / bs);
346: }
348: while (PETSC_TRUE) { /* loops over blocks in block row */
349: notdone = PETSC_FALSE;
350: nextcol = 1000000000;
351: PetscCall(PetscArrayzero(llens, bs));
352: for (j = 0; j < bs; j++) { /* loop over rows in block */
353: while (ilens[j] > 0 && ii[j][0] / bs <= currentcol) { /* loop over columns in row */
354: ii[j]++;
355: ilens[j]--;
356: llens[j]++;
357: }
358: if (ilens[j] > 0) {
359: notdone = PETSC_TRUE;
360: nextcol = PetscMin(nextcol, ii[j][0] / bs);
361: }
362: }
363: PetscCheck(cnt < amat->maxnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of blocks found greater than expected %" PetscInt_FMT, cnt);
364: if (!flg || currentcol >= i) {
365: amat->j[cnt] = currentcol;
366: PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, bs, bs, 0, llens, amat->a + cnt++));
367: }
369: if (!notdone) break;
370: currentcol = nextcol;
371: }
372: amat->ilen[i] = lens[i];
373: }
375: PetscCall(PetscFree3(lens, ii, ilens));
376: PetscCall(PetscFree(llens));
378: /* copy over the matrix, one row at a time */
379: for (i = 0; i < m; i++) {
380: PetscCall(MatGetRow(tmpA, i, &ncols, &cols, &values));
381: PetscCall(MatSetValues(newmat, 1, &i, ncols, cols, values, INSERT_VALUES));
382: PetscCall(MatRestoreRow(tmpA, i, &ncols, &cols, &values));
383: }
384: PetscCall(MatAssemblyBegin(newmat, MAT_FINAL_ASSEMBLY));
385: PetscCall(MatAssemblyEnd(newmat, MAT_FINAL_ASSEMBLY));
386: PetscFunctionReturn(PETSC_SUCCESS);
387: }
389: static PetscErrorCode MatView_BlockMat(Mat A, PetscViewer viewer)
390: {
391: Mat_BlockMat *a = (Mat_BlockMat *)A->data;
392: const char *name;
393: PetscViewerFormat format;
395: PetscFunctionBegin;
396: PetscCall(PetscObjectGetName((PetscObject)A, &name));
397: PetscCall(PetscViewerGetFormat(viewer, &format));
398: if (format == PETSC_VIEWER_ASCII_FACTOR_INFO || format == PETSC_VIEWER_ASCII_INFO) {
399: PetscCall(PetscViewerASCIIPrintf(viewer, "Nonzero block matrices = %" PetscInt_FMT " \n", a->nz));
400: if (A->symmetric == PETSC_BOOL3_TRUE) PetscCall(PetscViewerASCIIPrintf(viewer, "Only upper triangular part of symmetric matrix is stored\n"));
401: }
402: PetscFunctionReturn(PETSC_SUCCESS);
403: }
405: static PetscErrorCode MatDestroy_BlockMat(Mat mat)
406: {
407: Mat_BlockMat *bmat = (Mat_BlockMat *)mat->data;
408: PetscInt i;
410: PetscFunctionBegin;
411: PetscCall(VecDestroy(&bmat->right));
412: PetscCall(VecDestroy(&bmat->left));
413: PetscCall(VecDestroy(&bmat->middle));
414: PetscCall(VecDestroy(&bmat->workb));
415: if (bmat->diags) {
416: for (i = 0; i < mat->rmap->n / mat->rmap->bs; i++) PetscCall(MatDestroy(&bmat->diags[i]));
417: }
418: if (bmat->a) {
419: for (i = 0; i < bmat->nz; i++) PetscCall(MatDestroy(&bmat->a[i]));
420: }
421: PetscCall(MatSeqXAIJFreeAIJ(mat, (PetscScalar **)&bmat->a, &bmat->j, &bmat->i));
422: PetscCall(PetscFree(mat->data));
423: PetscFunctionReturn(PETSC_SUCCESS);
424: }
426: static PetscErrorCode MatMult_BlockMat(Mat A, Vec x, Vec y)
427: {
428: Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
429: PetscScalar *xx, *yy;
430: PetscInt *aj, i, *ii, jrow, m = A->rmap->n / A->rmap->bs, bs = A->rmap->bs, n, j;
431: Mat *aa;
433: PetscFunctionBegin;
434: /*
435: Standard CSR multiply except each entry is a Mat
436: */
437: PetscCall(VecGetArray(x, &xx));
439: PetscCall(VecSet(y, 0.0));
440: PetscCall(VecGetArray(y, &yy));
441: aj = bmat->j;
442: aa = bmat->a;
443: ii = bmat->i;
444: for (i = 0; i < m; i++) {
445: jrow = ii[i];
446: PetscCall(VecPlaceArray(bmat->left, yy + bs * i));
447: n = ii[i + 1] - jrow;
448: for (j = 0; j < n; j++) {
449: PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow]));
450: PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
451: PetscCall(VecResetArray(bmat->right));
452: jrow++;
453: }
454: PetscCall(VecResetArray(bmat->left));
455: }
456: PetscCall(VecRestoreArray(x, &xx));
457: PetscCall(VecRestoreArray(y, &yy));
458: PetscFunctionReturn(PETSC_SUCCESS);
459: }
461: static PetscErrorCode MatMult_BlockMat_Symmetric(Mat A, Vec x, Vec y)
462: {
463: Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
464: PetscScalar *xx, *yy;
465: PetscInt *aj, i, *ii, jrow, m = A->rmap->n / A->rmap->bs, bs = A->rmap->bs, n, j;
466: Mat *aa;
468: PetscFunctionBegin;
469: /*
470: Standard CSR multiply except each entry is a Mat
471: */
472: PetscCall(VecGetArray(x, &xx));
474: PetscCall(VecSet(y, 0.0));
475: PetscCall(VecGetArray(y, &yy));
476: aj = bmat->j;
477: aa = bmat->a;
478: ii = bmat->i;
479: for (i = 0; i < m; i++) {
480: jrow = ii[i];
481: n = ii[i + 1] - jrow;
482: PetscCall(VecPlaceArray(bmat->left, yy + bs * i));
483: PetscCall(VecPlaceArray(bmat->middle, xx + bs * i));
484: /* if we ALWAYS required a diagonal entry then could remove this if test */
485: if (aj[jrow] == i) {
486: PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow]));
487: PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
488: PetscCall(VecResetArray(bmat->right));
489: jrow++;
490: n--;
491: }
492: for (j = 0; j < n; j++) {
493: PetscCall(VecPlaceArray(bmat->right, xx + bs * aj[jrow])); /* upper triangular part */
494: PetscCall(MatMultAdd(aa[jrow], bmat->right, bmat->left, bmat->left));
495: PetscCall(VecResetArray(bmat->right));
497: PetscCall(VecPlaceArray(bmat->right, yy + bs * aj[jrow])); /* lower triangular part */
498: PetscCall(MatMultTransposeAdd(aa[jrow], bmat->middle, bmat->right, bmat->right));
499: PetscCall(VecResetArray(bmat->right));
500: jrow++;
501: }
502: PetscCall(VecResetArray(bmat->left));
503: PetscCall(VecResetArray(bmat->middle));
504: }
505: PetscCall(VecRestoreArray(x, &xx));
506: PetscCall(VecRestoreArray(y, &yy));
507: PetscFunctionReturn(PETSC_SUCCESS);
508: }
510: static PetscErrorCode MatMultAdd_BlockMat(Mat A, Vec x, Vec y, Vec z)
511: {
512: PetscFunctionBegin;
513: PetscFunctionReturn(PETSC_SUCCESS);
514: }
516: static PetscErrorCode MatMultTranspose_BlockMat(Mat A, Vec x, Vec y)
517: {
518: PetscFunctionBegin;
519: PetscFunctionReturn(PETSC_SUCCESS);
520: }
522: static PetscErrorCode MatMultTransposeAdd_BlockMat(Mat A, Vec x, Vec y, Vec z)
523: {
524: PetscFunctionBegin;
525: PetscFunctionReturn(PETSC_SUCCESS);
526: }
528: /*
529: Adds diagonal pointers to sparse matrix structure.
530: */
531: static PetscErrorCode MatMarkDiagonal_BlockMat(Mat A)
532: {
533: Mat_BlockMat *a = (Mat_BlockMat *)A->data;
534: PetscInt i, j, mbs = A->rmap->n / A->rmap->bs;
536: PetscFunctionBegin;
537: if (!a->diag) PetscCall(PetscMalloc1(mbs, &a->diag));
538: for (i = 0; i < mbs; i++) {
539: a->diag[i] = a->i[i + 1];
540: for (j = a->i[i]; j < a->i[i + 1]; j++) {
541: if (a->j[j] == i) {
542: a->diag[i] = j;
543: break;
544: }
545: }
546: }
547: PetscFunctionReturn(PETSC_SUCCESS);
548: }
550: static PetscErrorCode MatCreateSubMatrix_BlockMat(Mat A, IS isrow, IS iscol, MatReuse scall, Mat *B)
551: {
552: Mat_BlockMat *a = (Mat_BlockMat *)A->data;
553: Mat_SeqAIJ *c;
554: PetscInt i, k, first, step, lensi, nrows, ncols;
555: PetscInt *j_new, *i_new, *aj = a->j, *ailen = a->ilen;
556: PetscScalar *a_new;
557: Mat C, *aa = a->a;
558: PetscBool stride, equal;
560: PetscFunctionBegin;
561: PetscCall(ISEqual(isrow, iscol, &equal));
562: PetscCheck(equal, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only for identical column and row indices");
563: PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &stride));
564: PetscCheck(stride, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only for stride indices");
565: PetscCall(ISStrideGetInfo(iscol, &first, &step));
566: PetscCheck(step == A->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_SUP, "Can only select one entry from each block");
568: PetscCall(ISGetLocalSize(isrow, &nrows));
569: ncols = nrows;
571: /* create submatrix */
572: if (scall == MAT_REUSE_MATRIX) {
573: PetscInt n_cols, n_rows;
574: C = *B;
575: PetscCall(MatGetSize(C, &n_rows, &n_cols));
576: PetscCheck(n_rows == nrows && n_cols == ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Reused submatrix wrong size");
577: PetscCall(MatZeroEntries(C));
578: } else {
579: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
580: PetscCall(MatSetSizes(C, nrows, ncols, PETSC_DETERMINE, PETSC_DETERMINE));
581: if (A->symmetric == PETSC_BOOL3_TRUE) PetscCall(MatSetType(C, MATSEQSBAIJ));
582: else PetscCall(MatSetType(C, MATSEQAIJ));
583: PetscCall(MatSeqAIJSetPreallocation(C, 0, ailen));
584: PetscCall(MatSeqSBAIJSetPreallocation(C, 1, 0, ailen));
585: }
586: c = (Mat_SeqAIJ *)C->data;
588: /* loop over rows inserting into submatrix */
589: a_new = c->a;
590: j_new = c->j;
591: i_new = c->i;
593: for (i = 0; i < nrows; i++) {
594: lensi = ailen[i];
595: for (k = 0; k < lensi; k++) {
596: *j_new++ = *aj++;
597: PetscCall(MatGetValue(*aa++, first, first, a_new++));
598: }
599: i_new[i + 1] = i_new[i] + lensi;
600: c->ilen[i] = lensi;
601: }
603: PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
604: PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
605: *B = C;
606: PetscFunctionReturn(PETSC_SUCCESS);
607: }
609: static PetscErrorCode MatAssemblyEnd_BlockMat(Mat A, MatAssemblyType mode)
610: {
611: Mat_BlockMat *a = (Mat_BlockMat *)A->data;
612: PetscInt fshift = 0, i, j, *ai = a->i, *aj = a->j, *imax = a->imax;
613: PetscInt m = a->mbs, *ip, N, *ailen = a->ilen, rmax = 0;
614: Mat *aa = a->a, *ap;
616: PetscFunctionBegin;
617: if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);
619: if (m) rmax = ailen[0]; /* determine row with most nonzeros */
620: for (i = 1; i < m; i++) {
621: /* move each row back by the amount of empty slots (fshift) before it*/
622: fshift += imax[i - 1] - ailen[i - 1];
623: rmax = PetscMax(rmax, ailen[i]);
624: if (fshift) {
625: ip = aj + ai[i];
626: ap = aa + ai[i];
627: N = ailen[i];
628: for (j = 0; j < N; j++) {
629: ip[j - fshift] = ip[j];
630: ap[j - fshift] = ap[j];
631: }
632: }
633: ai[i] = ai[i - 1] + ailen[i - 1];
634: }
635: if (m) {
636: fshift += imax[m - 1] - ailen[m - 1];
637: ai[m] = ai[m - 1] + ailen[m - 1];
638: }
639: /* reset ilen and imax for each row */
640: for (i = 0; i < m; i++) ailen[i] = imax[i] = ai[i + 1] - ai[i];
641: a->nz = ai[m];
642: for (i = 0; i < a->nz; i++) {
643: PetscAssert(aa[i], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Null matrix at location %" PetscInt_FMT " column %" PetscInt_FMT " nz %" PetscInt_FMT, i, aj[i], a->nz);
644: PetscCall(MatAssemblyBegin(aa[i], MAT_FINAL_ASSEMBLY));
645: PetscCall(MatAssemblyEnd(aa[i], MAT_FINAL_ASSEMBLY));
646: }
647: PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded,%" PetscInt_FMT " used\n", m, A->cmap->n / A->cmap->bs, fshift, a->nz));
648: PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues() is %" PetscInt_FMT "\n", a->reallocs));
649: PetscCall(PetscInfo(A, "Maximum nonzeros in any row is %" PetscInt_FMT "\n", rmax));
651: A->info.mallocs += a->reallocs;
652: a->reallocs = 0;
653: A->info.nz_unneeded = (double)fshift;
654: a->rmax = rmax;
655: PetscCall(MatMarkDiagonal_BlockMat(A));
656: PetscFunctionReturn(PETSC_SUCCESS);
657: }
659: static PetscErrorCode MatSetOption_BlockMat(Mat A, MatOption opt, PetscBool flg)
660: {
661: PetscFunctionBegin;
662: if (opt == MAT_SYMMETRIC && flg) {
663: A->ops->sor = MatSOR_BlockMat_Symmetric;
664: A->ops->mult = MatMult_BlockMat_Symmetric;
665: } else {
666: PetscCall(PetscInfo(A, "Unused matrix option %s\n", MatOptions[opt]));
667: }
668: PetscFunctionReturn(PETSC_SUCCESS);
669: }
671: static struct _MatOps MatOps_Values = {MatSetValues_BlockMat,
672: NULL,
673: NULL,
674: MatMult_BlockMat,
675: /* 4*/ MatMultAdd_BlockMat,
676: MatMultTranspose_BlockMat,
677: MatMultTransposeAdd_BlockMat,
678: NULL,
679: NULL,
680: NULL,
681: /* 10*/ NULL,
682: NULL,
683: NULL,
684: MatSOR_BlockMat,
685: NULL,
686: /* 15*/ NULL,
687: NULL,
688: NULL,
689: NULL,
690: NULL,
691: /* 20*/ NULL,
692: MatAssemblyEnd_BlockMat,
693: MatSetOption_BlockMat,
694: NULL,
695: /* 24*/ NULL,
696: NULL,
697: NULL,
698: NULL,
699: NULL,
700: /* 29*/ NULL,
701: NULL,
702: NULL,
703: NULL,
704: NULL,
705: /* 34*/ NULL,
706: NULL,
707: NULL,
708: NULL,
709: NULL,
710: /* 39*/ NULL,
711: NULL,
712: NULL,
713: NULL,
714: NULL,
715: /* 44*/ NULL,
716: NULL,
717: MatShift_Basic,
718: NULL,
719: NULL,
720: /* 49*/ NULL,
721: NULL,
722: NULL,
723: NULL,
724: NULL,
725: /* 54*/ NULL,
726: NULL,
727: NULL,
728: NULL,
729: NULL,
730: /* 59*/ MatCreateSubMatrix_BlockMat,
731: MatDestroy_BlockMat,
732: MatView_BlockMat,
733: NULL,
734: NULL,
735: /* 64*/ NULL,
736: NULL,
737: NULL,
738: NULL,
739: NULL,
740: /* 69*/ NULL,
741: NULL,
742: NULL,
743: NULL,
744: NULL,
745: /* 74*/ NULL,
746: NULL,
747: NULL,
748: NULL,
749: NULL,
750: /* 79*/ NULL,
751: NULL,
752: NULL,
753: NULL,
754: MatLoad_BlockMat,
755: /* 84*/ NULL,
756: NULL,
757: NULL,
758: NULL,
759: NULL,
760: /* 89*/ NULL,
761: NULL,
762: NULL,
763: NULL,
764: NULL,
765: /* 94*/ NULL,
766: NULL,
767: NULL,
768: NULL,
769: NULL,
770: /* 99*/ NULL,
771: NULL,
772: NULL,
773: NULL,
774: NULL,
775: /*104*/ NULL,
776: NULL,
777: NULL,
778: NULL,
779: NULL,
780: /*109*/ NULL,
781: NULL,
782: NULL,
783: NULL,
784: NULL,
785: /*114*/ NULL,
786: NULL,
787: NULL,
788: NULL,
789: NULL,
790: /*119*/ NULL,
791: NULL,
792: NULL,
793: NULL,
794: NULL,
795: /*124*/ NULL,
796: NULL,
797: NULL,
798: NULL,
799: NULL,
800: /*129*/ NULL,
801: NULL,
802: NULL,
803: NULL,
804: NULL,
805: /*134*/ NULL,
806: NULL,
807: NULL,
808: NULL,
809: NULL,
810: /*139*/ NULL,
811: NULL,
812: NULL,
813: NULL,
814: NULL,
815: /*144*/ NULL,
816: NULL,
817: NULL,
818: NULL,
819: NULL,
820: NULL,
821: /*150*/ NULL,
822: NULL,
823: NULL};
825: /*@C
826: MatBlockMatSetPreallocation - For good matrix assembly performance
827: the user should preallocate the matrix storage by setting the parameter nz
828: (or the array nnz). By setting these parameters accurately, performance
829: during matrix assembly can be increased by more than a factor of 50.
831: Collective
833: Input Parameters:
834: + B - The matrix
835: . bs - size of each block in matrix
836: . nz - number of nonzeros per block row (same for all rows)
837: - nnz - array containing the number of nonzeros in the various block rows
838: (possibly different for each row) or `NULL`
840: Level: intermediate
842: Notes:
843: If `nnz` is given then `nz` is ignored
845: Specify the preallocated storage with either `nz` or `nnz` (not both).
846: Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
847: allocation.
849: .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBlockMat()`, `MatSetValues()`
850: @*/
851: PetscErrorCode MatBlockMatSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
852: {
853: PetscFunctionBegin;
854: PetscTryMethod(B, "MatBlockMatSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz));
855: PetscFunctionReturn(PETSC_SUCCESS);
856: }
858: static PetscErrorCode MatBlockMatSetPreallocation_BlockMat(Mat A, PetscInt bs, PetscInt nz, PetscInt *nnz)
859: {
860: Mat_BlockMat *bmat = (Mat_BlockMat *)A->data;
861: PetscInt i;
863: PetscFunctionBegin;
864: PetscCall(PetscLayoutSetBlockSize(A->rmap, bs));
865: PetscCall(PetscLayoutSetBlockSize(A->cmap, bs));
866: PetscCall(PetscLayoutSetUp(A->rmap));
867: PetscCall(PetscLayoutSetUp(A->cmap));
868: PetscCall(PetscLayoutGetBlockSize(A->rmap, &bs));
870: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
871: PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz);
872: if (nnz) {
873: for (i = 0; i < A->rmap->n / bs; i++) {
874: PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]);
875: PetscCheck(nnz[i] <= A->cmap->n / bs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], A->cmap->n / bs);
876: }
877: }
878: bmat->mbs = A->rmap->n / bs;
880: PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs, NULL, &bmat->right));
881: PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs, NULL, &bmat->middle));
882: PetscCall(VecCreateSeq(PETSC_COMM_SELF, bs, &bmat->left));
884: if (!bmat->imax) PetscCall(PetscMalloc2(A->rmap->n, &bmat->imax, A->rmap->n, &bmat->ilen));
885: if (PetscLikely(nnz)) {
886: nz = 0;
887: for (i = 0; i < A->rmap->n / A->rmap->bs; i++) {
888: bmat->imax[i] = nnz[i];
889: nz += nnz[i];
890: }
891: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Currently requires block row by row preallocation");
893: /* bmat->ilen will count nonzeros in each row so far. */
894: PetscCall(PetscArrayzero(bmat->ilen, bmat->mbs));
896: /* allocate the matrix space */
897: PetscCall(MatSeqXAIJFreeAIJ(A, (PetscScalar **)&bmat->a, &bmat->j, &bmat->i));
898: PetscCall(PetscMalloc3(nz, &bmat->a, nz, &bmat->j, A->rmap->n + 1, &bmat->i));
899: bmat->i[0] = 0;
900: for (i = 1; i < bmat->mbs + 1; i++) bmat->i[i] = bmat->i[i - 1] + bmat->imax[i - 1];
901: bmat->singlemalloc = PETSC_TRUE;
902: bmat->free_a = PETSC_TRUE;
903: bmat->free_ij = PETSC_TRUE;
905: bmat->nz = 0;
906: bmat->maxnz = nz;
907: A->info.nz_unneeded = (double)bmat->maxnz;
908: PetscCall(MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
909: PetscFunctionReturn(PETSC_SUCCESS);
910: }
912: /*MC
913: MATBLOCKMAT - A matrix that is defined by a set of `Mat`'s that represents a sparse block matrix
914: consisting of (usually) sparse blocks.
916: Level: advanced
918: .seealso: [](ch_matrices), `Mat`, `MatCreateBlockMat()`
919: M*/
921: PETSC_EXTERN PetscErrorCode MatCreate_BlockMat(Mat A)
922: {
923: Mat_BlockMat *b;
925: PetscFunctionBegin;
926: PetscCall(PetscNew(&b));
927: A->data = (void *)b;
928: A->ops[0] = MatOps_Values;
929: A->assembled = PETSC_TRUE;
930: A->preallocated = PETSC_FALSE;
931: PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATBLOCKMAT));
933: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatBlockMatSetPreallocation_C", MatBlockMatSetPreallocation_BlockMat));
934: PetscFunctionReturn(PETSC_SUCCESS);
935: }
937: /*@C
938: MatCreateBlockMat - Creates a new matrix in which each block contains a uniform-size sequential `Mat` object
940: Collective
942: Input Parameters:
943: + comm - MPI communicator
944: . m - number of rows
945: . n - number of columns
946: . bs - size of each submatrix
947: . nz - expected maximum number of nonzero blocks in row (use `PETSC_DEFAULT` if not known)
948: - nnz - expected number of nonzers per block row if known (use `NULL` otherwise)
950: Output Parameter:
951: . A - the matrix
953: Level: intermediate
955: Notes:
956: Matrices of this type are nominally-sparse matrices in which each "entry" is a `Mat` object. Each `Mat` must
957: have the same size and be sequential. The local and global sizes must be compatible with this decomposition.
959: For matrices containing parallel submatrices and variable block sizes, see `MATNEST`.
961: Developer Notes:
962: I don't like the name, it is not `MATNESTMAT`
964: .seealso: [](ch_matrices), `Mat`, `MATBLOCKMAT`, `MatCreateNest()`
965: @*/
966: PetscErrorCode MatCreateBlockMat(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt bs, PetscInt nz, PetscInt *nnz, Mat *A)
967: {
968: PetscFunctionBegin;
969: PetscCall(MatCreate(comm, A));
970: PetscCall(MatSetSizes(*A, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
971: PetscCall(MatSetType(*A, MATBLOCKMAT));
972: PetscCall(MatBlockMatSetPreallocation(*A, bs, nz, nnz));
973: PetscFunctionReturn(PETSC_SUCCESS);
974: }