Actual source code: maij.c
1: #include <../src/mat/impls/maij/maij.h>
2: #include <../src/mat/utils/freespace.h>
4: /*@
5: MatMAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATMAIJ` matrix
7: Not Collective, but if the `MATMAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel
9: Input Parameter:
10: . A - the `MATMAIJ` matrix
12: Output Parameter:
13: . B - the `MATAIJ` matrix
15: Level: advanced
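  Example Usage:
  A minimal usage sketch (assuming `M` is an existing `MATMAIJ` matrix, e.g. obtained from `MatCreateMAIJ()`):
.vb
  Mat aij;

  PetscCall(MatMAIJGetAIJ(M, &aij));
  PetscCall(MatView(aij, PETSC_VIEWER_STDOUT_WORLD));
  // do not destroy aij; its reference count was not increased
.ve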
17: Note:
18:   The reference count on the `MATAIJ` matrix is not increased, so you should not destroy it.
20: .seealso: [](ch_matrices), `Mat`, `MATMAIJ`, `MATAIJ`, `MatCreateMAIJ()`
21: @*/
22: PetscErrorCode MatMAIJGetAIJ(Mat A, Mat *B)
23: {
24: PetscBool ismpimaij, isseqmaij;
26: PetscFunctionBegin;
27: PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIMAIJ, &ismpimaij));
28: PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQMAIJ, &isseqmaij));
29: if (ismpimaij) {
30: Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
32: *B = b->A;
33: } else if (isseqmaij) {
34: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
36: *B = b->AIJ;
37: } else {
38: *B = A;
39: }
40: PetscFunctionReturn(PETSC_SUCCESS);
41: }
43: /*@
44: MatMAIJRedimension - Get a new `MATMAIJ` matrix with the same action, but for a different block size
46: Logically Collective
48: Input Parameters:
49: + A - the `MATMAIJ` matrix
50: - dof - the block size for the new matrix
52: Output Parameter:
53: . B - the new `MATMAIJ` matrix
55: Level: advanced
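  Example Usage:
  A minimal usage sketch (assuming `A` is an existing `MATMAIJ` matrix and a 5-component variant is wanted; the value 5 is purely illustrative):
.vb
  Mat B;

  PetscCall(MatMAIJRedimension(A, 5, &B));
  // ... use B ...
  PetscCall(MatDestroy(&B));
.ve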
57: .seealso: [](ch_matrices), `Mat`, `MATMAIJ`, `MatCreateMAIJ()`
58: @*/
59: PetscErrorCode MatMAIJRedimension(Mat A, PetscInt dof, Mat *B)
60: {
61: Mat Aij = NULL;
63: PetscFunctionBegin;
65: PetscCall(MatMAIJGetAIJ(A, &Aij));
66: PetscCall(MatCreateMAIJ(Aij, dof, B));
67: PetscFunctionReturn(PETSC_SUCCESS);
68: }
70: static PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
71: {
72: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
74: PetscFunctionBegin;
75: PetscCall(MatDestroy(&b->AIJ));
76: PetscCall(PetscFree(A->data));
77: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaijcusparse_C", NULL));
78: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaij_C", NULL));
79: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaij_seqmaij_C", NULL));
80: PetscFunctionReturn(PETSC_SUCCESS);
81: }
83: static PetscErrorCode MatSetUp_MAIJ(Mat A)
84: {
85: PetscFunctionBegin;
86: SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Must use MatCreateMAIJ() to create MAIJ matrices");
87: }
89: static PetscErrorCode MatView_SeqMAIJ(Mat A, PetscViewer viewer)
90: {
91: Mat B;
93: PetscFunctionBegin;
94: PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
95: PetscCall(MatView(B, viewer));
96: PetscCall(MatDestroy(&B));
97: PetscFunctionReturn(PETSC_SUCCESS);
98: }
100: static PetscErrorCode MatView_MPIMAIJ(Mat A, PetscViewer viewer)
101: {
102: Mat B;
104: PetscFunctionBegin;
105: PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B));
106: PetscCall(MatView(B, viewer));
107: PetscCall(MatDestroy(&B));
108: PetscFunctionReturn(PETSC_SUCCESS);
109: }
111: static PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
112: {
113: Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
115: PetscFunctionBegin;
116: PetscCall(MatDestroy(&b->AIJ));
117: PetscCall(MatDestroy(&b->OAIJ));
118: PetscCall(MatDestroy(&b->A));
119: PetscCall(VecScatterDestroy(&b->ctx));
120: PetscCall(VecDestroy(&b->w));
121: PetscCall(PetscFree(A->data));
122: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaijcusparse_C", NULL));
123: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaij_C", NULL));
124: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_mpiaij_mpimaij_C", NULL));
125: PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
126: PetscFunctionReturn(PETSC_SUCCESS);
127: }
129: /*MC
130: MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
131: multicomponent problems, interpolating or restricting each component the same way independently.
132: The matrix type is based on `MATSEQAIJ` for sequential matrices, and `MATMPIAIJ` for distributed matrices.
134: Operations provided:
135: .vb
136: MatMult()
137: MatMultTranspose()
138: MatMultAdd()
139: MatMultTransposeAdd()
140: .ve
142: Level: advanced
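  Example Usage:
  A minimal sketch of the operations listed above (assuming `maij` was created with `MatCreateMAIJ()` and `x`, `y` are vectors of compatible sizes):
.vb
  PetscCall(MatMult(maij, x, y));           // interpolate every component blockwise
  PetscCall(MatMultTranspose(maij, y, x));  // the corresponding restriction
.ve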
144: .seealso: [](ch_matrices), `Mat`, `MATAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MatCreateMAIJ()`
145: M*/
147: PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
148: {
149: Mat_MPIMAIJ *b;
150: PetscMPIInt size;
152: PetscFunctionBegin;
153: PetscCall(PetscNew(&b));
154: A->data = (void *)b;
156: PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps)));
158: A->ops->setup = MatSetUp_MAIJ;
160: b->AIJ = NULL;
161: b->dof = 0;
162: b->OAIJ = NULL;
163: b->ctx = NULL;
164: b->w = NULL;
165: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
166: if (size == 1) {
167: PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQMAIJ));
168: } else {
169: PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIMAIJ));
170: }
171: A->preallocated = PETSC_TRUE;
172: A->assembled = PETSC_TRUE;
173: PetscFunctionReturn(PETSC_SUCCESS);
174: }
176: #if PetscHasAttribute(always_inline)
177: #define PETSC_FORCE_INLINE __attribute__((always_inline))
178: #else
179: #define PETSC_FORCE_INLINE
180: #endif
182: #if defined(__clang__)
183: #define PETSC_PRAGMA_UNROLL _Pragma("unroll")
184: #else
185: #define PETSC_PRAGMA_UNROLL
186: #endif
188: enum {
189: MAT_SEQMAIJ_MAX_TEMPLATE_SIZE = 18
190: };
192: // try as hard as possible to get these "template"s inlined; GCC apparently does take the 'inline'
193: // keyword into account for these...
194: PETSC_FORCE_INLINE static inline PetscErrorCode MatMult_MatMultAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N)
195: {
196: const PetscBool mult_add = yy == NULL ? PETSC_FALSE : PETSC_TRUE;
197: const Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
198: const Mat baij = b->AIJ;
199: const Mat_SeqAIJ *a = (Mat_SeqAIJ *)baij->data;
200: const PetscInt m = baij->rmap->n;
201: const PetscInt nz = a->nz;
202: const PetscInt *idx = a->j;
203: const PetscInt *ii = a->i;
204: const PetscScalar *v = a->a;
205: PetscInt nonzerorow = 0;
206: const PetscScalar *x;
207: PetscScalar *z;
209: PetscFunctionBegin;
210: PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE);
211: if (mult_add && yy != zz) PetscCall(VecCopy(yy, zz));
212: PetscCall(VecGetArrayRead(xx, &x));
213: if (mult_add) {
214: PetscCall(VecGetArray(zz, &z));
215: } else {
216: PetscCall(VecGetArrayWrite(zz, &z));
217: }
219: for (PetscInt i = 0; i < m; ++i) {
220: PetscInt jrow = ii[i];
221: const PetscInt n = ii[i + 1] - jrow;
222: // leave a line so clang-format does not align these decls
223: PetscScalar sum[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE] = {0};
225: nonzerorow += n > 0;
226: for (PetscInt j = 0; j < n; ++j, ++jrow) {
227: const PetscScalar v_jrow = v[jrow];
228: const PetscInt N_idx_jrow = N * idx[jrow];
230: PETSC_PRAGMA_UNROLL
231: for (int k = 0; k < N; ++k) sum[k] += v_jrow * x[N_idx_jrow + k];
232: }
234: PETSC_PRAGMA_UNROLL
235: for (int k = 0; k < N; ++k) {
236: const PetscInt z_idx = N * i + k;
238: if (mult_add) {
239: z[z_idx] += sum[k];
240: } else {
241: z[z_idx] = sum[k];
242: }
243: }
244: }
245: PetscCall(PetscLogFlops(2 * N * nz - (mult_add ? 0 : (N * nonzerorow))));
246: PetscCall(VecRestoreArrayRead(xx, &x));
247: if (mult_add) {
248: PetscCall(VecRestoreArray(zz, &z));
249: } else {
250: PetscCall(VecRestoreArrayWrite(zz, &z));
251: }
252: PetscFunctionReturn(PETSC_SUCCESS);
253: }
255: PETSC_FORCE_INLINE static inline PetscErrorCode MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N)
256: {
257: const PetscBool mult_add = yy == NULL ? PETSC_FALSE : PETSC_TRUE;
258: const Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
259: const Mat baij = b->AIJ;
260: const Mat_SeqAIJ *a = (Mat_SeqAIJ *)baij->data;
261: const PetscInt m = baij->rmap->n;
262: const PetscInt nz = a->nz;
263: const PetscInt *a_j = a->j;
264: const PetscInt *a_i = a->i;
265: const PetscScalar *a_a = a->a;
266: const PetscScalar *x;
267: PetscScalar *z;
269: PetscFunctionBegin;
270: PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE);
271: if (mult_add) {
272: if (yy != zz) PetscCall(VecCopy(yy, zz));
273: } else {
274: PetscCall(VecSet(zz, 0.0));
275: }
276: PetscCall(VecGetArrayRead(xx, &x));
277: PetscCall(VecGetArray(zz, &z));
279: for (PetscInt i = 0; i < m; i++) {
280: const PetscInt a_ii = a_i[i];
281: const PetscInt *idx = PetscSafePointerPlusOffset(a_j, a_ii);
282: const PetscScalar *v = PetscSafePointerPlusOffset(a_a, a_ii);
283: const PetscInt n = a_i[i + 1] - a_ii;
284: PetscScalar alpha[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE];
286: PETSC_PRAGMA_UNROLL
287: for (int k = 0; k < N; ++k) alpha[k] = x[N * i + k];
288: for (PetscInt j = 0; j < n; ++j) {
289: const PetscInt N_idx_j = N * idx[j];
290: const PetscScalar v_j = v[j];
292: PETSC_PRAGMA_UNROLL
293: for (int k = 0; k < N; ++k) z[N_idx_j + k] += alpha[k] * v_j;
294: }
295: }
297: PetscCall(PetscLogFlops(2 * N * nz));
298: PetscCall(VecRestoreArrayRead(xx, &x));
299: PetscCall(VecRestoreArray(zz, &z));
300: PetscFunctionReturn(PETSC_SUCCESS);
301: }
303: #define MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(N) \
304: static PetscErrorCode PetscConcat(MatMult_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy) \
305: { \
306: PetscFunctionBegin; \
307: PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, N)); \
308: PetscFunctionReturn(PETSC_SUCCESS); \
309: } \
310: static PetscErrorCode PetscConcat(MatMultTranspose_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy) \
311: { \
312: PetscFunctionBegin; \
313: PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, N)); \
314: PetscFunctionReturn(PETSC_SUCCESS); \
315: } \
316: static PetscErrorCode PetscConcat(MatMultAdd_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy, Vec zz) \
317: { \
318: PetscFunctionBegin; \
319: PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, N)); \
320: PetscFunctionReturn(PETSC_SUCCESS); \
321: } \
322: static PetscErrorCode PetscConcat(MatMultTransposeAdd_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy, Vec zz) \
323: { \
324: PetscFunctionBegin; \
325: PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, N)); \
326: PetscFunctionReturn(PETSC_SUCCESS); \
327: }
329: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(2)
330: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(3)
331: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(4)
332: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(5)
333: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(6)
334: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(7)
335: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(8)
336: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(9)
337: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(10)
338: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(11)
339: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(16)
340: MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(18)
342: #undef MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE
344: static PetscErrorCode MatMult_SeqMAIJ_N(Mat A, Vec xx, Vec yy)
345: {
346: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
347: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
348: const PetscScalar *x, *v;
349: PetscScalar *y, *sums;
350: const PetscInt m = b->AIJ->rmap->n, *idx, *ii;
351: PetscInt n, i, jrow, j, dof = b->dof, k;
353: PetscFunctionBegin;
354: PetscCall(VecGetArrayRead(xx, &x));
355: PetscCall(VecSet(yy, 0.0));
356: PetscCall(VecGetArray(yy, &y));
357: idx = a->j;
358: v = a->a;
359: ii = a->i;
361: for (i = 0; i < m; i++) {
362: jrow = ii[i];
363: n = ii[i + 1] - jrow;
364: sums = y + dof * i;
365: for (j = 0; j < n; j++) {
366: for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k];
367: jrow++;
368: }
369: }
371: PetscCall(PetscLogFlops(2.0 * dof * a->nz));
372: PetscCall(VecRestoreArrayRead(xx, &x));
373: PetscCall(VecRestoreArray(yy, &y));
374: PetscFunctionReturn(PETSC_SUCCESS);
375: }
377: static PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz)
378: {
379: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
380: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
381: const PetscScalar *x, *v;
382: PetscScalar *y, *sums;
383: const PetscInt m = b->AIJ->rmap->n, *idx, *ii;
384: PetscInt n, i, jrow, j, dof = b->dof, k;
386: PetscFunctionBegin;
387: if (yy != zz) PetscCall(VecCopy(yy, zz));
388: PetscCall(VecGetArrayRead(xx, &x));
389: PetscCall(VecGetArray(zz, &y));
390: idx = a->j;
391: v = a->a;
392: ii = a->i;
394: for (i = 0; i < m; i++) {
395: jrow = ii[i];
396: n = ii[i + 1] - jrow;
397: sums = y + dof * i;
398: for (j = 0; j < n; j++) {
399: for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k];
400: jrow++;
401: }
402: }
404: PetscCall(PetscLogFlops(2.0 * dof * a->nz));
405: PetscCall(VecRestoreArrayRead(xx, &x));
406: PetscCall(VecRestoreArray(zz, &y));
407: PetscFunctionReturn(PETSC_SUCCESS);
408: }
410: static PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A, Vec xx, Vec yy)
411: {
412: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
413: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
414: const PetscScalar *x, *v, *alpha;
415: PetscScalar *y;
416: const PetscInt m = b->AIJ->rmap->n, *idx, dof = b->dof;
417: PetscInt n, i, k;
419: PetscFunctionBegin;
420: PetscCall(VecGetArrayRead(xx, &x));
421: PetscCall(VecSet(yy, 0.0));
422: PetscCall(VecGetArray(yy, &y));
423: for (i = 0; i < m; i++) {
424: idx = PetscSafePointerPlusOffset(a->j, a->i[i]);
425: v = PetscSafePointerPlusOffset(a->a, a->i[i]);
426: n = a->i[i + 1] - a->i[i];
427: alpha = x + dof * i;
428: while (n-- > 0) {
429: for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v);
430: idx++;
431: v++;
432: }
433: }
434: PetscCall(PetscLogFlops(2.0 * dof * a->nz));
435: PetscCall(VecRestoreArrayRead(xx, &x));
436: PetscCall(VecRestoreArray(yy, &y));
437: PetscFunctionReturn(PETSC_SUCCESS);
438: }
440: static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz)
441: {
442: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
443: Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data;
444: const PetscScalar *x, *v, *alpha;
445: PetscScalar *y;
446: const PetscInt m = b->AIJ->rmap->n, *idx, dof = b->dof;
447: PetscInt n, i, k;
449: PetscFunctionBegin;
450: if (yy != zz) PetscCall(VecCopy(yy, zz));
451: PetscCall(VecGetArrayRead(xx, &x));
452: PetscCall(VecGetArray(zz, &y));
453: for (i = 0; i < m; i++) {
454: idx = a->j + a->i[i];
455: v = a->a + a->i[i];
456: n = a->i[i + 1] - a->i[i];
457: alpha = x + dof * i;
458: while (n-- > 0) {
459: for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v);
460: idx++;
461: v++;
462: }
463: }
464: PetscCall(PetscLogFlops(2.0 * dof * a->nz));
465: PetscCall(VecRestoreArrayRead(xx, &x));
466: PetscCall(VecRestoreArray(zz, &y));
467: PetscFunctionReturn(PETSC_SUCCESS);
468: }
470: static PetscErrorCode MatMult_MPIMAIJ_dof(Mat A, Vec xx, Vec yy)
471: {
472: Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
474: PetscFunctionBegin;
475: /* start the scatter */
476: PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
477: PetscCall((*b->AIJ->ops->mult)(b->AIJ, xx, yy));
478: PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
479: PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, yy, yy));
480: PetscFunctionReturn(PETSC_SUCCESS);
481: }
483: static PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A, Vec xx, Vec yy)
484: {
485: Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
487: PetscFunctionBegin;
488: PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w));
489: PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ, xx, yy));
490: PetscCall(VecScatterBegin(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE));
491: PetscCall(VecScatterEnd(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE));
492: PetscFunctionReturn(PETSC_SUCCESS);
493: }
495: static PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz)
496: {
497: Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
499: PetscFunctionBegin;
500: /* start the scatter */
501: PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
502: PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, yy, zz));
503: PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
504: PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz));
505: PetscFunctionReturn(PETSC_SUCCESS);
506: }
508: static PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz)
509: {
510: Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;
512: PetscFunctionBegin;
513: PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w));
514: PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ, xx, yy, zz));
515: PetscCall(VecScatterBegin(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE));
516: PetscCall(VecScatterEnd(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE));
517: PetscFunctionReturn(PETSC_SUCCESS);
518: }
520: static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C)
521: {
522: Mat_Product *product = C->product;
524: PetscFunctionBegin;
525: if (product->type == MATPRODUCT_PtAP) {
526: C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ;
527: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices", MatProductTypes[product->type]);
528: PetscFunctionReturn(PETSC_SUCCESS);
529: }
531: static PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C)
532: {
533: Mat_Product *product = C->product;
534: PetscBool flg = PETSC_FALSE;
535: Mat A = product->A, P = product->B;
536: PetscInt alg = 1; /* set default algorithm */
537: #if !defined(PETSC_HAVE_HYPRE)
538: const char *algTypes[4] = {"scalable", "nonscalable", "allatonce", "allatonce_merged"};
539: PetscInt nalg = 4;
540: #else
541: const char *algTypes[5] = {"scalable", "nonscalable", "allatonce", "allatonce_merged", "hypre"};
542: PetscInt nalg = 5;
543: #endif
545: PetscFunctionBegin;
546: PetscCheck(product->type == MATPRODUCT_PtAP, PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices", MatProductTypes[product->type]);
548: /* PtAP */
549: /* Check matrix local sizes */
550: PetscCheck(A->rmap->rstart == P->rmap->rstart && A->rmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",
551: A->rmap->rstart, A->rmap->rend, P->rmap->rstart, P->rmap->rend);
552: PetscCheck(A->cmap->rstart == P->rmap->rstart && A->cmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",
553: A->cmap->rstart, A->cmap->rend, P->rmap->rstart, P->rmap->rend);
555: /* Set the default algorithm */
556: PetscCall(PetscStrcmp(C->product->alg, "default", &flg));
557: if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
559: /* Get runtime option */
560: PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
561: PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatPtAP", algTypes, nalg, algTypes[alg], &alg, &flg));
562: if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
563: PetscOptionsEnd();
565: PetscCall(PetscStrcmp(C->product->alg, "allatonce", &flg));
566: if (flg) {
567: C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
568: PetscFunctionReturn(PETSC_SUCCESS);
569: }
571: PetscCall(PetscStrcmp(C->product->alg, "allatonce_merged", &flg));
572: if (flg) {
573: C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
574: PetscFunctionReturn(PETSC_SUCCESS);
575: }
577: /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */
578: PetscCall(PetscInfo((PetscObject)A, "Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n"));
579: PetscCall(MatConvert(P, MATMPIAIJ, MAT_INPLACE_MATRIX, &P));
580: PetscCall(MatProductSetFromOptions(C));
581: PetscFunctionReturn(PETSC_SUCCESS);
582: }
584: static PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A, Mat PP, Mat C)
585: {
586: /* This routine requires testing -- first draft only */
587: Mat_SeqMAIJ *pp = (Mat_SeqMAIJ *)PP->data;
588: Mat P = pp->AIJ;
589: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
590: Mat_SeqAIJ *p = (Mat_SeqAIJ *)P->data;
591: Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data;
592: const PetscInt *ai = a->i, *aj = a->j, *pi = p->i, *pj = p->j, *pJ, *pjj;
593: const PetscInt *ci = c->i, *cj = c->j, *cjj;
594: const PetscInt am = A->rmap->N, cn = C->cmap->N, cm = C->rmap->N, ppdof = pp->dof;
595: PetscInt i, j, k, pshift, poffset, anzi, pnzi, apnzj, nextap, pnzj, prow, crow, *apj, *apjdense;
596: const MatScalar *aa = a->a, *pa = p->a, *pA, *paj;
597: MatScalar *ca = c->a, *caj, *apa;
599: PetscFunctionBegin;
600: /* Allocate temporary array for storage of one row of A*P */
601: PetscCall(PetscCalloc3(cn, &apa, cn, &apj, cn, &apjdense));
603: /* Clear old values in C */
604: PetscCall(PetscArrayzero(ca, ci[cm]));
606: for (i = 0; i < am; i++) {
607: /* Form sparse row of A*P */
608: anzi = ai[i + 1] - ai[i];
609: apnzj = 0;
610: for (j = 0; j < anzi; j++) {
611: /* Get offset within block of P */
612: pshift = *aj % ppdof;
613: /* Get block row of P */
614: prow = *aj++ / ppdof; /* integer division */
615: pnzj = pi[prow + 1] - pi[prow];
616: pjj = pj + pi[prow];
617: paj = pa + pi[prow];
618: for (k = 0; k < pnzj; k++) {
619: poffset = pjj[k] * ppdof + pshift;
620: if (!apjdense[poffset]) {
621: apjdense[poffset] = -1;
622: apj[apnzj++] = poffset;
623: }
624: apa[poffset] += (*aa) * paj[k];
625: }
626: PetscCall(PetscLogFlops(2.0 * pnzj));
627: aa++;
628: }
630: /* Sort the j index array for quick sparse axpy. */
631: /* Note: a array does not need sorting as it is in dense storage locations. */
632: PetscCall(PetscSortInt(apnzj, apj));
634: /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
635: prow = i / ppdof; /* integer division */
636: pshift = i % ppdof;
637: poffset = pi[prow];
638: pnzi = pi[prow + 1] - poffset;
639: /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
640: pJ = pj + poffset;
641: pA = pa + poffset;
642: for (j = 0; j < pnzi; j++) {
643: crow = (*pJ) * ppdof + pshift;
644: cjj = cj + ci[crow];
645: caj = ca + ci[crow];
646: pJ++;
647: /* Perform sparse axpy operation. Note cjj includes apj. */
648: for (k = 0, nextap = 0; nextap < apnzj; k++) {
649: if (cjj[k] == apj[nextap]) caj[k] += (*pA) * apa[apj[nextap++]];
650: }
651: PetscCall(PetscLogFlops(2.0 * apnzj));
652: pA++;
653: }
655: /* Zero the current row info for A*P */
656: for (j = 0; j < apnzj; j++) {
657: apa[apj[j]] = 0.;
658: apjdense[apj[j]] = 0;
659: }
660: }
662: /* Assemble the final matrix and clean up */
663: PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
664: PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
665: PetscCall(PetscFree3(apa, apj, apjdense));
666: PetscFunctionReturn(PETSC_SUCCESS);
667: }
669: static PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A, Mat PP, PetscReal fill, Mat C)
670: {
671: PetscFreeSpaceList free_space = NULL, current_space = NULL;
672: Mat_SeqMAIJ *pp = (Mat_SeqMAIJ *)PP->data;
673: Mat P = pp->AIJ;
674: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *p = (Mat_SeqAIJ *)P->data, *c;
675: PetscInt *pti, *ptj, *ptJ;
676: PetscInt *ci, *cj, *ptadenserow, *ptasparserow, *denserow, *sparserow, *ptaj;
677: const PetscInt an = A->cmap->N, am = A->rmap->N, pn = P->cmap->N, pm = P->rmap->N, ppdof = pp->dof;
678: PetscInt i, j, k, dof, pshift, ptnzi, arow, anzj, ptanzi, prow, pnzj, cnzi, cn;
679: MatScalar *ca;
680: const PetscInt *pi = p->i, *pj = p->j, *pjj, *ai = a->i, *aj = a->j, *ajj;
682: PetscFunctionBegin;
683: /* Get ij structure of P^T */
684: PetscCall(MatGetSymbolicTranspose_SeqAIJ(P, &pti, &ptj));
686: cn = pn * ppdof;
687: /* Allocate ci array, arrays for fill computation and */
688: /* free space for accumulating nonzero column info */
689: PetscCall(PetscMalloc1(cn + 1, &ci));
690: ci[0] = 0;
692: /* Work arrays for rows of P^T*A */
693: PetscCall(PetscMalloc4(an, &ptadenserow, an, &ptasparserow, cn, &denserow, cn, &sparserow));
694: PetscCall(PetscArrayzero(ptadenserow, an));
695: PetscCall(PetscArrayzero(denserow, cn));
697: /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
698: /* This should be reasonable if sparsity of PtAP is similar to that of A. */
699: /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
700: PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am] / pm, pn), &free_space));
701: current_space = free_space;
703: /* Determine symbolic info for each row of C: */
704: for (i = 0; i < pn; i++) {
705: ptnzi = pti[i + 1] - pti[i];
706: ptJ = ptj + pti[i];
707: for (dof = 0; dof < ppdof; dof++) {
708: ptanzi = 0;
709: /* Determine symbolic row of PtA: */
710: for (j = 0; j < ptnzi; j++) {
711: /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
712: arow = ptJ[j] * ppdof + dof;
713: /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
714: anzj = ai[arow + 1] - ai[arow];
715: ajj = aj + ai[arow];
716: for (k = 0; k < anzj; k++) {
717: if (!ptadenserow[ajj[k]]) {
718: ptadenserow[ajj[k]] = -1;
719: ptasparserow[ptanzi++] = ajj[k];
720: }
721: }
722: }
723: /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
724: ptaj = ptasparserow;
725: cnzi = 0;
726: for (j = 0; j < ptanzi; j++) {
727: /* Get offset within block of P */
728: pshift = *ptaj % ppdof;
729: /* Get block row of P */
730: prow = (*ptaj++) / ppdof; /* integer division */
731: /* P has same number of nonzeros per row as the compressed form */
732: pnzj = pi[prow + 1] - pi[prow];
733: pjj = pj + pi[prow];
734: for (k = 0; k < pnzj; k++) {
735: /* Locations in C are shifted by the offset within the block */
736: /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
737: if (!denserow[pjj[k] * ppdof + pshift]) {
738: denserow[pjj[k] * ppdof + pshift] = -1;
739: sparserow[cnzi++] = pjj[k] * ppdof + pshift;
740: }
741: }
742: }
744: /* sort sparserow */
745: PetscCall(PetscSortInt(cnzi, sparserow));
747: /* If free space is not available, make more free space */
748: /* Double the amount of total space in the list */
749:       if (current_space->local_remaining < cnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), &current_space));
751: /* Copy data into free space, and zero out denserows */
752: PetscCall(PetscArraycpy(current_space->array, sparserow, cnzi));
754: current_space->array += cnzi;
755: current_space->local_used += cnzi;
756: current_space->local_remaining -= cnzi;
758: for (j = 0; j < ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
759: for (j = 0; j < cnzi; j++) denserow[sparserow[j]] = 0;
761: /* Aside: Perhaps we should save the pta info for the numerical factorization. */
762: /* For now, we will recompute what is needed. */
763: ci[i * ppdof + 1 + dof] = ci[i * ppdof + dof] + cnzi;
764: }
765: }
766:   /* nnz is now stored in ci[cn], column indices are in the list of free space */
767: /* Allocate space for cj, initialize cj, and */
768: /* destroy list of free space and other temporary array(s) */
769: PetscCall(PetscMalloc1(ci[cn] + 1, &cj));
770: PetscCall(PetscFreeSpaceContiguous(&free_space, cj));
771: PetscCall(PetscFree4(ptadenserow, ptasparserow, denserow, sparserow));
773: /* Allocate space for ca */
774: PetscCall(PetscCalloc1(ci[cn] + 1, &ca));
776: /* put together the new matrix */
777: PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), cn, cn, ci, cj, ca, NULL, C));
778: PetscCall(MatSetBlockSize(C, pp->dof));
780: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
781: /* Since these are PETSc arrays, change flags to free them as necessary. */
782: c = (Mat_SeqAIJ *)C->data;
783: c->free_a = PETSC_TRUE;
784: c->free_ij = PETSC_TRUE;
785: c->nonew = 0;
787: C->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
788: C->ops->productnumeric = MatProductNumeric_PtAP;
790: /* Clean up. */
791: PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P, &pti, &ptj));
792: PetscFunctionReturn(PETSC_SUCCESS);
793: }
795: PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C)
796: {
797: Mat_Product *product = C->product;
798: Mat A = product->A, P = product->B;
800: PetscFunctionBegin;
801: PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A, P, product->fill, C));
802: PetscFunctionReturn(PETSC_SUCCESS);
803: }
805: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, Mat);
807: PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, Mat C)
808: {
809: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
811: PetscFunctionBegin;
812: PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, C));
813: PetscFunctionReturn(PETSC_SUCCESS);
814: }
816: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, PetscReal, Mat);
818: PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, PetscReal fill, Mat C)
819: {
820: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
822: PetscFunctionBegin;
823: PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, fill, C));
824: C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce;
825: PetscFunctionReturn(PETSC_SUCCESS);
826: }
828: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, Mat);
830: PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, Mat C)
831: {
832: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
834: PetscFunctionBegin;
835: PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, C));
836: PetscFunctionReturn(PETSC_SUCCESS);
837: }
839: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, PetscReal, Mat);
841: PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, PetscReal fill, Mat C)
842: {
843: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;
845: PetscFunctionBegin;
846: PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, fill, C));
847: C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged;
848: PetscFunctionReturn(PETSC_SUCCESS);
849: }
851: PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C)
852: {
853: Mat_Product *product = C->product;
854: Mat A = product->A, P = product->B;
855: PetscBool flg;
857: PetscFunctionBegin;
858: PetscCall(PetscStrcmp(product->alg, "allatonce", &flg));
859: if (flg) {
860: PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A, P, product->fill, C));
861: C->ops->productnumeric = MatProductNumeric_PtAP;
862: PetscFunctionReturn(PETSC_SUCCESS);
863: }
865: PetscCall(PetscStrcmp(product->alg, "allatonce_merged", &flg));
866: if (flg) {
867: PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A, P, product->fill, C));
868: C->ops->productnumeric = MatProductNumeric_PtAP;
869: PetscFunctionReturn(PETSC_SUCCESS);
870: }
872: SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Mat Product Algorithm is not supported");
873: }
875: PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
876: {
877: Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
878: Mat a = b->AIJ, B;
879: Mat_SeqAIJ *aij = (Mat_SeqAIJ *)a->data;
880: PetscInt m, n, i, ncols, *ilen, nmax = 0, *icols, j, k, ii, dof = b->dof;
881: PetscInt *cols;
882: PetscScalar *vals;
884: PetscFunctionBegin;
885: PetscCall(MatGetSize(a, &m, &n));
886: PetscCall(PetscMalloc1(dof * m, &ilen));
887: for (i = 0; i < m; i++) {
888: nmax = PetscMax(nmax, aij->ilen[i]);
889: for (j = 0; j < dof; j++) ilen[dof * i + j] = aij->ilen[i];
890: }
891: PetscCall(MatCreate(PETSC_COMM_SELF, &B));
892: PetscCall(MatSetSizes(B, dof * m, dof * n, dof * m, dof * n));
893: PetscCall(MatSetType(B, newtype));
894: PetscCall(MatSeqAIJSetPreallocation(B, 0, ilen));
895: PetscCall(PetscFree(ilen));
896: PetscCall(PetscMalloc1(nmax, &icols));
897: ii = 0;
898: for (i = 0; i < m; i++) {
899: PetscCall(MatGetRow_SeqAIJ(a, i, &ncols, &cols, &vals));
900: for (j = 0; j < dof; j++) {
901: for (k = 0; k < ncols; k++) icols[k] = dof * cols[k] + j;
902: PetscCall(MatSetValues_SeqAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES));
903: ii++;
904: }
905: PetscCall(MatRestoreRow_SeqAIJ(a, i, &ncols, &cols, &vals));
906: }
907: PetscCall(PetscFree(icols));
908: PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
909: PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
911: if (reuse == MAT_INPLACE_MATRIX) {
912: PetscCall(MatHeaderReplace(A, &B));
913: } else {
914: *newmat = B;
915: }
916: PetscFunctionReturn(PETSC_SUCCESS);
917: }
919: #include <../src/mat/impls/aij/mpi/mpiaij.h>
921: PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
922: {
923: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)A->data;
924: Mat MatAIJ = ((Mat_SeqMAIJ *)maij->AIJ->data)->AIJ, B;
925: Mat MatOAIJ = ((Mat_SeqMAIJ *)maij->OAIJ->data)->AIJ;
926: Mat_SeqAIJ *AIJ = (Mat_SeqAIJ *)MatAIJ->data;
927: Mat_SeqAIJ *OAIJ = (Mat_SeqAIJ *)MatOAIJ->data;
928: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)maij->A->data;
929: PetscInt dof = maij->dof, i, j, *dnz = NULL, *onz = NULL, nmax = 0, onmax = 0;
930: PetscInt *oicols = NULL, *icols = NULL, ncols, *cols = NULL, oncols, *ocols = NULL;
931: PetscInt rstart, cstart, *garray, ii, k;
932: PetscScalar *vals, *ovals;
934: PetscFunctionBegin;
935: PetscCall(PetscMalloc2(A->rmap->n, &dnz, A->rmap->n, &onz));
936: for (i = 0; i < A->rmap->n / dof; i++) {
937: nmax = PetscMax(nmax, AIJ->ilen[i]);
938: onmax = PetscMax(onmax, OAIJ->ilen[i]);
939: for (j = 0; j < dof; j++) {
940: dnz[dof * i + j] = AIJ->ilen[i];
941: onz[dof * i + j] = OAIJ->ilen[i];
942: }
943: }
944: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
945: PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
946: PetscCall(MatSetType(B, newtype));
947: PetscCall(MatMPIAIJSetPreallocation(B, 0, dnz, 0, onz));
948: PetscCall(MatSetBlockSize(B, dof));
949: PetscCall(PetscFree2(dnz, onz));
951: PetscCall(PetscMalloc2(nmax, &icols, onmax, &oicols));
952: rstart = dof * maij->A->rmap->rstart;
953: cstart = dof * maij->A->cmap->rstart;
954: garray = mpiaij->garray;
956: ii = rstart;
957: for (i = 0; i < A->rmap->n / dof; i++) {
958: PetscCall(MatGetRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals));
959: PetscCall(MatGetRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals));
960: for (j = 0; j < dof; j++) {
961: for (k = 0; k < ncols; k++) icols[k] = cstart + dof * cols[k] + j;
962: for (k = 0; k < oncols; k++) oicols[k] = dof * garray[ocols[k]] + j;
963: PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES));
964: PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, oncols, oicols, ovals, INSERT_VALUES));
965: ii++;
966: }
967: PetscCall(MatRestoreRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals));
968: PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals));
969: }
970: PetscCall(PetscFree2(icols, oicols));
972: PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
973: PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
975: if (reuse == MAT_INPLACE_MATRIX) {
976: PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
977: ((PetscObject)A)->refct = 1;
979: PetscCall(MatHeaderReplace(A, &B));
981: ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
982: } else {
983: *newmat = B;
984: }
985: PetscFunctionReturn(PETSC_SUCCESS);
986: }
988: static PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat)
989: {
990: Mat A;
992: PetscFunctionBegin;
993: PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
994: PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat));
995: PetscCall(MatDestroy(&A));
996: PetscFunctionReturn(PETSC_SUCCESS);
997: }
999: static PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *submat[])
1000: {
1001: Mat A;
1003: PetscFunctionBegin;
1004: PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
1005: PetscCall(MatCreateSubMatrices(A, n, irow, icol, scall, submat));
1006: PetscCall(MatDestroy(&A));
1007: PetscFunctionReturn(PETSC_SUCCESS);
1008: }
1010: /*@
1011: MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
1012: operations for multicomponent problems. It interpolates each component the same
1013: way independently. The matrix type is based on `MATSEQAIJ` for sequential matrices,
1014: and `MATMPIAIJ` for distributed matrices.
1016: Collective
1018: Input Parameters:
1019: + A - the `MATAIJ` matrix describing the action on blocks
1020: - dof - the block size (number of components per node)
1022: Output Parameter:
1023: . maij - the new `MATMAIJ` matrix
1025: Level: advanced
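  Example Usage:
  A minimal usage sketch (assuming `interp` is an existing `MATAIJ` interpolation matrix for a scalar problem, the full problem has 4 components per node, and `xc`, `xf` are coarse and fine vectors of matching sizes):
.vb
  Mat maij;

  PetscCall(MatCreateMAIJ(interp, 4, &maij));
  PetscCall(MatMult(maij, xc, xf));  // interpolates all 4 components at once
  PetscCall(MatDestroy(&maij));
.ve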
1027: .seealso: [](ch_matrices), `Mat`, `MATAIJ`, `MATMAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`
1028: @*/
1029: PetscErrorCode MatCreateMAIJ(Mat A, PetscInt dof, Mat *maij)
1030: {
1031: PetscInt n;
1032: Mat B;
1033: PetscBool flg;
1034: #if defined(PETSC_HAVE_CUDA)
1035: /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */
1036: PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE;
1037: #endif
1039: PetscFunctionBegin;
1040: dof = PetscAbs(dof);
1041: PetscCall(PetscObjectReference((PetscObject)A));
1043: if (dof == 1) *maij = A;
1044: else {
1045: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1046: /* propagate vec type */
1047: PetscCall(MatSetVecType(B, A->defaultvectype));
1048: PetscCall(MatSetSizes(B, dof * A->rmap->n, dof * A->cmap->n, dof * A->rmap->N, dof * A->cmap->N));
1049: PetscCall(PetscLayoutSetBlockSize(B->rmap, dof));
1050: PetscCall(PetscLayoutSetBlockSize(B->cmap, dof));
1051: PetscCall(PetscLayoutSetUp(B->rmap));
1052: PetscCall(PetscLayoutSetUp(B->cmap));
1054: B->assembled = PETSC_TRUE;
1056: PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &flg));
1057: if (flg) {
1058: Mat_SeqMAIJ *b;
1060: PetscCall(MatSetType(B, MATSEQMAIJ));
1062: B->ops->setup = NULL;
1063: B->ops->destroy = MatDestroy_SeqMAIJ;
1064: B->ops->view = MatView_SeqMAIJ;
1066: b = (Mat_SeqMAIJ *)B->data;
1067: b->dof = dof;
1068: b->AIJ = A;
1070: if (dof == 2) {
1071: B->ops->mult = MatMult_SeqMAIJ_2;
1072: B->ops->multadd = MatMultAdd_SeqMAIJ_2;
1073: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2;
1074: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
1075: } else if (dof == 3) {
1076: B->ops->mult = MatMult_SeqMAIJ_3;
1077: B->ops->multadd = MatMultAdd_SeqMAIJ_3;
1078: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3;
1079: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
1080: } else if (dof == 4) {
1081: B->ops->mult = MatMult_SeqMAIJ_4;
1082: B->ops->multadd = MatMultAdd_SeqMAIJ_4;
1083: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4;
1084: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
1085: } else if (dof == 5) {
1086: B->ops->mult = MatMult_SeqMAIJ_5;
1087: B->ops->multadd = MatMultAdd_SeqMAIJ_5;
1088: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5;
1089: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
1090: } else if (dof == 6) {
1091: B->ops->mult = MatMult_SeqMAIJ_6;
1092: B->ops->multadd = MatMultAdd_SeqMAIJ_6;
1093: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6;
1094: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
1095: } else if (dof == 7) {
1096: B->ops->mult = MatMult_SeqMAIJ_7;
1097: B->ops->multadd = MatMultAdd_SeqMAIJ_7;
1098: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7;
1099: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
1100: } else if (dof == 8) {
1101: B->ops->mult = MatMult_SeqMAIJ_8;
1102: B->ops->multadd = MatMultAdd_SeqMAIJ_8;
1103: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8;
1104: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
1105: } else if (dof == 9) {
1106: B->ops->mult = MatMult_SeqMAIJ_9;
1107: B->ops->multadd = MatMultAdd_SeqMAIJ_9;
1108: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9;
1109: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
1110: } else if (dof == 10) {
1111: B->ops->mult = MatMult_SeqMAIJ_10;
1112: B->ops->multadd = MatMultAdd_SeqMAIJ_10;
1113: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10;
1114: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
1115: } else if (dof == 11) {
1116: B->ops->mult = MatMult_SeqMAIJ_11;
1117: B->ops->multadd = MatMultAdd_SeqMAIJ_11;
1118: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11;
1119: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
1120: } else if (dof == 16) {
1121: B->ops->mult = MatMult_SeqMAIJ_16;
1122: B->ops->multadd = MatMultAdd_SeqMAIJ_16;
1123: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16;
1124: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
1125: } else if (dof == 18) {
1126: B->ops->mult = MatMult_SeqMAIJ_18;
1127: B->ops->multadd = MatMultAdd_SeqMAIJ_18;
1128: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18;
1129: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
1130: } else {
1131: B->ops->mult = MatMult_SeqMAIJ_N;
1132: B->ops->multadd = MatMultAdd_SeqMAIJ_N;
1133: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N;
1134: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
1135: }
1136: #if defined(PETSC_HAVE_CUDA)
1137: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaijcusparse_C", MatConvert_SeqMAIJ_SeqAIJ));
1138: #endif
1139: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaij_C", MatConvert_SeqMAIJ_SeqAIJ));
1140: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqmaij_C", MatProductSetFromOptions_SeqAIJ_SeqMAIJ));
1141: } else {
1142: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
1143: Mat_MPIMAIJ *b;
1144: IS from, to;
1145: Vec gvec;
1147: PetscCall(MatSetType(B, MATMPIMAIJ));
1149: B->ops->setup = NULL;
1150: B->ops->destroy = MatDestroy_MPIMAIJ;
1151: B->ops->view = MatView_MPIMAIJ;
1153: b = (Mat_MPIMAIJ *)B->data;
1154: b->dof = dof;
1155: b->A = A;
1157: PetscCall(MatCreateMAIJ(mpiaij->A, -dof, &b->AIJ));
1158: PetscCall(MatCreateMAIJ(mpiaij->B, -dof, &b->OAIJ));
1160: PetscCall(VecGetSize(mpiaij->lvec, &n));
1161: PetscCall(VecCreate(PETSC_COMM_SELF, &b->w));
1162: PetscCall(VecSetSizes(b->w, n * dof, n * dof));
1163: PetscCall(VecSetBlockSize(b->w, dof));
1164: PetscCall(VecSetType(b->w, VECSEQ));
1166:     /* create two temporary index sets used to build the scatter-gather context */
1167: PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A), dof, n, mpiaij->garray, PETSC_COPY_VALUES, &from));
1168: PetscCall(ISCreateStride(PETSC_COMM_SELF, n * dof, 0, 1, &to));
1170:     /* create a temporary global vector to generate the scatter context */
1171: PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), dof, dof * A->cmap->n, dof * A->cmap->N, NULL, &gvec));
1173: /* generate the scatter context */
1174: PetscCall(VecScatterCreate(gvec, from, b->w, to, &b->ctx));
1176: PetscCall(ISDestroy(&from));
1177: PetscCall(ISDestroy(&to));
1178: PetscCall(VecDestroy(&gvec));
1180: B->ops->mult = MatMult_MPIMAIJ_dof;
1181: B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof;
1182: B->ops->multadd = MatMultAdd_MPIMAIJ_dof;
1183: B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
1185: #if defined(PETSC_HAVE_CUDA)
1186: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaijcusparse_C", MatConvert_MPIMAIJ_MPIAIJ));
1187: #endif
1188: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaij_C", MatConvert_MPIMAIJ_MPIAIJ));
1189: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpimaij_C", MatProductSetFromOptions_MPIAIJ_MPIMAIJ));
1190: }
1191: B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ;
1192: B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ;
1193: PetscCall(MatSetUp(B));
1194: #if defined(PETSC_HAVE_CUDA)
1195:   /* temporary until we have a CUDA implementation of MAIJ */
1196: {
1197: PetscBool flg;
1198: if (convert) {
1199: PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, MATAIJCUSPARSE, ""));
1200: if (flg) PetscCall(MatConvert(B, ((PetscObject)A)->type_name, MAT_INPLACE_MATRIX, &B));
1201: }
1202: }
1203: #endif
1204: *maij = B;
1205: }
1206: PetscFunctionReturn(PETSC_SUCCESS);
1207: }