Actual source code: matmatmult.c
1: /*
2: Defines matrix-matrix product routines for pairs of SeqAIJ matrices
3: C = A * B
4: */
6: #include <../src/mat/impls/aij/seq/aij.h>
7: #include <../src/mat/utils/freespace.h>
8: #include <petscbt.h>
9: #include <petsc/private/isimpl.h>
10: #include <../src/mat/impls/dense/seq/dense.h>
12: PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A, Mat B, Mat C)
13: {
14: PetscFunctionBegin;
15: if (C->ops->matmultnumeric) PetscCall((*C->ops->matmultnumeric)(A, B, C));
16: else PetscCall(MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted(A, B, C));
17: PetscFunctionReturn(PETSC_SUCCESS);
18: }
20: /* Modified from MatCreateSeqAIJWithArrays() */
21: PETSC_INTERN PetscErrorCode MatSetSeqAIJWithArrays_private(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], MatType mtype, Mat mat)
22: {
23: PetscInt ii;
24: Mat_SeqAIJ *aij;
25: PetscBool isseqaij, osingle, ofree_a, ofree_ij;
27: PetscFunctionBegin;
28: PetscCheck(m <= 0 || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
29: PetscCall(MatSetSizes(mat, m, n, m, n));
31: if (!mtype) {
32: PetscCall(PetscObjectBaseTypeCompare((PetscObject)mat, MATSEQAIJ, &isseqaij));
33: if (!isseqaij) PetscCall(MatSetType(mat, MATSEQAIJ));
34: } else {
35: PetscCall(MatSetType(mat, mtype));
36: }
38: aij = (Mat_SeqAIJ *)(mat)->data;
39: osingle = aij->singlemalloc;
40: ofree_a = aij->free_a;
41: ofree_ij = aij->free_ij;
42: /* changes the free flags */
43: PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(mat, MAT_SKIP_ALLOCATION, NULL));
45: PetscCall(PetscFree(aij->ilen));
46: PetscCall(PetscFree(aij->imax));
47: PetscCall(PetscMalloc1(m, &aij->imax));
48: PetscCall(PetscMalloc1(m, &aij->ilen));
49: for (ii = 0, aij->nonzerorowcnt = 0, aij->rmax = 0; ii < m; ii++) {
50: const PetscInt rnz = i[ii + 1] - i[ii];
51: aij->nonzerorowcnt += !!rnz;
52: aij->rmax = PetscMax(aij->rmax, rnz);
53: aij->ilen[ii] = aij->imax[ii] = i[ii + 1] - i[ii];
54: }
55: aij->maxnz = i[m];
56: aij->nz = i[m];
58: if (osingle) {
59: PetscCall(PetscFree3(aij->a, aij->j, aij->i));
60: } else {
61: if (ofree_a) PetscCall(PetscFree(aij->a));
62: if (ofree_ij) PetscCall(PetscFree(aij->j));
63: if (ofree_ij) PetscCall(PetscFree(aij->i));
64: }
65: aij->i = i;
66: aij->j = j;
67: aij->a = a;
68: aij->nonew = -1; /* this indicates that inserting a new value in the matrix that generates a new nonzero is an error */
69: /* default to not retain ownership */
70: aij->singlemalloc = PETSC_FALSE;
71: aij->free_a = PETSC_FALSE;
72: aij->free_ij = PETSC_FALSE;
73: PetscCall(MatCheckCompressedRow(mat, aij->nonzerorowcnt, &aij->compressedrow, aij->i, m, 0.6));
74: PetscFunctionReturn(PETSC_SUCCESS);
75: }
77: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A, Mat B, PetscReal fill, Mat C)
78: {
79: Mat_Product *product = C->product;
80: MatProductAlgorithm alg;
81: PetscBool flg;
83: PetscFunctionBegin;
84: if (product) {
85: alg = product->alg;
86: } else {
87: alg = "sorted";
88: }
89: /* sorted */
90: PetscCall(PetscStrcmp(alg, "sorted", &flg));
91: if (flg) {
92: PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_Sorted(A, B, fill, C));
93: PetscFunctionReturn(PETSC_SUCCESS);
94: }
96: /* scalable */
97: PetscCall(PetscStrcmp(alg, "scalable", &flg));
98: if (flg) {
99: PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(A, B, fill, C));
100: PetscFunctionReturn(PETSC_SUCCESS);
101: }
103: /* scalable_fast */
104: PetscCall(PetscStrcmp(alg, "scalable_fast", &flg));
105: if (flg) {
106: PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_fast(A, B, fill, C));
107: PetscFunctionReturn(PETSC_SUCCESS);
108: }
110: /* heap */
111: PetscCall(PetscStrcmp(alg, "heap", &flg));
112: if (flg) {
113: PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_Heap(A, B, fill, C));
114: PetscFunctionReturn(PETSC_SUCCESS);
115: }
117: /* btheap */
118: PetscCall(PetscStrcmp(alg, "btheap", &flg));
119: if (flg) {
120: PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_BTHeap(A, B, fill, C));
121: PetscFunctionReturn(PETSC_SUCCESS);
122: }
124: /* llcondensed */
125: PetscCall(PetscStrcmp(alg, "llcondensed", &flg));
126: if (flg) {
127: PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_LLCondensed(A, B, fill, C));
128: PetscFunctionReturn(PETSC_SUCCESS);
129: }
131: /* rowmerge */
132: PetscCall(PetscStrcmp(alg, "rowmerge", &flg));
133: if (flg) {
134: PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ_RowMerge(A, B, fill, C));
135: PetscFunctionReturn(PETSC_SUCCESS);
136: }
138: #if defined(PETSC_HAVE_HYPRE)
139: PetscCall(PetscStrcmp(alg, "hypre", &flg));
140: if (flg) {
141: PetscCall(MatMatMultSymbolic_AIJ_AIJ_wHYPRE(A, B, fill, C));
142: PetscFunctionReturn(PETSC_SUCCESS);
143: }
144: #endif
146: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product Algorithm is not supported");
147: }
149: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_LLCondensed(Mat A, Mat B, PetscReal fill, Mat C)
150: {
151: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)B->data, *c;
152: PetscInt *ai = a->i, *bi = b->i, *ci, *cj;
153: PetscInt am = A->rmap->N, bn = B->cmap->N, bm = B->rmap->N;
154: PetscReal afill;
155: PetscInt i, j, anzi, brow, bnzj, cnzi, *bj, *aj, *lnk, ndouble = 0, Crmax;
156: PetscHMapI ta;
157: PetscBT lnkbt;
158: PetscFreeSpaceList free_space = NULL, current_space = NULL;
160: PetscFunctionBegin;
161: /* Get ci and cj */
162: /* Allocate ci array, arrays for fill computation and */
163: /* free space for accumulating nonzero column info */
164: PetscCall(PetscMalloc1(am + 2, &ci));
165: ci[0] = 0;
167: /* create and initialize a linked list */
168: PetscCall(PetscHMapICreateWithSize(bn, &ta));
169: MatRowMergeMax_SeqAIJ(b, bm, ta);
170: PetscCall(PetscHMapIGetSize(ta, &Crmax));
171: PetscCall(PetscHMapIDestroy(&ta));
173: PetscCall(PetscLLCondensedCreate(Crmax, bn, &lnk, &lnkbt));
175: /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
176: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, PetscIntSumTruncate(ai[am], bi[bm])), &free_space));
178: current_space = free_space;
180: /* Determine ci and cj */
181: for (i = 0; i < am; i++) {
182: anzi = ai[i + 1] - ai[i];
183: aj = a->j + ai[i];
184: for (j = 0; j < anzi; j++) {
185: brow = aj[j];
186: bnzj = bi[brow + 1] - bi[brow];
187: bj = b->j + bi[brow];
188: /* add non-zero cols of B into the sorted linked list lnk */
189: PetscCall(PetscLLCondensedAddSorted(bnzj, bj, lnk, lnkbt));
190: }
191: /* add possible missing diagonal entry */
192: if (C->force_diagonals) PetscCall(PetscLLCondensedAddSorted(1, &i, lnk, lnkbt));
193: cnzi = lnk[0];
195: /* If free space is not available, make more free space */
196: /* Double the amount of total space in the list */
197: if (current_space->local_remaining < cnzi) {
198: PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), ¤t_space));
199: ndouble++;
200: }
202: /* Copy data into free space, then initialize lnk */
203: PetscCall(PetscLLCondensedClean(bn, cnzi, current_space->array, lnk, lnkbt));
205: current_space->array += cnzi;
206: current_space->local_used += cnzi;
207: current_space->local_remaining -= cnzi;
209: ci[i + 1] = ci[i] + cnzi;
210: }
212: /* Column indices are in the list of free space */
213: /* Allocate space for cj, initialize cj, and */
214: /* destroy list of free space and other temporary array(s) */
215: PetscCall(PetscMalloc1(ci[am] + 1, &cj));
216: PetscCall(PetscFreeSpaceContiguous(&free_space, cj));
217: PetscCall(PetscLLCondensedDestroy(lnk, lnkbt));
219: /* put together the new symbolic matrix */
220: PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), am, bn, ci, cj, NULL, ((PetscObject)A)->type_name, C));
221: PetscCall(MatSetBlockSizesFromMats(C, A, B));
223: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
224: /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
225: c = (Mat_SeqAIJ *)(C->data);
226: c->free_a = PETSC_FALSE;
227: c->free_ij = PETSC_TRUE;
228: c->nonew = 0;
230: /* fast, needs non-scalable O(bn) array 'abdense' */
231: C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted;
233: /* set MatInfo */
234: afill = (PetscReal)ci[am] / (ai[am] + bi[bm]) + 1.e-5;
235: if (afill < 1.0) afill = 1.0;
236: C->info.mallocs = ndouble;
237: C->info.fill_ratio_given = fill;
238: C->info.fill_ratio_needed = afill;
240: #if defined(PETSC_USE_INFO)
241: if (ci[am]) {
242: PetscCall(PetscInfo(C, "Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n", ndouble, (double)fill, (double)afill));
243: PetscCall(PetscInfo(C, "Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n", (double)afill));
244: } else {
245: PetscCall(PetscInfo(C, "Empty matrix product\n"));
246: }
247: #endif
248: PetscFunctionReturn(PETSC_SUCCESS);
249: }
251: PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted(Mat A, Mat B, Mat C)
252: {
253: PetscLogDouble flops = 0.0;
254: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
255: Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
256: Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data;
257: PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bjj, *ci = c->i, *cj = c->j;
258: PetscInt am = A->rmap->n, cm = C->rmap->n;
259: PetscInt i, j, k, anzi, bnzi, cnzi, brow;
260: PetscScalar *ca, valtmp;
261: PetscScalar *ab_dense;
262: PetscContainer cab_dense;
263: const PetscScalar *aa, *ba, *baj;
265: PetscFunctionBegin;
266: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
267: PetscCall(MatSeqAIJGetArrayRead(B, &ba));
268: if (!c->a) { /* first call of MatMatMultNumeric_SeqAIJ_SeqAIJ, allocate ca and matmult_abdense */
269: PetscCall(PetscMalloc1(ci[cm] + 1, &ca));
270: c->a = ca;
271: c->free_a = PETSC_TRUE;
272: } else ca = c->a;
274: /* TODO this should be done in the symbolic phase */
275: /* However, this function is so heavily used (sometimes in an hidden way through multnumeric function pointers
276: that is hard to eradicate) */
277: PetscCall(PetscObjectQuery((PetscObject)C, "__PETSc__ab_dense", (PetscObject *)&cab_dense));
278: if (!cab_dense) {
279: PetscCall(PetscMalloc1(B->cmap->N, &ab_dense));
280: PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &cab_dense));
281: PetscCall(PetscContainerSetPointer(cab_dense, ab_dense));
282: PetscCall(PetscContainerSetUserDestroy(cab_dense, PetscContainerUserDestroyDefault));
283: PetscCall(PetscObjectCompose((PetscObject)C, "__PETSc__ab_dense", (PetscObject)cab_dense));
284: PetscCall(PetscObjectDereference((PetscObject)cab_dense));
285: }
286: PetscCall(PetscContainerGetPointer(cab_dense, (void **)&ab_dense));
287: PetscCall(PetscArrayzero(ab_dense, B->cmap->N));
289: /* clean old values in C */
290: PetscCall(PetscArrayzero(ca, ci[cm]));
291: /* Traverse A row-wise. */
292: /* Build the ith row in C by summing over nonzero columns in A, */
293: /* the rows of B corresponding to nonzeros of A. */
294: for (i = 0; i < am; i++) {
295: anzi = ai[i + 1] - ai[i];
296: for (j = 0; j < anzi; j++) {
297: brow = aj[j];
298: bnzi = bi[brow + 1] - bi[brow];
299: bjj = bj + bi[brow];
300: baj = ba + bi[brow];
301: /* perform dense axpy */
302: valtmp = aa[j];
303: for (k = 0; k < bnzi; k++) ab_dense[bjj[k]] += valtmp * baj[k];
304: flops += 2 * bnzi;
305: }
306: aj += anzi;
307: aa += anzi;
309: cnzi = ci[i + 1] - ci[i];
310: for (k = 0; k < cnzi; k++) {
311: ca[k] += ab_dense[cj[k]];
312: ab_dense[cj[k]] = 0.0; /* zero ab_dense */
313: }
314: flops += cnzi;
315: cj += cnzi;
316: ca += cnzi;
317: }
318: #if defined(PETSC_HAVE_DEVICE)
319: if (C->offloadmask != PETSC_OFFLOAD_UNALLOCATED) C->offloadmask = PETSC_OFFLOAD_CPU;
320: #endif
321: PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
322: PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
323: PetscCall(PetscLogFlops(flops));
324: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
325: PetscCall(MatSeqAIJRestoreArrayRead(B, &ba));
326: PetscFunctionReturn(PETSC_SUCCESS);
327: }
329: PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable(Mat A, Mat B, Mat C)
330: {
331: PetscLogDouble flops = 0.0;
332: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
333: Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
334: Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data;
335: PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bjj, *ci = c->i, *cj = c->j;
336: PetscInt am = A->rmap->N, cm = C->rmap->N;
337: PetscInt i, j, k, anzi, bnzi, cnzi, brow;
338: PetscScalar *ca = c->a, valtmp;
339: const PetscScalar *aa, *ba, *baj;
340: PetscInt nextb;
342: PetscFunctionBegin;
343: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
344: PetscCall(MatSeqAIJGetArrayRead(B, &ba));
345: if (!ca) { /* first call of MatMatMultNumeric_SeqAIJ_SeqAIJ, allocate ca and matmult_abdense */
346: PetscCall(PetscMalloc1(ci[cm] + 1, &ca));
347: c->a = ca;
348: c->free_a = PETSC_TRUE;
349: }
351: /* clean old values in C */
352: PetscCall(PetscArrayzero(ca, ci[cm]));
353: /* Traverse A row-wise. */
354: /* Build the ith row in C by summing over nonzero columns in A, */
355: /* the rows of B corresponding to nonzeros of A. */
356: for (i = 0; i < am; i++) {
357: anzi = ai[i + 1] - ai[i];
358: cnzi = ci[i + 1] - ci[i];
359: for (j = 0; j < anzi; j++) {
360: brow = aj[j];
361: bnzi = bi[brow + 1] - bi[brow];
362: bjj = bj + bi[brow];
363: baj = ba + bi[brow];
364: /* perform sparse axpy */
365: valtmp = aa[j];
366: nextb = 0;
367: for (k = 0; nextb < bnzi; k++) {
368: if (cj[k] == bjj[nextb]) { /* ccol == bcol */
369: ca[k] += valtmp * baj[nextb++];
370: }
371: }
372: flops += 2 * bnzi;
373: }
374: aj += anzi;
375: aa += anzi;
376: cj += cnzi;
377: ca += cnzi;
378: }
379: #if defined(PETSC_HAVE_DEVICE)
380: if (C->offloadmask != PETSC_OFFLOAD_UNALLOCATED) C->offloadmask = PETSC_OFFLOAD_CPU;
381: #endif
382: PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
383: PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
384: PetscCall(PetscLogFlops(flops));
385: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
386: PetscCall(MatSeqAIJRestoreArrayRead(B, &ba));
387: PetscFunctionReturn(PETSC_SUCCESS);
388: }
390: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_fast(Mat A, Mat B, PetscReal fill, Mat C)
391: {
392: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)B->data, *c;
393: PetscInt *ai = a->i, *bi = b->i, *ci, *cj;
394: PetscInt am = A->rmap->N, bn = B->cmap->N, bm = B->rmap->N;
395: MatScalar *ca;
396: PetscReal afill;
397: PetscInt i, j, anzi, brow, bnzj, cnzi, *bj, *aj, *lnk, ndouble = 0, Crmax;
398: PetscHMapI ta;
399: PetscFreeSpaceList free_space = NULL, current_space = NULL;
401: PetscFunctionBegin;
402: /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_fast() */
403: /* Allocate arrays for fill computation and free space for accumulating nonzero column */
404: PetscCall(PetscMalloc1(am + 2, &ci));
405: ci[0] = 0;
407: /* create and initialize a linked list */
408: PetscCall(PetscHMapICreateWithSize(bn, &ta));
409: MatRowMergeMax_SeqAIJ(b, bm, ta);
410: PetscCall(PetscHMapIGetSize(ta, &Crmax));
411: PetscCall(PetscHMapIDestroy(&ta));
413: PetscCall(PetscLLCondensedCreate_fast(Crmax, &lnk));
415: /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
416: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, PetscIntSumTruncate(ai[am], bi[bm])), &free_space));
417: current_space = free_space;
419: /* Determine ci and cj */
420: for (i = 0; i < am; i++) {
421: anzi = ai[i + 1] - ai[i];
422: aj = a->j + ai[i];
423: for (j = 0; j < anzi; j++) {
424: brow = aj[j];
425: bnzj = bi[brow + 1] - bi[brow];
426: bj = b->j + bi[brow];
427: /* add non-zero cols of B into the sorted linked list lnk */
428: PetscCall(PetscLLCondensedAddSorted_fast(bnzj, bj, lnk));
429: }
430: /* add possible missing diagonal entry */
431: if (C->force_diagonals) PetscCall(PetscLLCondensedAddSorted_fast(1, &i, lnk));
432: cnzi = lnk[1];
434: /* If free space is not available, make more free space */
435: /* Double the amount of total space in the list */
436: if (current_space->local_remaining < cnzi) {
437: PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), ¤t_space));
438: ndouble++;
439: }
441: /* Copy data into free space, then initialize lnk */
442: PetscCall(PetscLLCondensedClean_fast(cnzi, current_space->array, lnk));
444: current_space->array += cnzi;
445: current_space->local_used += cnzi;
446: current_space->local_remaining -= cnzi;
448: ci[i + 1] = ci[i] + cnzi;
449: }
451: /* Column indices are in the list of free space */
452: /* Allocate space for cj, initialize cj, and */
453: /* destroy list of free space and other temporary array(s) */
454: PetscCall(PetscMalloc1(ci[am] + 1, &cj));
455: PetscCall(PetscFreeSpaceContiguous(&free_space, cj));
456: PetscCall(PetscLLCondensedDestroy_fast(lnk));
458: /* Allocate space for ca */
459: PetscCall(PetscCalloc1(ci[am] + 1, &ca));
461: /* put together the new symbolic matrix */
462: PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), am, bn, ci, cj, ca, ((PetscObject)A)->type_name, C));
463: PetscCall(MatSetBlockSizesFromMats(C, A, B));
465: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
466: /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
467: c = (Mat_SeqAIJ *)(C->data);
468: c->free_a = PETSC_TRUE;
469: c->free_ij = PETSC_TRUE;
470: c->nonew = 0;
472: /* slower, less memory */
473: C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable;
475: /* set MatInfo */
476: afill = (PetscReal)ci[am] / (ai[am] + bi[bm]) + 1.e-5;
477: if (afill < 1.0) afill = 1.0;
478: C->info.mallocs = ndouble;
479: C->info.fill_ratio_given = fill;
480: C->info.fill_ratio_needed = afill;
482: #if defined(PETSC_USE_INFO)
483: if (ci[am]) {
484: PetscCall(PetscInfo(C, "Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n", ndouble, (double)fill, (double)afill));
485: PetscCall(PetscInfo(C, "Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n", (double)afill));
486: } else {
487: PetscCall(PetscInfo(C, "Empty matrix product\n"));
488: }
489: #endif
490: PetscFunctionReturn(PETSC_SUCCESS);
491: }
493: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(Mat A, Mat B, PetscReal fill, Mat C)
494: {
495: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)B->data, *c;
496: PetscInt *ai = a->i, *bi = b->i, *ci, *cj;
497: PetscInt am = A->rmap->N, bn = B->cmap->N, bm = B->rmap->N;
498: MatScalar *ca;
499: PetscReal afill;
500: PetscInt i, j, anzi, brow, bnzj, cnzi, *bj, *aj, *lnk, ndouble = 0, Crmax;
501: PetscHMapI ta;
502: PetscFreeSpaceList free_space = NULL, current_space = NULL;
504: PetscFunctionBegin;
505: /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_Scalalbe() */
506: /* Allocate arrays for fill computation and free space for accumulating nonzero column */
507: PetscCall(PetscMalloc1(am + 2, &ci));
508: ci[0] = 0;
510: /* create and initialize a linked list */
511: PetscCall(PetscHMapICreateWithSize(bn, &ta));
512: MatRowMergeMax_SeqAIJ(b, bm, ta);
513: PetscCall(PetscHMapIGetSize(ta, &Crmax));
514: PetscCall(PetscHMapIDestroy(&ta));
515: PetscCall(PetscLLCondensedCreate_Scalable(Crmax, &lnk));
517: /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
518: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, PetscIntSumTruncate(ai[am], bi[bm])), &free_space));
519: current_space = free_space;
521: /* Determine ci and cj */
522: for (i = 0; i < am; i++) {
523: anzi = ai[i + 1] - ai[i];
524: aj = a->j + ai[i];
525: for (j = 0; j < anzi; j++) {
526: brow = aj[j];
527: bnzj = bi[brow + 1] - bi[brow];
528: bj = b->j + bi[brow];
529: /* add non-zero cols of B into the sorted linked list lnk */
530: PetscCall(PetscLLCondensedAddSorted_Scalable(bnzj, bj, lnk));
531: }
532: /* add possible missing diagonal entry */
533: if (C->force_diagonals) PetscCall(PetscLLCondensedAddSorted_Scalable(1, &i, lnk));
535: cnzi = lnk[0];
537: /* If free space is not available, make more free space */
538: /* Double the amount of total space in the list */
539: if (current_space->local_remaining < cnzi) {
540: PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), ¤t_space));
541: ndouble++;
542: }
544: /* Copy data into free space, then initialize lnk */
545: PetscCall(PetscLLCondensedClean_Scalable(cnzi, current_space->array, lnk));
547: current_space->array += cnzi;
548: current_space->local_used += cnzi;
549: current_space->local_remaining -= cnzi;
551: ci[i + 1] = ci[i] + cnzi;
552: }
554: /* Column indices are in the list of free space */
555: /* Allocate space for cj, initialize cj, and */
556: /* destroy list of free space and other temporary array(s) */
557: PetscCall(PetscMalloc1(ci[am] + 1, &cj));
558: PetscCall(PetscFreeSpaceContiguous(&free_space, cj));
559: PetscCall(PetscLLCondensedDestroy_Scalable(lnk));
561: /* Allocate space for ca */
562: PetscCall(PetscCalloc1(ci[am] + 1, &ca));
564: /* put together the new symbolic matrix */
565: PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), am, bn, ci, cj, ca, ((PetscObject)A)->type_name, C));
566: PetscCall(MatSetBlockSizesFromMats(C, A, B));
568: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
569: /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
570: c = (Mat_SeqAIJ *)(C->data);
571: c->free_a = PETSC_TRUE;
572: c->free_ij = PETSC_TRUE;
573: c->nonew = 0;
575: /* slower, less memory */
576: C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable;
578: /* set MatInfo */
579: afill = (PetscReal)ci[am] / (ai[am] + bi[bm]) + 1.e-5;
580: if (afill < 1.0) afill = 1.0;
581: C->info.mallocs = ndouble;
582: C->info.fill_ratio_given = fill;
583: C->info.fill_ratio_needed = afill;
585: #if defined(PETSC_USE_INFO)
586: if (ci[am]) {
587: PetscCall(PetscInfo(C, "Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n", ndouble, (double)fill, (double)afill));
588: PetscCall(PetscInfo(C, "Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n", (double)afill));
589: } else {
590: PetscCall(PetscInfo(C, "Empty matrix product\n"));
591: }
592: #endif
593: PetscFunctionReturn(PETSC_SUCCESS);
594: }
596: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Heap(Mat A, Mat B, PetscReal fill, Mat C)
597: {
598: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)B->data, *c;
599: const PetscInt *ai = a->i, *bi = b->i, *aj = a->j, *bj = b->j;
600: PetscInt *ci, *cj, *bb;
601: PetscInt am = A->rmap->N, bn = B->cmap->N, bm = B->rmap->N;
602: PetscReal afill;
603: PetscInt i, j, col, ndouble = 0;
604: PetscFreeSpaceList free_space = NULL, current_space = NULL;
605: PetscHeap h;
607: PetscFunctionBegin;
608: /* Get ci and cj - by merging sorted rows using a heap */
609: /* Allocate arrays for fill computation and free space for accumulating nonzero column */
610: PetscCall(PetscMalloc1(am + 2, &ci));
611: ci[0] = 0;
613: /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
614: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, PetscIntSumTruncate(ai[am], bi[bm])), &free_space));
615: current_space = free_space;
617: PetscCall(PetscHeapCreate(a->rmax, &h));
618: PetscCall(PetscMalloc1(a->rmax, &bb));
620: /* Determine ci and cj */
621: for (i = 0; i < am; i++) {
622: const PetscInt anzi = ai[i + 1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */
623: const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */
624: ci[i + 1] = ci[i];
625: /* Populate the min heap */
626: for (j = 0; j < anzi; j++) {
627: bb[j] = bi[acol[j]]; /* bb points at the start of the row */
628: if (bb[j] < bi[acol[j] + 1]) { /* Add if row is nonempty */
629: PetscCall(PetscHeapAdd(h, j, bj[bb[j]++]));
630: }
631: }
632: /* Pick off the min element, adding it to free space */
633: PetscCall(PetscHeapPop(h, &j, &col));
634: while (j >= 0) {
635: if (current_space->local_remaining < 1) { /* double the size, but don't exceed 16 MiB */
636: PetscCall(PetscFreeSpaceGet(PetscMin(PetscIntMultTruncate(2, current_space->total_array_size), 16 << 20), ¤t_space));
637: ndouble++;
638: }
639: *(current_space->array++) = col;
640: current_space->local_used++;
641: current_space->local_remaining--;
642: ci[i + 1]++;
644: /* stash if anything else remains in this row of B */
645: if (bb[j] < bi[acol[j] + 1]) PetscCall(PetscHeapStash(h, j, bj[bb[j]++]));
646: while (1) { /* pop and stash any other rows of B that also had an entry in this column */
647: PetscInt j2, col2;
648: PetscCall(PetscHeapPeek(h, &j2, &col2));
649: if (col2 != col) break;
650: PetscCall(PetscHeapPop(h, &j2, &col2));
651: if (bb[j2] < bi[acol[j2] + 1]) PetscCall(PetscHeapStash(h, j2, bj[bb[j2]++]));
652: }
653: /* Put any stashed elements back into the min heap */
654: PetscCall(PetscHeapUnstash(h));
655: PetscCall(PetscHeapPop(h, &j, &col));
656: }
657: }
658: PetscCall(PetscFree(bb));
659: PetscCall(PetscHeapDestroy(&h));
661: /* Column indices are in the list of free space */
662: /* Allocate space for cj, initialize cj, and */
663: /* destroy list of free space and other temporary array(s) */
664: PetscCall(PetscMalloc1(ci[am], &cj));
665: PetscCall(PetscFreeSpaceContiguous(&free_space, cj));
667: /* put together the new symbolic matrix */
668: PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), am, bn, ci, cj, NULL, ((PetscObject)A)->type_name, C));
669: PetscCall(MatSetBlockSizesFromMats(C, A, B));
671: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
672: /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
673: c = (Mat_SeqAIJ *)(C->data);
674: c->free_a = PETSC_TRUE;
675: c->free_ij = PETSC_TRUE;
676: c->nonew = 0;
678: C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted;
680: /* set MatInfo */
681: afill = (PetscReal)ci[am] / (ai[am] + bi[bm]) + 1.e-5;
682: if (afill < 1.0) afill = 1.0;
683: C->info.mallocs = ndouble;
684: C->info.fill_ratio_given = fill;
685: C->info.fill_ratio_needed = afill;
687: #if defined(PETSC_USE_INFO)
688: if (ci[am]) {
689: PetscCall(PetscInfo(C, "Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n", ndouble, (double)fill, (double)afill));
690: PetscCall(PetscInfo(C, "Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n", (double)afill));
691: } else {
692: PetscCall(PetscInfo(C, "Empty matrix product\n"));
693: }
694: #endif
695: PetscFunctionReturn(PETSC_SUCCESS);
696: }
698: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_BTHeap(Mat A, Mat B, PetscReal fill, Mat C)
699: {
700: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)B->data, *c;
701: const PetscInt *ai = a->i, *bi = b->i, *aj = a->j, *bj = b->j;
702: PetscInt *ci, *cj, *bb;
703: PetscInt am = A->rmap->N, bn = B->cmap->N, bm = B->rmap->N;
704: PetscReal afill;
705: PetscInt i, j, col, ndouble = 0;
706: PetscFreeSpaceList free_space = NULL, current_space = NULL;
707: PetscHeap h;
708: PetscBT bt;
710: PetscFunctionBegin;
711: /* Get ci and cj - using a heap for the sorted rows, but use BT so that each index is only added once */
712: /* Allocate arrays for fill computation and free space for accumulating nonzero column */
713: PetscCall(PetscMalloc1(am + 2, &ci));
714: ci[0] = 0;
716: /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
717: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, PetscIntSumTruncate(ai[am], bi[bm])), &free_space));
719: current_space = free_space;
721: PetscCall(PetscHeapCreate(a->rmax, &h));
722: PetscCall(PetscMalloc1(a->rmax, &bb));
723: PetscCall(PetscBTCreate(bn, &bt));
725: /* Determine ci and cj */
726: for (i = 0; i < am; i++) {
727: const PetscInt anzi = ai[i + 1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */
728: const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */
729: const PetscInt *fptr = current_space->array; /* Save beginning of the row so we can clear the BT later */
730: ci[i + 1] = ci[i];
731: /* Populate the min heap */
732: for (j = 0; j < anzi; j++) {
733: PetscInt brow = acol[j];
734: for (bb[j] = bi[brow]; bb[j] < bi[brow + 1]; bb[j]++) {
735: PetscInt bcol = bj[bb[j]];
736: if (!PetscBTLookupSet(bt, bcol)) { /* new entry */
737: PetscCall(PetscHeapAdd(h, j, bcol));
738: bb[j]++;
739: break;
740: }
741: }
742: }
743: /* Pick off the min element, adding it to free space */
744: PetscCall(PetscHeapPop(h, &j, &col));
745: while (j >= 0) {
746: if (current_space->local_remaining < 1) { /* double the size, but don't exceed 16 MiB */
747: fptr = NULL; /* need PetscBTMemzero */
748: PetscCall(PetscFreeSpaceGet(PetscMin(PetscIntMultTruncate(2, current_space->total_array_size), 16 << 20), ¤t_space));
749: ndouble++;
750: }
751: *(current_space->array++) = col;
752: current_space->local_used++;
753: current_space->local_remaining--;
754: ci[i + 1]++;
756: /* stash if anything else remains in this row of B */
757: for (; bb[j] < bi[acol[j] + 1]; bb[j]++) {
758: PetscInt bcol = bj[bb[j]];
759: if (!PetscBTLookupSet(bt, bcol)) { /* new entry */
760: PetscCall(PetscHeapAdd(h, j, bcol));
761: bb[j]++;
762: break;
763: }
764: }
765: PetscCall(PetscHeapPop(h, &j, &col));
766: }
767: if (fptr) { /* Clear the bits for this row */
768: for (; fptr < current_space->array; fptr++) PetscCall(PetscBTClear(bt, *fptr));
769: } else { /* We reallocated so we don't remember (easily) how to clear only the bits we changed */
770: PetscCall(PetscBTMemzero(bn, bt));
771: }
772: }
773: PetscCall(PetscFree(bb));
774: PetscCall(PetscHeapDestroy(&h));
775: PetscCall(PetscBTDestroy(&bt));
777: /* Column indices are in the list of free space */
778: /* Allocate space for cj, initialize cj, and */
779: /* destroy list of free space and other temporary array(s) */
780: PetscCall(PetscMalloc1(ci[am], &cj));
781: PetscCall(PetscFreeSpaceContiguous(&free_space, cj));
783: /* put together the new symbolic matrix */
784: PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), am, bn, ci, cj, NULL, ((PetscObject)A)->type_name, C));
785: PetscCall(MatSetBlockSizesFromMats(C, A, B));
787: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
788: /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
789: c = (Mat_SeqAIJ *)(C->data);
790: c->free_a = PETSC_TRUE;
791: c->free_ij = PETSC_TRUE;
792: c->nonew = 0;
794: C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted;
796: /* set MatInfo */
797: afill = (PetscReal)ci[am] / (ai[am] + bi[bm]) + 1.e-5;
798: if (afill < 1.0) afill = 1.0;
799: C->info.mallocs = ndouble;
800: C->info.fill_ratio_given = fill;
801: C->info.fill_ratio_needed = afill;
803: #if defined(PETSC_USE_INFO)
804: if (ci[am]) {
805: PetscCall(PetscInfo(C, "Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n", ndouble, (double)fill, (double)afill));
806: PetscCall(PetscInfo(C, "Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n", (double)afill));
807: } else {
808: PetscCall(PetscInfo(C, "Empty matrix product\n"));
809: }
810: #endif
811: PetscFunctionReturn(PETSC_SUCCESS);
812: }
814: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_RowMerge(Mat A, Mat B, PetscReal fill, Mat C)
815: {
816: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)B->data, *c;
817: const PetscInt *ai = a->i, *bi = b->i, *aj = a->j, *bj = b->j, *inputi, *inputj, *inputcol, *inputcol_L1;
818: PetscInt *ci, *cj, *outputj, worki_L1[9], worki_L2[9];
819: PetscInt c_maxmem, a_maxrownnz = 0, a_rownnz;
820: const PetscInt workcol[8] = {0, 1, 2, 3, 4, 5, 6, 7};
821: const PetscInt am = A->rmap->N, bn = B->cmap->N, bm = B->rmap->N;
822: const PetscInt *brow_ptr[8], *brow_end[8];
823: PetscInt window[8];
824: PetscInt window_min, old_window_min, ci_nnz, outputi_nnz = 0, L1_nrows, L2_nrows;
825: PetscInt i, k, ndouble = 0, L1_rowsleft, rowsleft;
826: PetscReal afill;
827: PetscInt *workj_L1, *workj_L2, *workj_L3;
828: PetscInt L1_nnz, L2_nnz;
830: /* Step 1: Get upper bound on memory required for allocation.
831: Because of the way virtual memory works,
832: only the memory pages that are actually needed will be physically allocated. */
833: PetscFunctionBegin;
834: PetscCall(PetscMalloc1(am + 1, &ci));
835: for (i = 0; i < am; i++) {
836: const PetscInt anzi = ai[i + 1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */
837: const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */
838: a_rownnz = 0;
839: for (k = 0; k < anzi; ++k) {
840: a_rownnz += bi[acol[k] + 1] - bi[acol[k]];
841: if (a_rownnz > bn) {
842: a_rownnz = bn;
843: break;
844: }
845: }
846: a_maxrownnz = PetscMax(a_maxrownnz, a_rownnz);
847: }
848: /* temporary work areas for merging rows */
849: PetscCall(PetscMalloc1(a_maxrownnz * 8, &workj_L1));
850: PetscCall(PetscMalloc1(a_maxrownnz * 8, &workj_L2));
851: PetscCall(PetscMalloc1(a_maxrownnz, &workj_L3));
853: /* This should be enough for almost all matrices. If not, memory is reallocated later. */
854: c_maxmem = 8 * (ai[am] + bi[bm]);
855: /* Step 2: Populate pattern for C */
856: PetscCall(PetscMalloc1(c_maxmem, &cj));
858: ci_nnz = 0;
859: ci[0] = 0;
860: worki_L1[0] = 0;
861: worki_L2[0] = 0;
862: for (i = 0; i < am; i++) {
863: const PetscInt anzi = ai[i + 1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */
864: const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */
865: rowsleft = anzi;
866: inputcol_L1 = acol;
867: L2_nnz = 0;
868: L2_nrows = 1; /* Number of rows to be merged on Level 3. output of L3 already exists -> initial value 1 */
869: worki_L2[1] = 0;
870: outputi_nnz = 0;
872: /* If the number of indices in C so far + the max number of columns in the next row > c_maxmem -> allocate more memory */
873: while (ci_nnz + a_maxrownnz > c_maxmem) {
874: c_maxmem *= 2;
875: ndouble++;
876: PetscCall(PetscRealloc(sizeof(PetscInt) * c_maxmem, &cj));
877: }
879: while (rowsleft) {
880: L1_rowsleft = PetscMin(64, rowsleft); /* In the inner loop max 64 rows of B can be merged */
881: L1_nrows = 0;
882: L1_nnz = 0;
883: inputcol = inputcol_L1;
884: inputi = bi;
885: inputj = bj;
887: /* The following macro is used to specialize for small rows in A.
888: This helps with compiler unrolling, improving performance substantially.
889: Input: inputj inputi inputcol bn
890: Output: outputj outputi_nnz */
891: #define MatMatMultSymbolic_RowMergeMacro(ANNZ) \
892: do { \
893: window_min = bn; \
894: outputi_nnz = 0; \
895: for (k = 0; k < ANNZ; ++k) { \
896: brow_ptr[k] = inputj + inputi[inputcol[k]]; \
897: brow_end[k] = inputj + inputi[inputcol[k] + 1]; \
898: window[k] = (brow_ptr[k] != brow_end[k]) ? *brow_ptr[k] : bn; \
899: window_min = PetscMin(window[k], window_min); \
900: } \
901: while (window_min < bn) { \
902: outputj[outputi_nnz++] = window_min; \
903: /* advance front and compute new minimum */ \
904: old_window_min = window_min; \
905: window_min = bn; \
906: for (k = 0; k < ANNZ; ++k) { \
907: if (window[k] == old_window_min) { \
908: brow_ptr[k]++; \
909: window[k] = (brow_ptr[k] != brow_end[k]) ? *brow_ptr[k] : bn; \
910: } \
911: window_min = PetscMin(window[k], window_min); \
912: } \
913: } \
914: } while (0)
916: /************** L E V E L 1 ***************/
917: /* Merge up to 8 rows of B to L1 work array*/
918: while (L1_rowsleft) {
919: outputi_nnz = 0;
920: if (anzi > 8) outputj = workj_L1 + L1_nnz; /* Level 1 rowmerge*/
921: else outputj = cj + ci_nnz; /* Merge directly to C */
923: switch (L1_rowsleft) {
924: case 1:
925: brow_ptr[0] = inputj + inputi[inputcol[0]];
926: brow_end[0] = inputj + inputi[inputcol[0] + 1];
927: for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */
928: inputcol += L1_rowsleft;
929: rowsleft -= L1_rowsleft;
930: L1_rowsleft = 0;
931: break;
932: case 2:
933: MatMatMultSymbolic_RowMergeMacro(2);
934: inputcol += L1_rowsleft;
935: rowsleft -= L1_rowsleft;
936: L1_rowsleft = 0;
937: break;
938: case 3:
939: MatMatMultSymbolic_RowMergeMacro(3);
940: inputcol += L1_rowsleft;
941: rowsleft -= L1_rowsleft;
942: L1_rowsleft = 0;
943: break;
944: case 4:
945: MatMatMultSymbolic_RowMergeMacro(4);
946: inputcol += L1_rowsleft;
947: rowsleft -= L1_rowsleft;
948: L1_rowsleft = 0;
949: break;
950: case 5:
951: MatMatMultSymbolic_RowMergeMacro(5);
952: inputcol += L1_rowsleft;
953: rowsleft -= L1_rowsleft;
954: L1_rowsleft = 0;
955: break;
956: case 6:
957: MatMatMultSymbolic_RowMergeMacro(6);
958: inputcol += L1_rowsleft;
959: rowsleft -= L1_rowsleft;
960: L1_rowsleft = 0;
961: break;
962: case 7:
963: MatMatMultSymbolic_RowMergeMacro(7);
964: inputcol += L1_rowsleft;
965: rowsleft -= L1_rowsleft;
966: L1_rowsleft = 0;
967: break;
968: default:
969: MatMatMultSymbolic_RowMergeMacro(8);
970: inputcol += 8;
971: rowsleft -= 8;
972: L1_rowsleft -= 8;
973: break;
974: }
975: inputcol_L1 = inputcol;
976: L1_nnz += outputi_nnz;
977: worki_L1[++L1_nrows] = L1_nnz;
978: }
980: /********************** L E V E L 2 ************************/
981: /* Merge from L1 work array to either C or to L2 work array */
982: if (anzi > 8) {
983: inputi = worki_L1;
984: inputj = workj_L1;
985: inputcol = workcol;
986: outputi_nnz = 0;
988: if (anzi <= 64) outputj = cj + ci_nnz; /* Merge from L1 work array to C */
989: else outputj = workj_L2 + L2_nnz; /* Merge from L1 work array to L2 work array */
991: switch (L1_nrows) {
992: case 1:
993: brow_ptr[0] = inputj + inputi[inputcol[0]];
994: brow_end[0] = inputj + inputi[inputcol[0] + 1];
995: for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */
996: break;
997: case 2:
998: MatMatMultSymbolic_RowMergeMacro(2);
999: break;
1000: case 3:
1001: MatMatMultSymbolic_RowMergeMacro(3);
1002: break;
1003: case 4:
1004: MatMatMultSymbolic_RowMergeMacro(4);
1005: break;
1006: case 5:
1007: MatMatMultSymbolic_RowMergeMacro(5);
1008: break;
1009: case 6:
1010: MatMatMultSymbolic_RowMergeMacro(6);
1011: break;
1012: case 7:
1013: MatMatMultSymbolic_RowMergeMacro(7);
1014: break;
1015: case 8:
1016: MatMatMultSymbolic_RowMergeMacro(8);
1017: break;
1018: default:
1019: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatMatMult logic error: Not merging 1-8 rows from L1 work array!");
1020: }
1021: L2_nnz += outputi_nnz;
1022: worki_L2[++L2_nrows] = L2_nnz;
1024: /************************ L E V E L 3 **********************/
1025: /* Merge from L2 work array to either C or to L2 work array */
1026: if (anzi > 64 && (L2_nrows == 8 || rowsleft == 0)) {
1027: inputi = worki_L2;
1028: inputj = workj_L2;
1029: inputcol = workcol;
1030: outputi_nnz = 0;
1031: if (rowsleft) outputj = workj_L3;
1032: else outputj = cj + ci_nnz;
1033: switch (L2_nrows) {
1034: case 1:
1035: brow_ptr[0] = inputj + inputi[inputcol[0]];
1036: brow_end[0] = inputj + inputi[inputcol[0] + 1];
1037: for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */
1038: break;
1039: case 2:
1040: MatMatMultSymbolic_RowMergeMacro(2);
1041: break;
1042: case 3:
1043: MatMatMultSymbolic_RowMergeMacro(3);
1044: break;
1045: case 4:
1046: MatMatMultSymbolic_RowMergeMacro(4);
1047: break;
1048: case 5:
1049: MatMatMultSymbolic_RowMergeMacro(5);
1050: break;
1051: case 6:
1052: MatMatMultSymbolic_RowMergeMacro(6);
1053: break;
1054: case 7:
1055: MatMatMultSymbolic_RowMergeMacro(7);
1056: break;
1057: case 8:
1058: MatMatMultSymbolic_RowMergeMacro(8);
1059: break;
1060: default:
1061: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatMatMult logic error: Not merging 1-8 rows from L2 work array!");
1062: }
1063: L2_nrows = 1;
1064: L2_nnz = outputi_nnz;
1065: worki_L2[1] = outputi_nnz;
1066: /* Copy to workj_L2 */
1067: if (rowsleft) {
1068: for (k = 0; k < outputi_nnz; ++k) workj_L2[k] = outputj[k];
1069: }
1070: }
1071: }
1072: } /* while (rowsleft) */
1073: #undef MatMatMultSymbolic_RowMergeMacro
1075: /* terminate current row */
1076: ci_nnz += outputi_nnz;
1077: ci[i + 1] = ci_nnz;
1078: }
1080: /* Step 3: Create the new symbolic matrix */
1081: PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), am, bn, ci, cj, NULL, ((PetscObject)A)->type_name, C));
1082: PetscCall(MatSetBlockSizesFromMats(C, A, B));
1084: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
1085: /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
1086: c = (Mat_SeqAIJ *)(C->data);
1087: c->free_a = PETSC_TRUE;
1088: c->free_ij = PETSC_TRUE;
1089: c->nonew = 0;
1091: C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted;
1093: /* set MatInfo */
1094: afill = (PetscReal)ci[am] / (ai[am] + bi[bm]) + 1.e-5;
1095: if (afill < 1.0) afill = 1.0;
1096: C->info.mallocs = ndouble;
1097: C->info.fill_ratio_given = fill;
1098: C->info.fill_ratio_needed = afill;
1100: #if defined(PETSC_USE_INFO)
1101: if (ci[am]) {
1102: PetscCall(PetscInfo(C, "Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n", ndouble, (double)fill, (double)afill));
1103: PetscCall(PetscInfo(C, "Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n", (double)afill));
1104: } else {
1105: PetscCall(PetscInfo(C, "Empty matrix product\n"));
1106: }
1107: #endif
1109: /* Step 4: Free temporary work areas */
1110: PetscCall(PetscFree(workj_L1));
1111: PetscCall(PetscFree(workj_L2));
1112: PetscCall(PetscFree(workj_L3));
1113: PetscFunctionReturn(PETSC_SUCCESS);
1114: }
1116: /* concatenate unique entries and then sort */
1117: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Sorted(Mat A, Mat B, PetscReal fill, Mat C)
1118: {
1119: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)B->data, *c;
1120: const PetscInt *ai = a->i, *bi = b->i, *aj = a->j, *bj = b->j;
1121: PetscInt *ci, *cj, bcol;
1122: PetscInt am = A->rmap->N, bn = B->cmap->N, bm = B->rmap->N;
1123: PetscReal afill;
1124: PetscInt i, j, ndouble = 0;
1125: PetscSegBuffer seg, segrow;
1126: char *seen;
1128: PetscFunctionBegin;
1129: PetscCall(PetscMalloc1(am + 1, &ci));
1130: ci[0] = 0;
1132: /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
1133: PetscCall(PetscSegBufferCreate(sizeof(PetscInt), (PetscInt)(fill * (ai[am] + bi[bm])), &seg));
1134: PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 100, &segrow));
1135: PetscCall(PetscCalloc1(bn, &seen));
1137: /* Determine ci and cj */
1138: for (i = 0; i < am; i++) {
1139: const PetscInt anzi = ai[i + 1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */
1140: const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */
1141: PetscInt packlen = 0, *PETSC_RESTRICT crow;
1143: /* Pack segrow */
1144: for (j = 0; j < anzi; j++) {
1145: PetscInt brow = acol[j], bjstart = bi[brow], bjend = bi[brow + 1], k;
1146: for (k = bjstart; k < bjend; k++) {
1147: bcol = bj[k];
1148: if (!seen[bcol]) { /* new entry */
1149: PetscInt *PETSC_RESTRICT slot;
1150: PetscCall(PetscSegBufferGetInts(segrow, 1, &slot));
1151: *slot = bcol;
1152: seen[bcol] = 1;
1153: packlen++;
1154: }
1155: }
1156: }
1158: /* Check i-th diagonal entry */
1159: if (C->force_diagonals && !seen[i]) {
1160: PetscInt *PETSC_RESTRICT slot;
1161: PetscCall(PetscSegBufferGetInts(segrow, 1, &slot));
1162: *slot = i;
1163: seen[i] = 1;
1164: packlen++;
1165: }
1167: PetscCall(PetscSegBufferGetInts(seg, packlen, &crow));
1168: PetscCall(PetscSegBufferExtractTo(segrow, crow));
1169: PetscCall(PetscSortInt(packlen, crow));
1170: ci[i + 1] = ci[i] + packlen;
1171: for (j = 0; j < packlen; j++) seen[crow[j]] = 0;
1172: }
1173: PetscCall(PetscSegBufferDestroy(&segrow));
1174: PetscCall(PetscFree(seen));
1176: /* Column indices are in the segmented buffer */
1177: PetscCall(PetscSegBufferExtractAlloc(seg, &cj));
1178: PetscCall(PetscSegBufferDestroy(&seg));
1180: /* put together the new symbolic matrix */
1181: PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), am, bn, ci, cj, NULL, ((PetscObject)A)->type_name, C));
1182: PetscCall(MatSetBlockSizesFromMats(C, A, B));
1184: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
1185: /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
1186: c = (Mat_SeqAIJ *)(C->data);
1187: c->free_a = PETSC_TRUE;
1188: c->free_ij = PETSC_TRUE;
1189: c->nonew = 0;
1191: C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted;
1193: /* set MatInfo */
1194: afill = (PetscReal)ci[am] / PetscMax(ai[am] + bi[bm], 1) + 1.e-5;
1195: if (afill < 1.0) afill = 1.0;
1196: C->info.mallocs = ndouble;
1197: C->info.fill_ratio_given = fill;
1198: C->info.fill_ratio_needed = afill;
1200: #if defined(PETSC_USE_INFO)
1201: if (ci[am]) {
1202: PetscCall(PetscInfo(C, "Reallocs %" PetscInt_FMT "; Fill ratio: given %g needed %g.\n", ndouble, (double)fill, (double)afill));
1203: PetscCall(PetscInfo(C, "Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n", (double)afill));
1204: } else {
1205: PetscCall(PetscInfo(C, "Empty matrix product\n"));
1206: }
1207: #endif
1208: PetscFunctionReturn(PETSC_SUCCESS);
1209: }
1211: static PetscErrorCode MatDestroy_SeqAIJ_MatMatMultTrans(void *data)
1212: {
1213: Mat_MatMatTransMult *abt = (Mat_MatMatTransMult *)data;
1215: PetscFunctionBegin;
1216: PetscCall(MatTransposeColoringDestroy(&abt->matcoloring));
1217: PetscCall(MatDestroy(&abt->Bt_den));
1218: PetscCall(MatDestroy(&abt->ABt_den));
1219: PetscCall(PetscFree(abt));
1220: PetscFunctionReturn(PETSC_SUCCESS);
1221: }
1223: PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(Mat A, Mat B, PetscReal fill, Mat C)
1224: {
1225: Mat Bt;
1226: Mat_MatMatTransMult *abt;
1227: Mat_Product *product = C->product;
1228: char *alg;
1230: PetscFunctionBegin;
1231: PetscCheck(product, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing product struct");
1232: PetscCheck(!product->data, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Extra product struct not empty");
1234: /* create symbolic Bt */
1235: PetscCall(MatTransposeSymbolic(B, &Bt));
1236: PetscCall(MatSetBlockSizes(Bt, PetscAbs(A->cmap->bs), PetscAbs(B->cmap->bs)));
1237: PetscCall(MatSetType(Bt, ((PetscObject)A)->type_name));
1239: /* get symbolic C=A*Bt */
1240: PetscCall(PetscStrallocpy(product->alg, &alg));
1241: PetscCall(MatProductSetAlgorithm(C, "sorted")); /* set algorithm for C = A*Bt */
1242: PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ(A, Bt, fill, C));
1243: PetscCall(MatProductSetAlgorithm(C, alg)); /* resume original algorithm for ABt product */
1244: PetscCall(PetscFree(alg));
1246: /* create a supporting struct for reuse intermediate dense matrices with matcoloring */
1247: PetscCall(PetscNew(&abt));
1249: product->data = abt;
1250: product->destroy = MatDestroy_SeqAIJ_MatMatMultTrans;
1252: C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
1254: abt->usecoloring = PETSC_FALSE;
1255: PetscCall(PetscStrcmp(product->alg, "color", &abt->usecoloring));
1256: if (abt->usecoloring) {
1257: /* Create MatTransposeColoring from symbolic C=A*B^T */
1258: MatTransposeColoring matcoloring;
1259: MatColoring coloring;
1260: ISColoring iscoloring;
1261: Mat Bt_dense, C_dense;
1263: /* inode causes memory problem */
1264: PetscCall(MatSetOption(C, MAT_USE_INODES, PETSC_FALSE));
1266: PetscCall(MatColoringCreate(C, &coloring));
1267: PetscCall(MatColoringSetDistance(coloring, 2));
1268: PetscCall(MatColoringSetType(coloring, MATCOLORINGSL));
1269: PetscCall(MatColoringSetFromOptions(coloring));
1270: PetscCall(MatColoringApply(coloring, &iscoloring));
1271: PetscCall(MatColoringDestroy(&coloring));
1272: PetscCall(MatTransposeColoringCreate(C, iscoloring, &matcoloring));
1274: abt->matcoloring = matcoloring;
1276: PetscCall(ISColoringDestroy(&iscoloring));
1278: /* Create Bt_dense and C_dense = A*Bt_dense */
1279: PetscCall(MatCreate(PETSC_COMM_SELF, &Bt_dense));
1280: PetscCall(MatSetSizes(Bt_dense, A->cmap->n, matcoloring->ncolors, A->cmap->n, matcoloring->ncolors));
1281: PetscCall(MatSetType(Bt_dense, MATSEQDENSE));
1282: PetscCall(MatSeqDenseSetPreallocation(Bt_dense, NULL));
1284: Bt_dense->assembled = PETSC_TRUE;
1285: abt->Bt_den = Bt_dense;
1287: PetscCall(MatCreate(PETSC_COMM_SELF, &C_dense));
1288: PetscCall(MatSetSizes(C_dense, A->rmap->n, matcoloring->ncolors, A->rmap->n, matcoloring->ncolors));
1289: PetscCall(MatSetType(C_dense, MATSEQDENSE));
1290: PetscCall(MatSeqDenseSetPreallocation(C_dense, NULL));
1292: Bt_dense->assembled = PETSC_TRUE;
1293: abt->ABt_den = C_dense;
1295: #if defined(PETSC_USE_INFO)
1296: {
1297: Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data;
1298: PetscCall(PetscInfo(C, "Use coloring of C=A*B^T; B^T: %" PetscInt_FMT " %" PetscInt_FMT ", Bt_dense: %" PetscInt_FMT ",%" PetscInt_FMT "; Cnz %" PetscInt_FMT " / (cm*ncolors %" PetscInt_FMT ") = %g\n", B->cmap->n, B->rmap->n, Bt_dense->rmap->n,
1299: Bt_dense->cmap->n, c->nz, A->rmap->n * matcoloring->ncolors, (double)(((PetscReal)(c->nz)) / ((PetscReal)(A->rmap->n * matcoloring->ncolors)))));
1300: }
1301: #endif
1302: }
1303: /* clean up */
1304: PetscCall(MatDestroy(&Bt));
1305: PetscFunctionReturn(PETSC_SUCCESS);
1306: }
1308: PetscErrorCode MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(Mat A, Mat B, Mat C)
1309: {
1310: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)B->data, *c = (Mat_SeqAIJ *)C->data;
1311: PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, anzi, bnzj, nexta, nextb, *acol, *bcol, brow;
1312: PetscInt cm = C->rmap->n, *ci = c->i, *cj = c->j, i, j, cnzi, *ccol;
1313: PetscLogDouble flops = 0.0;
1314: MatScalar *aa = a->a, *aval, *ba = b->a, *bval, *ca, *cval;
1315: Mat_MatMatTransMult *abt;
1316: Mat_Product *product = C->product;
1318: PetscFunctionBegin;
1319: PetscCheck(product, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing product struct");
1320: abt = (Mat_MatMatTransMult *)product->data;
1321: PetscCheck(abt, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing product struct");
1322: /* clear old values in C */
1323: if (!c->a) {
1324: PetscCall(PetscCalloc1(ci[cm] + 1, &ca));
1325: c->a = ca;
1326: c->free_a = PETSC_TRUE;
1327: } else {
1328: ca = c->a;
1329: PetscCall(PetscArrayzero(ca, ci[cm] + 1));
1330: }
1332: if (abt->usecoloring) {
1333: MatTransposeColoring matcoloring = abt->matcoloring;
1334: Mat Bt_dense, C_dense = abt->ABt_den;
1336: /* Get Bt_dense by Apply MatTransposeColoring to B */
1337: Bt_dense = abt->Bt_den;
1338: PetscCall(MatTransColoringApplySpToDen(matcoloring, B, Bt_dense));
1340: /* C_dense = A*Bt_dense */
1341: PetscCall(MatMatMultNumeric_SeqAIJ_SeqDense(A, Bt_dense, C_dense));
1343: /* Recover C from C_dense */
1344: PetscCall(MatTransColoringApplyDenToSp(matcoloring, C_dense, C));
1345: PetscFunctionReturn(PETSC_SUCCESS);
1346: }
1348: for (i = 0; i < cm; i++) {
1349: anzi = ai[i + 1] - ai[i];
1350: acol = aj + ai[i];
1351: aval = aa + ai[i];
1352: cnzi = ci[i + 1] - ci[i];
1353: ccol = cj + ci[i];
1354: cval = ca + ci[i];
1355: for (j = 0; j < cnzi; j++) {
1356: brow = ccol[j];
1357: bnzj = bi[brow + 1] - bi[brow];
1358: bcol = bj + bi[brow];
1359: bval = ba + bi[brow];
1361: /* perform sparse inner-product c(i,j)=A[i,:]*B[j,:]^T */
1362: nexta = 0;
1363: nextb = 0;
1364: while (nexta < anzi && nextb < bnzj) {
1365: while (nexta < anzi && acol[nexta] < bcol[nextb]) nexta++;
1366: if (nexta == anzi) break;
1367: while (nextb < bnzj && acol[nexta] > bcol[nextb]) nextb++;
1368: if (nextb == bnzj) break;
1369: if (acol[nexta] == bcol[nextb]) {
1370: cval[j] += aval[nexta] * bval[nextb];
1371: nexta++;
1372: nextb++;
1373: flops += 2;
1374: }
1375: }
1376: }
1377: }
1378: PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
1379: PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
1380: PetscCall(PetscLogFlops(flops));
1381: PetscFunctionReturn(PETSC_SUCCESS);
1382: }
1384: PetscErrorCode MatDestroy_SeqAIJ_MatTransMatMult(void *data)
1385: {
1386: Mat_MatTransMatMult *atb = (Mat_MatTransMatMult *)data;
1388: PetscFunctionBegin;
1389: PetscCall(MatDestroy(&atb->At));
1390: if (atb->destroy) PetscCall((*atb->destroy)(atb->data));
1391: PetscCall(PetscFree(atb));
1392: PetscFunctionReturn(PETSC_SUCCESS);
1393: }
1395: PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A, Mat B, PetscReal fill, Mat C)
1396: {
1397: Mat At = NULL;
1398: Mat_Product *product = C->product;
1399: PetscBool flg, def, square;
1401: PetscFunctionBegin;
1402: MatCheckProduct(C, 4);
1403: square = (PetscBool)(A == B && A->symmetric == PETSC_BOOL3_TRUE);
1404: /* outerproduct */
1405: PetscCall(PetscStrcmp(product->alg, "outerproduct", &flg));
1406: if (flg) {
1407: /* create symbolic At */
1408: if (!square) {
1409: PetscCall(MatTransposeSymbolic(A, &At));
1410: PetscCall(MatSetBlockSizes(At, PetscAbs(A->cmap->bs), PetscAbs(B->cmap->bs)));
1411: PetscCall(MatSetType(At, ((PetscObject)A)->type_name));
1412: }
1413: /* get symbolic C=At*B */
1414: PetscCall(MatProductSetAlgorithm(C, "sorted"));
1415: PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ(square ? A : At, B, fill, C));
1417: /* clean up */
1418: if (!square) PetscCall(MatDestroy(&At));
1420: C->ops->mattransposemultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ; /* outerproduct */
1421: PetscCall(MatProductSetAlgorithm(C, "outerproduct"));
1422: PetscFunctionReturn(PETSC_SUCCESS);
1423: }
1425: /* matmatmult */
1426: PetscCall(PetscStrcmp(product->alg, "default", &def));
1427: PetscCall(PetscStrcmp(product->alg, "at*b", &flg));
1428: if (flg || def) {
1429: Mat_MatTransMatMult *atb;
1431: PetscCheck(!product->data, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Extra product struct not empty");
1432: PetscCall(PetscNew(&atb));
1433: if (!square) PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
1434: PetscCall(MatProductSetAlgorithm(C, "sorted"));
1435: PetscCall(MatMatMultSymbolic_SeqAIJ_SeqAIJ(square ? A : At, B, fill, C));
1436: PetscCall(MatProductSetAlgorithm(C, "at*b"));
1437: product->data = atb;
1438: product->destroy = MatDestroy_SeqAIJ_MatTransMatMult;
1439: atb->At = At;
1441: C->ops->mattransposemultnumeric = NULL; /* see MatProductNumeric_AtB_SeqAIJ_SeqAIJ */
1442: PetscFunctionReturn(PETSC_SUCCESS);
1443: }
1445: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product Algorithm is not supported");
1446: }
1448: PetscErrorCode MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ(Mat A, Mat B, Mat C)
1449: {
1450: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)B->data, *c = (Mat_SeqAIJ *)C->data;
1451: PetscInt am = A->rmap->n, anzi, *ai = a->i, *aj = a->j, *bi = b->i, *bj, bnzi, nextb;
1452: PetscInt cm = C->rmap->n, *ci = c->i, *cj = c->j, crow, *cjj, i, j, k;
1453: PetscLogDouble flops = 0.0;
1454: MatScalar *aa = a->a, *ba, *ca, *caj;
1456: PetscFunctionBegin;
1457: if (!c->a) {
1458: PetscCall(PetscCalloc1(ci[cm] + 1, &ca));
1460: c->a = ca;
1461: c->free_a = PETSC_TRUE;
1462: } else {
1463: ca = c->a;
1464: PetscCall(PetscArrayzero(ca, ci[cm]));
1465: }
1467: /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */
1468: for (i = 0; i < am; i++) {
1469: bj = b->j + bi[i];
1470: ba = b->a + bi[i];
1471: bnzi = bi[i + 1] - bi[i];
1472: anzi = ai[i + 1] - ai[i];
1473: for (j = 0; j < anzi; j++) {
1474: nextb = 0;
1475: crow = *aj++;
1476: cjj = cj + ci[crow];
1477: caj = ca + ci[crow];
1478: /* perform sparse axpy operation. Note cjj includes bj. */
1479: for (k = 0; nextb < bnzi; k++) {
1480: if (cjj[k] == *(bj + nextb)) { /* ccol == bcol */
1481: caj[k] += (*aa) * (*(ba + nextb));
1482: nextb++;
1483: }
1484: }
1485: flops += 2 * bnzi;
1486: aa++;
1487: }
1488: }
1490: /* Assemble the final matrix and clean up */
1491: PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
1492: PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
1493: PetscCall(PetscLogFlops(flops));
1494: PetscFunctionReturn(PETSC_SUCCESS);
1495: }
1497: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqDense(Mat A, Mat B, PetscReal fill, Mat C)
1498: {
1499: PetscFunctionBegin;
1500: PetscCall(MatMatMultSymbolic_SeqDense_SeqDense(A, B, 0.0, C));
1501: C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqDense;
1502: PetscFunctionReturn(PETSC_SUCCESS);
1503: }
1505: PETSC_INTERN PetscErrorCode MatMatMultNumericAdd_SeqAIJ_SeqDense(Mat A, Mat B, Mat C, const PetscBool add)
1506: {
1507: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1508: PetscScalar *c, r1, r2, r3, r4, *c1, *c2, *c3, *c4;
1509: const PetscScalar *aa, *b, *b1, *b2, *b3, *b4, *av;
1510: const PetscInt *aj;
1511: PetscInt cm = C->rmap->n, cn = B->cmap->n, bm, am = A->rmap->n;
1512: PetscInt clda;
1513: PetscInt am4, bm4, col, i, j, n;
1515: PetscFunctionBegin;
1516: if (!cm || !cn) PetscFunctionReturn(PETSC_SUCCESS);
1517: PetscCall(MatSeqAIJGetArrayRead(A, &av));
1518: if (add) {
1519: PetscCall(MatDenseGetArray(C, &c));
1520: } else {
1521: PetscCall(MatDenseGetArrayWrite(C, &c));
1522: }
1523: PetscCall(MatDenseGetArrayRead(B, &b));
1524: PetscCall(MatDenseGetLDA(B, &bm));
1525: PetscCall(MatDenseGetLDA(C, &clda));
1526: am4 = 4 * clda;
1527: bm4 = 4 * bm;
1528: if (b) {
1529: b1 = b;
1530: b2 = b1 + bm;
1531: b3 = b2 + bm;
1532: b4 = b3 + bm;
1533: } else b1 = b2 = b3 = b4 = NULL;
1534: c1 = c;
1535: c2 = c1 + clda;
1536: c3 = c2 + clda;
1537: c4 = c3 + clda;
1538: for (col = 0; col < (cn / 4) * 4; col += 4) { /* over columns of C */
1539: for (i = 0; i < am; i++) { /* over rows of A in those columns */
1540: r1 = r2 = r3 = r4 = 0.0;
1541: n = a->i[i + 1] - a->i[i];
1542: aj = a->j ? a->j + a->i[i] : NULL;
1543: aa = av ? av + a->i[i] : NULL;
1544: for (j = 0; j < n; j++) {
1545: const PetscScalar aatmp = aa[j];
1546: const PetscInt ajtmp = aj[j];
1547: r1 += aatmp * b1[ajtmp];
1548: r2 += aatmp * b2[ajtmp];
1549: r3 += aatmp * b3[ajtmp];
1550: r4 += aatmp * b4[ajtmp];
1551: }
1552: if (add) {
1553: c1[i] += r1;
1554: c2[i] += r2;
1555: c3[i] += r3;
1556: c4[i] += r4;
1557: } else {
1558: c1[i] = r1;
1559: c2[i] = r2;
1560: c3[i] = r3;
1561: c4[i] = r4;
1562: }
1563: }
1564: if (b) {
1565: b1 += bm4;
1566: b2 += bm4;
1567: b3 += bm4;
1568: b4 += bm4;
1569: }
1570: c1 += am4;
1571: c2 += am4;
1572: c3 += am4;
1573: c4 += am4;
1574: }
1575: /* process remaining columns */
1576: if (col != cn) {
1577: PetscInt rc = cn - col;
1579: if (rc == 1) {
1580: for (i = 0; i < am; i++) {
1581: r1 = 0.0;
1582: n = a->i[i + 1] - a->i[i];
1583: aj = a->j + a->i[i];
1584: aa = av + a->i[i];
1585: for (j = 0; j < n; j++) r1 += aa[j] * b1[aj[j]];
1586: if (add) c1[i] += r1;
1587: else c1[i] = r1;
1588: }
1589: } else if (rc == 2) {
1590: for (i = 0; i < am; i++) {
1591: r1 = r2 = 0.0;
1592: n = a->i[i + 1] - a->i[i];
1593: aj = a->j + a->i[i];
1594: aa = av + a->i[i];
1595: for (j = 0; j < n; j++) {
1596: const PetscScalar aatmp = aa[j];
1597: const PetscInt ajtmp = aj[j];
1598: r1 += aatmp * b1[ajtmp];
1599: r2 += aatmp * b2[ajtmp];
1600: }
1601: if (add) {
1602: c1[i] += r1;
1603: c2[i] += r2;
1604: } else {
1605: c1[i] = r1;
1606: c2[i] = r2;
1607: }
1608: }
1609: } else {
1610: for (i = 0; i < am; i++) {
1611: r1 = r2 = r3 = 0.0;
1612: n = a->i[i + 1] - a->i[i];
1613: aj = a->j ? a->j + a->i[i] : NULL;
1614: aa = av ? av + a->i[i] : NULL;
1615: for (j = 0; j < n; j++) {
1616: const PetscScalar aatmp = aa[j];
1617: const PetscInt ajtmp = aj[j];
1618: r1 += aatmp * b1[ajtmp];
1619: r2 += aatmp * b2[ajtmp];
1620: r3 += aatmp * b3[ajtmp];
1621: }
1622: if (add) {
1623: c1[i] += r1;
1624: c2[i] += r2;
1625: c3[i] += r3;
1626: } else {
1627: c1[i] = r1;
1628: c2[i] = r2;
1629: c3[i] = r3;
1630: }
1631: }
1632: }
1633: }
1634: PetscCall(PetscLogFlops(cn * (2.0 * a->nz)));
1635: if (add) {
1636: PetscCall(MatDenseRestoreArray(C, &c));
1637: } else {
1638: PetscCall(MatDenseRestoreArrayWrite(C, &c));
1639: }
1640: PetscCall(MatDenseRestoreArrayRead(B, &b));
1641: PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
1642: PetscFunctionReturn(PETSC_SUCCESS);
1643: }
1645: PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqDense(Mat A, Mat B, Mat C)
1646: {
1647: PetscFunctionBegin;
1648: PetscCheck(B->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number columns in A %" PetscInt_FMT " not equal rows in B %" PetscInt_FMT, A->cmap->n, B->rmap->n);
1649: PetscCheck(A->rmap->n == C->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows in C %" PetscInt_FMT " not equal rows in A %" PetscInt_FMT, C->rmap->n, A->rmap->n);
1650: PetscCheck(B->cmap->n == C->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number columns in B %" PetscInt_FMT " not equal columns in C %" PetscInt_FMT, B->cmap->n, C->cmap->n);
1652: PetscCall(MatMatMultNumericAdd_SeqAIJ_SeqDense(A, B, C, PETSC_FALSE));
1653: PetscFunctionReturn(PETSC_SUCCESS);
1654: }
1656: static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense_AB(Mat C)
1657: {
1658: PetscFunctionBegin;
1659: C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqDense;
1660: C->ops->productsymbolic = MatProductSymbolic_AB;
1661: PetscFunctionReturn(PETSC_SUCCESS);
1662: }
1664: PETSC_INTERN PetscErrorCode MatTMatTMultSymbolic_SeqAIJ_SeqDense(Mat, Mat, PetscReal, Mat);
1666: static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense_AtB(Mat C)
1667: {
1668: PetscFunctionBegin;
1669: C->ops->transposematmultsymbolic = MatTMatTMultSymbolic_SeqAIJ_SeqDense;
1670: C->ops->productsymbolic = MatProductSymbolic_AtB;
1671: PetscFunctionReturn(PETSC_SUCCESS);
1672: }
1674: static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense_ABt(Mat C)
1675: {
1676: PetscFunctionBegin;
1677: C->ops->mattransposemultsymbolic = MatTMatTMultSymbolic_SeqAIJ_SeqDense;
1678: C->ops->productsymbolic = MatProductSymbolic_ABt;
1679: PetscFunctionReturn(PETSC_SUCCESS);
1680: }
1682: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense(Mat C)
1683: {
1684: Mat_Product *product = C->product;
1686: PetscFunctionBegin;
1687: switch (product->type) {
1688: case MATPRODUCT_AB:
1689: PetscCall(MatProductSetFromOptions_SeqAIJ_SeqDense_AB(C));
1690: break;
1691: case MATPRODUCT_AtB:
1692: PetscCall(MatProductSetFromOptions_SeqAIJ_SeqDense_AtB(C));
1693: break;
1694: case MATPRODUCT_ABt:
1695: PetscCall(MatProductSetFromOptions_SeqAIJ_SeqDense_ABt(C));
1696: break;
1697: default:
1698: break;
1699: }
1700: PetscFunctionReturn(PETSC_SUCCESS);
1701: }
1703: static PetscErrorCode MatProductSetFromOptions_SeqXBAIJ_SeqDense_AB(Mat C)
1704: {
1705: Mat_Product *product = C->product;
1706: Mat A = product->A;
1707: PetscBool baij;
1709: PetscFunctionBegin;
1710: PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &baij));
1711: if (!baij) { /* A is seqsbaij */
1712: PetscBool sbaij;
1713: PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &sbaij));
1714: PetscCheck(sbaij, PetscObjectComm((PetscObject)C), PETSC_ERR_ARG_WRONGSTATE, "Mat must be either seqbaij or seqsbaij format");
1716: C->ops->matmultsymbolic = MatMatMultSymbolic_SeqSBAIJ_SeqDense;
1717: } else { /* A is seqbaij */
1718: C->ops->matmultsymbolic = MatMatMultSymbolic_SeqBAIJ_SeqDense;
1719: }
1721: C->ops->productsymbolic = MatProductSymbolic_AB;
1722: PetscFunctionReturn(PETSC_SUCCESS);
1723: }
1725: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqXBAIJ_SeqDense(Mat C)
1726: {
1727: Mat_Product *product = C->product;
1729: PetscFunctionBegin;
1730: MatCheckProduct(C, 1);
1731: PetscCheck(product->A, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing A");
1732: if (product->type == MATPRODUCT_AB || (product->type == MATPRODUCT_AtB && product->A->symmetric == PETSC_BOOL3_TRUE)) PetscCall(MatProductSetFromOptions_SeqXBAIJ_SeqDense_AB(C));
1733: PetscFunctionReturn(PETSC_SUCCESS);
1734: }
1736: static PetscErrorCode MatProductSetFromOptions_SeqDense_SeqAIJ_AB(Mat C)
1737: {
1738: PetscFunctionBegin;
1739: C->ops->matmultsymbolic = MatMatMultSymbolic_SeqDense_SeqAIJ;
1740: C->ops->productsymbolic = MatProductSymbolic_AB;
1741: PetscFunctionReturn(PETSC_SUCCESS);
1742: }
1744: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqDense_SeqAIJ(Mat C)
1745: {
1746: Mat_Product *product = C->product;
1748: PetscFunctionBegin;
1749: if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_SeqDense_SeqAIJ_AB(C));
1750: PetscFunctionReturn(PETSC_SUCCESS);
1751: }
1753: PetscErrorCode MatTransColoringApplySpToDen_SeqAIJ(MatTransposeColoring coloring, Mat B, Mat Btdense)
1754: {
1755: Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
1756: Mat_SeqDense *btdense = (Mat_SeqDense *)Btdense->data;
1757: PetscInt *bi = b->i, *bj = b->j;
1758: PetscInt m = Btdense->rmap->n, n = Btdense->cmap->n, j, k, l, col, anz, *btcol, brow, ncolumns;
1759: MatScalar *btval, *btval_den, *ba = b->a;
1760: PetscInt *columns = coloring->columns, *colorforcol = coloring->colorforcol, ncolors = coloring->ncolors;
1762: PetscFunctionBegin;
1763: btval_den = btdense->v;
1764: PetscCall(PetscArrayzero(btval_den, m * n));
1765: for (k = 0; k < ncolors; k++) {
1766: ncolumns = coloring->ncolumns[k];
1767: for (l = 0; l < ncolumns; l++) { /* insert a row of B to a column of Btdense */
1768: col = *(columns + colorforcol[k] + l);
1769: btcol = bj + bi[col];
1770: btval = ba + bi[col];
1771: anz = bi[col + 1] - bi[col];
1772: for (j = 0; j < anz; j++) {
1773: brow = btcol[j];
1774: btval_den[brow] = btval[j];
1775: }
1776: }
1777: btval_den += m;
1778: }
1779: PetscFunctionReturn(PETSC_SUCCESS);
1780: }
1782: PetscErrorCode MatTransColoringApplyDenToSp_SeqAIJ(MatTransposeColoring matcoloring, Mat Cden, Mat Csp)
1783: {
1784: Mat_SeqAIJ *csp = (Mat_SeqAIJ *)Csp->data;
1785: const PetscScalar *ca_den, *ca_den_ptr;
1786: PetscScalar *ca = csp->a;
1787: PetscInt k, l, m = Cden->rmap->n, ncolors = matcoloring->ncolors;
1788: PetscInt brows = matcoloring->brows, *den2sp = matcoloring->den2sp;
1789: PetscInt nrows, *row, *idx;
1790: PetscInt *rows = matcoloring->rows, *colorforrow = matcoloring->colorforrow;
1792: PetscFunctionBegin;
1793: PetscCall(MatDenseGetArrayRead(Cden, &ca_den));
1795: if (brows > 0) {
1796: PetscInt *lstart, row_end, row_start;
1797: lstart = matcoloring->lstart;
1798: PetscCall(PetscArrayzero(lstart, ncolors));
1800: row_end = brows;
1801: if (row_end > m) row_end = m;
1802: for (row_start = 0; row_start < m; row_start += brows) { /* loop over row blocks of Csp */
1803: ca_den_ptr = ca_den;
1804: for (k = 0; k < ncolors; k++) { /* loop over colors (columns of Cden) */
1805: nrows = matcoloring->nrows[k];
1806: row = rows + colorforrow[k];
1807: idx = den2sp + colorforrow[k];
1808: for (l = lstart[k]; l < nrows; l++) {
1809: if (row[l] >= row_end) {
1810: lstart[k] = l;
1811: break;
1812: } else {
1813: ca[idx[l]] = ca_den_ptr[row[l]];
1814: }
1815: }
1816: ca_den_ptr += m;
1817: }
1818: row_end += brows;
1819: if (row_end > m) row_end = m;
1820: }
1821: } else { /* non-blocked impl: loop over columns of Csp - slow if Csp is large */
1822: ca_den_ptr = ca_den;
1823: for (k = 0; k < ncolors; k++) {
1824: nrows = matcoloring->nrows[k];
1825: row = rows + colorforrow[k];
1826: idx = den2sp + colorforrow[k];
1827: for (l = 0; l < nrows; l++) ca[idx[l]] = ca_den_ptr[row[l]];
1828: ca_den_ptr += m;
1829: }
1830: }
1832: PetscCall(MatDenseRestoreArrayRead(Cden, &ca_den));
1833: #if defined(PETSC_USE_INFO)
1834: if (matcoloring->brows > 0) {
1835: PetscCall(PetscInfo(Csp, "Loop over %" PetscInt_FMT " row blocks for den2sp\n", brows));
1836: } else {
1837: PetscCall(PetscInfo(Csp, "Loop over colors/columns of Cden, inefficient for large sparse matrix product \n"));
1838: }
1839: #endif
1840: PetscFunctionReturn(PETSC_SUCCESS);
1841: }
1843: PetscErrorCode MatTransposeColoringCreate_SeqAIJ(Mat mat, ISColoring iscoloring, MatTransposeColoring c)
1844: {
1845: PetscInt i, n, nrows, Nbs, j, k, m, ncols, col, cm;
1846: const PetscInt *is, *ci, *cj, *row_idx;
1847: PetscInt nis = iscoloring->n, *rowhit, bs = 1;
1848: IS *isa;
1849: Mat_SeqAIJ *csp = (Mat_SeqAIJ *)mat->data;
1850: PetscInt *colorforrow, *rows, *rows_i, *idxhit, *spidx, *den2sp, *den2sp_i;
1851: PetscInt *colorforcol, *columns, *columns_i, brows;
1852: PetscBool flg;
1854: PetscFunctionBegin;
1855: PetscCall(ISColoringGetIS(iscoloring, PETSC_USE_POINTER, PETSC_IGNORE, &isa));
1857: /* bs >1 is not being tested yet! */
1858: Nbs = mat->cmap->N / bs;
1859: c->M = mat->rmap->N / bs; /* set total rows, columns and local rows */
1860: c->N = Nbs;
1861: c->m = c->M;
1862: c->rstart = 0;
1863: c->brows = 100;
1865: c->ncolors = nis;
1866: PetscCall(PetscMalloc3(nis, &c->ncolumns, nis, &c->nrows, nis + 1, &colorforrow));
1867: PetscCall(PetscMalloc1(csp->nz + 1, &rows));
1868: PetscCall(PetscMalloc1(csp->nz + 1, &den2sp));
1870: brows = c->brows;
1871: PetscCall(PetscOptionsGetInt(NULL, NULL, "-matden2sp_brows", &brows, &flg));
1872: if (flg) c->brows = brows;
1873: if (brows > 0) PetscCall(PetscMalloc1(nis + 1, &c->lstart));
1875: colorforrow[0] = 0;
1876: rows_i = rows;
1877: den2sp_i = den2sp;
1879: PetscCall(PetscMalloc1(nis + 1, &colorforcol));
1880: PetscCall(PetscMalloc1(Nbs + 1, &columns));
1882: colorforcol[0] = 0;
1883: columns_i = columns;
1885: /* get column-wise storage of mat */
1886: PetscCall(MatGetColumnIJ_SeqAIJ_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL));
1888: cm = c->m;
1889: PetscCall(PetscMalloc1(cm + 1, &rowhit));
1890: PetscCall(PetscMalloc1(cm + 1, &idxhit));
1891: for (i = 0; i < nis; i++) { /* loop over color */
1892: PetscCall(ISGetLocalSize(isa[i], &n));
1893: PetscCall(ISGetIndices(isa[i], &is));
1895: c->ncolumns[i] = n;
1896: if (n) PetscCall(PetscArraycpy(columns_i, is, n));
1897: colorforcol[i + 1] = colorforcol[i] + n;
1898: columns_i += n;
1900: /* fast, crude version requires O(N*N) work */
1901: PetscCall(PetscArrayzero(rowhit, cm));
1903: for (j = 0; j < n; j++) { /* loop over columns*/
1904: col = is[j];
1905: row_idx = cj + ci[col];
1906: m = ci[col + 1] - ci[col];
1907: for (k = 0; k < m; k++) { /* loop over columns marking them in rowhit */
1908: idxhit[*row_idx] = spidx[ci[col] + k];
1909: rowhit[*row_idx++] = col + 1;
1910: }
1911: }
1912: /* count the number of hits */
1913: nrows = 0;
1914: for (j = 0; j < cm; j++) {
1915: if (rowhit[j]) nrows++;
1916: }
1917: c->nrows[i] = nrows;
1918: colorforrow[i + 1] = colorforrow[i] + nrows;
1920: nrows = 0;
1921: for (j = 0; j < cm; j++) { /* loop over rows */
1922: if (rowhit[j]) {
1923: rows_i[nrows] = j;
1924: den2sp_i[nrows] = idxhit[j];
1925: nrows++;
1926: }
1927: }
1928: den2sp_i += nrows;
1930: PetscCall(ISRestoreIndices(isa[i], &is));
1931: rows_i += nrows;
1932: }
1933: PetscCall(MatRestoreColumnIJ_SeqAIJ_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL));
1934: PetscCall(PetscFree(rowhit));
1935: PetscCall(ISColoringRestoreIS(iscoloring, PETSC_USE_POINTER, &isa));
1936: PetscCheck(csp->nz == colorforrow[nis], PETSC_COMM_SELF, PETSC_ERR_PLIB, "csp->nz %" PetscInt_FMT " != colorforrow[nis] %" PetscInt_FMT, csp->nz, colorforrow[nis]);
1938: c->colorforrow = colorforrow;
1939: c->rows = rows;
1940: c->den2sp = den2sp;
1941: c->colorforcol = colorforcol;
1942: c->columns = columns;
1944: PetscCall(PetscFree(idxhit));
1945: PetscFunctionReturn(PETSC_SUCCESS);
1946: }
1948: static PetscErrorCode MatProductNumeric_AtB_SeqAIJ_SeqAIJ(Mat C)
1949: {
1950: Mat_Product *product = C->product;
1951: Mat A = product->A, B = product->B;
1953: PetscFunctionBegin;
1954: if (C->ops->mattransposemultnumeric) {
1955: /* Alg: "outerproduct" */
1956: PetscCall((*C->ops->mattransposemultnumeric)(A, B, C));
1957: } else {
1958: /* Alg: "matmatmult" -- C = At*B */
1959: Mat_MatTransMatMult *atb = (Mat_MatTransMatMult *)product->data;
1961: PetscCheck(atb, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing product struct");
1962: if (atb->At) {
1963: /* At is computed in MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ();
1964: user may have called MatProductReplaceMats() to get this A=product->A */
1965: PetscCall(MatTransposeSetPrecursor(A, atb->At));
1966: PetscCall(MatTranspose(A, MAT_REUSE_MATRIX, &atb->At));
1967: }
1968: PetscCall(MatMatMultNumeric_SeqAIJ_SeqAIJ(atb->At ? atb->At : A, B, C));
1969: }
1970: PetscFunctionReturn(PETSC_SUCCESS);
1971: }
1973: static PetscErrorCode MatProductSymbolic_AtB_SeqAIJ_SeqAIJ(Mat C)
1974: {
1975: Mat_Product *product = C->product;
1976: Mat A = product->A, B = product->B;
1977: PetscReal fill = product->fill;
1979: PetscFunctionBegin;
1980: PetscCall(MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(A, B, fill, C));
1982: C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJ_SeqAIJ;
1983: PetscFunctionReturn(PETSC_SUCCESS);
1984: }
1986: static PetscErrorCode MatProductSetFromOptions_SeqAIJ_AB(Mat C)
1987: {
1988: Mat_Product *product = C->product;
1989: PetscInt alg = 0; /* default algorithm */
1990: PetscBool flg = PETSC_FALSE;
1991: #if !defined(PETSC_HAVE_HYPRE)
1992: const char *algTypes[7] = {"sorted", "scalable", "scalable_fast", "heap", "btheap", "llcondensed", "rowmerge"};
1993: PetscInt nalg = 7;
1994: #else
1995: const char *algTypes[8] = {"sorted", "scalable", "scalable_fast", "heap", "btheap", "llcondensed", "rowmerge", "hypre"};
1996: PetscInt nalg = 8;
1997: #endif
1999: PetscFunctionBegin;
2000: /* Set default algorithm */
2001: PetscCall(PetscStrcmp(C->product->alg, "default", &flg));
2002: if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2004: /* Get runtime option */
2005: if (product->api_user) {
2006: PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
2007: PetscCall(PetscOptionsEList("-matmatmult_via", "Algorithmic approach", "MatMatMult", algTypes, nalg, algTypes[0], &alg, &flg));
2008: PetscOptionsEnd();
2009: } else {
2010: PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
2011: PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AB", algTypes, nalg, algTypes[0], &alg, &flg));
2012: PetscOptionsEnd();
2013: }
2014: if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2016: C->ops->productsymbolic = MatProductSymbolic_AB;
2017: C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
2018: PetscFunctionReturn(PETSC_SUCCESS);
2019: }
2021: static PetscErrorCode MatProductSetFromOptions_SeqAIJ_AtB(Mat C)
2022: {
2023: Mat_Product *product = C->product;
2024: PetscInt alg = 0; /* default algorithm */
2025: PetscBool flg = PETSC_FALSE;
2026: const char *algTypes[3] = {"default", "at*b", "outerproduct"};
2027: PetscInt nalg = 3;
2029: PetscFunctionBegin;
2030: /* Get runtime option */
2031: if (product->api_user) {
2032: PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatTransposeMatMult", "Mat");
2033: PetscCall(PetscOptionsEList("-mattransposematmult_via", "Algorithmic approach", "MatTransposeMatMult", algTypes, nalg, algTypes[alg], &alg, &flg));
2034: PetscOptionsEnd();
2035: } else {
2036: PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AtB", "Mat");
2037: PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_AtB", algTypes, nalg, algTypes[alg], &alg, &flg));
2038: PetscOptionsEnd();
2039: }
2040: if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2042: C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJ_SeqAIJ;
2043: PetscFunctionReturn(PETSC_SUCCESS);
2044: }
2046: static PetscErrorCode MatProductSetFromOptions_SeqAIJ_ABt(Mat C)
2047: {
2048: Mat_Product *product = C->product;
2049: PetscInt alg = 0; /* default algorithm */
2050: PetscBool flg = PETSC_FALSE;
2051: const char *algTypes[2] = {"default", "color"};
2052: PetscInt nalg = 2;
2054: PetscFunctionBegin;
2055: /* Set default algorithm */
2056: PetscCall(PetscStrcmp(C->product->alg, "default", &flg));
2057: if (!flg) {
2058: alg = 1;
2059: PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2060: }
2062: /* Get runtime option */
2063: if (product->api_user) {
2064: PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatTransposeMult", "Mat");
2065: PetscCall(PetscOptionsEList("-matmattransmult_via", "Algorithmic approach", "MatMatTransposeMult", algTypes, nalg, algTypes[alg], &alg, &flg));
2066: PetscOptionsEnd();
2067: } else {
2068: PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABt", "Mat");
2069: PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABt", algTypes, nalg, algTypes[alg], &alg, &flg));
2070: PetscOptionsEnd();
2071: }
2072: if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2074: C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
2075: C->ops->productsymbolic = MatProductSymbolic_ABt;
2076: PetscFunctionReturn(PETSC_SUCCESS);
2077: }
2079: static PetscErrorCode MatProductSetFromOptions_SeqAIJ_PtAP(Mat C)
2080: {
2081: Mat_Product *product = C->product;
2082: PetscBool flg = PETSC_FALSE;
2083: PetscInt alg = 0; /* default algorithm -- alg=1 should be default!!! */
2084: #if !defined(PETSC_HAVE_HYPRE)
2085: const char *algTypes[2] = {"scalable", "rap"};
2086: PetscInt nalg = 2;
2087: #else
2088: const char *algTypes[3] = {"scalable", "rap", "hypre"};
2089: PetscInt nalg = 3;
2090: #endif
2092: PetscFunctionBegin;
2093: /* Set default algorithm */
2094: PetscCall(PetscStrcmp(product->alg, "default", &flg));
2095: if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2097: /* Get runtime option */
2098: if (product->api_user) {
2099: PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
2100: PetscCall(PetscOptionsEList("-matptap_via", "Algorithmic approach", "MatPtAP", algTypes, nalg, algTypes[0], &alg, &flg));
2101: PetscOptionsEnd();
2102: } else {
2103: PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
2104: PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_PtAP", algTypes, nalg, algTypes[0], &alg, &flg));
2105: PetscOptionsEnd();
2106: }
2107: if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2109: C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqAIJ;
2110: PetscFunctionReturn(PETSC_SUCCESS);
2111: }
2113: static PetscErrorCode MatProductSetFromOptions_SeqAIJ_RARt(Mat C)
2114: {
2115: Mat_Product *product = C->product;
2116: PetscBool flg = PETSC_FALSE;
2117: PetscInt alg = 0; /* default algorithm */
2118: const char *algTypes[3] = {"r*a*rt", "r*art", "coloring_rart"};
2119: PetscInt nalg = 3;
2121: PetscFunctionBegin;
2122: /* Set default algorithm */
2123: PetscCall(PetscStrcmp(product->alg, "default", &flg));
2124: if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2126: /* Get runtime option */
2127: if (product->api_user) {
2128: PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatRARt", "Mat");
2129: PetscCall(PetscOptionsEList("-matrart_via", "Algorithmic approach", "MatRARt", algTypes, nalg, algTypes[0], &alg, &flg));
2130: PetscOptionsEnd();
2131: } else {
2132: PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_RARt", "Mat");
2133: PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_RARt", algTypes, nalg, algTypes[0], &alg, &flg));
2134: PetscOptionsEnd();
2135: }
2136: if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2138: C->ops->productsymbolic = MatProductSymbolic_RARt_SeqAIJ_SeqAIJ;
2139: PetscFunctionReturn(PETSC_SUCCESS);
2140: }
2142: /* ABC = A*B*C = A*(B*C); ABC's algorithm must be chosen from AB's algorithm */
2143: static PetscErrorCode MatProductSetFromOptions_SeqAIJ_ABC(Mat C)
2144: {
2145: Mat_Product *product = C->product;
2146: PetscInt alg = 0; /* default algorithm */
2147: PetscBool flg = PETSC_FALSE;
2148: const char *algTypes[7] = {"sorted", "scalable", "scalable_fast", "heap", "btheap", "llcondensed", "rowmerge"};
2149: PetscInt nalg = 7;
2151: PetscFunctionBegin;
2152: /* Set default algorithm */
2153: PetscCall(PetscStrcmp(product->alg, "default", &flg));
2154: if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2156: /* Get runtime option */
2157: if (product->api_user) {
2158: PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMatMult", "Mat");
2159: PetscCall(PetscOptionsEList("-matmatmatmult_via", "Algorithmic approach", "MatMatMatMult", algTypes, nalg, algTypes[alg], &alg, &flg));
2160: PetscOptionsEnd();
2161: } else {
2162: PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_ABC", "Mat");
2163: PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatProduct_ABC", algTypes, nalg, algTypes[alg], &alg, &flg));
2164: PetscOptionsEnd();
2165: }
2166: if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
2168: C->ops->matmatmultsymbolic = MatMatMatMultSymbolic_SeqAIJ_SeqAIJ_SeqAIJ;
2169: C->ops->productsymbolic = MatProductSymbolic_ABC;
2170: PetscFunctionReturn(PETSC_SUCCESS);
2171: }
2173: PetscErrorCode MatProductSetFromOptions_SeqAIJ(Mat C)
2174: {
2175: Mat_Product *product = C->product;
2177: PetscFunctionBegin;
2178: switch (product->type) {
2179: case MATPRODUCT_AB:
2180: PetscCall(MatProductSetFromOptions_SeqAIJ_AB(C));
2181: break;
2182: case MATPRODUCT_AtB:
2183: PetscCall(MatProductSetFromOptions_SeqAIJ_AtB(C));
2184: break;
2185: case MATPRODUCT_ABt:
2186: PetscCall(MatProductSetFromOptions_SeqAIJ_ABt(C));
2187: break;
2188: case MATPRODUCT_PtAP:
2189: PetscCall(MatProductSetFromOptions_SeqAIJ_PtAP(C));
2190: break;
2191: case MATPRODUCT_RARt:
2192: PetscCall(MatProductSetFromOptions_SeqAIJ_RARt(C));
2193: break;
2194: case MATPRODUCT_ABC:
2195: PetscCall(MatProductSetFromOptions_SeqAIJ_ABC(C));
2196: break;
2197: default:
2198: break;
2199: }
2200: PetscFunctionReturn(PETSC_SUCCESS);
2201: }