Actual source code: matmatmult.c
petsc-3.14.6 2021-03-30
2: /*
3: Defines matrix-matrix product routines for pairs of SeqAIJ matrices
4: C = A * B
5: */
7: #include <../src/mat/impls/aij/seq/aij.h>
8: #include <../src/mat/utils/freespace.h>
9: #include <petscbt.h>
10: #include <petsc/private/isimpl.h>
11: #include <../src/mat/impls/dense/seq/dense.h>
13: PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
14: {
18: if (C->ops->matmultnumeric) {
19: if (C->ops->matmultnumeric == MatMatMultNumeric_SeqAIJ_SeqAIJ) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Recursive call");
20: (*C->ops->matmultnumeric)(A,B,C);
21: } else {
22: MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted(A,B,C);
23: }
24: return(0);
25: }
27: /* Modified from MatCreateSeqAIJWithArrays() */
28: PETSC_INTERN PetscErrorCode MatSetSeqAIJWithArrays_private(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],MatType mtype,Mat mat)
29: {
31: PetscInt ii;
32: Mat_SeqAIJ *aij;
33: PetscBool isseqaij;
36: if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
37: MatSetSizes(mat,m,n,m,n);
39: if (!mtype) {
40: PetscObjectBaseTypeCompare((PetscObject)mat,MATSEQAIJ,&isseqaij);
41: if (!isseqaij) { MatSetType(mat,MATSEQAIJ); }
42: } else {
43: MatSetType(mat,mtype);
44: }
45: MatSeqAIJSetPreallocation_SeqAIJ(mat,MAT_SKIP_ALLOCATION,NULL);
46: aij = (Mat_SeqAIJ*)(mat)->data;
47: PetscMalloc1(m,&aij->imax);
48: PetscMalloc1(m,&aij->ilen);
50: aij->i = i;
51: aij->j = j;
52: aij->a = a;
53: aij->singlemalloc = PETSC_FALSE;
54: aij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
55: aij->free_a = PETSC_FALSE;
56: aij->free_ij = PETSC_FALSE;
58: for (ii=0; ii<m; ii++) {
59: aij->ilen[ii] = aij->imax[ii] = i[ii+1] - i[ii];
60: }
62: return(0);
63: }
65: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat C)
66: {
67: PetscErrorCode ierr;
68: Mat_Product *product = C->product;
69: MatProductAlgorithm alg;
70: PetscBool flg;
73: if (product) {
74: alg = product->alg;
75: } else {
76: alg = "sorted";
77: }
78: /* sorted */
79: PetscStrcmp(alg,"sorted",&flg);
80: if (flg) {
81: MatMatMultSymbolic_SeqAIJ_SeqAIJ_Sorted(A,B,fill,C);
82: return(0);
83: }
85: /* scalable */
86: PetscStrcmp(alg,"scalable",&flg);
87: if (flg) {
88: MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(A,B,fill,C);
89: return(0);
90: }
92: /* scalable_fast */
93: PetscStrcmp(alg,"scalable_fast",&flg);
94: if (flg) {
95: MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_fast(A,B,fill,C);
96: return(0);
97: }
99: /* heap */
100: PetscStrcmp(alg,"heap",&flg);
101: if (flg) {
102: MatMatMultSymbolic_SeqAIJ_SeqAIJ_Heap(A,B,fill,C);
103: return(0);
104: }
106: /* btheap */
107: PetscStrcmp(alg,"btheap",&flg);
108: if (flg) {
109: MatMatMultSymbolic_SeqAIJ_SeqAIJ_BTHeap(A,B,fill,C);
110: return(0);
111: }
113: /* llcondensed */
114: PetscStrcmp(alg,"llcondensed",&flg);
115: if (flg) {
116: MatMatMultSymbolic_SeqAIJ_SeqAIJ_LLCondensed(A,B,fill,C);
117: return(0);
118: }
120: /* rowmerge */
121: PetscStrcmp(alg,"rowmerge",&flg);
122: if (flg) {
123: MatMatMultSymbolic_SeqAIJ_SeqAIJ_RowMerge(A,B,fill,C);
124: return(0);
125: }
127: #if defined(PETSC_HAVE_HYPRE)
128: PetscStrcmp(alg,"hypre",&flg);
129: if (flg) {
130: MatMatMultSymbolic_AIJ_AIJ_wHYPRE(A,B,fill,C);
131: return(0);
132: }
133: #endif
135: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product Algorithm is not supported");
136: }
138: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_LLCondensed(Mat A,Mat B,PetscReal fill,Mat C)
139: {
140: PetscErrorCode ierr;
141: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
142: PetscInt *ai=a->i,*bi=b->i,*ci,*cj;
143: PetscInt am =A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
144: PetscReal afill;
145: PetscInt i,j,anzi,brow,bnzj,cnzi,*bj,*aj,*lnk,ndouble=0,Crmax;
146: PetscTable ta;
147: PetscBT lnkbt;
148: PetscFreeSpaceList free_space=NULL,current_space=NULL;
151: /* Get ci and cj */
152: /*---------------*/
153: /* Allocate ci array, arrays for fill computation and */
154: /* free space for accumulating nonzero column info */
155: PetscMalloc1(am+2,&ci);
156: ci[0] = 0;
158: /* create and initialize a linked list */
159: PetscTableCreate(bn,bn,&ta);
160: MatRowMergeMax_SeqAIJ(b,bm,ta);
161: PetscTableGetCount(ta,&Crmax);
162: PetscTableDestroy(&ta);
164: PetscLLCondensedCreate(Crmax,bn,&lnk,&lnkbt);
166: /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
167: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space);
169: current_space = free_space;
171: /* Determine ci and cj */
172: for (i=0; i<am; i++) {
173: anzi = ai[i+1] - ai[i];
174: aj = a->j + ai[i];
175: for (j=0; j<anzi; j++) {
176: brow = aj[j];
177: bnzj = bi[brow+1] - bi[brow];
178: bj = b->j + bi[brow];
179: /* add non-zero cols of B into the sorted linked list lnk */
180: PetscLLCondensedAddSorted(bnzj,bj,lnk,lnkbt);
181: }
182: cnzi = lnk[0];
184: /* If free space is not available, make more free space */
185: /* Double the amount of total space in the list */
186: if (current_space->local_remaining<cnzi) {
187: PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space);
188: ndouble++;
189: }
191: /* Copy data into free space, then initialize lnk */
192: PetscLLCondensedClean(bn,cnzi,current_space->array,lnk,lnkbt);
194: current_space->array += cnzi;
195: current_space->local_used += cnzi;
196: current_space->local_remaining -= cnzi;
198: ci[i+1] = ci[i] + cnzi;
199: }
201: /* Column indices are in the list of free space */
202: /* Allocate space for cj, initialize cj, and */
203: /* destroy list of free space and other temporary array(s) */
204: PetscMalloc1(ci[am]+1,&cj);
205: PetscFreeSpaceContiguous(&free_space,cj);
206: PetscLLCondensedDestroy(lnk,lnkbt);
208: /* put together the new symbolic matrix */
209: MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C);
210: MatSetBlockSizesFromMats(C,A,B);
212: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
213: /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
214: c = (Mat_SeqAIJ*)(C->data);
215: c->free_a = PETSC_FALSE;
216: c->free_ij = PETSC_TRUE;
217: c->nonew = 0;
219: /* fast, needs non-scalable O(bn) array 'abdense' */
220: C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted;
222: /* set MatInfo */
223: afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
224: if (afill < 1.0) afill = 1.0;
225: c->maxnz = ci[am];
226: c->nz = ci[am];
227: C->info.mallocs = ndouble;
228: C->info.fill_ratio_given = fill;
229: C->info.fill_ratio_needed = afill;
231: #if defined(PETSC_USE_INFO)
232: if (ci[am]) {
233: PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);
234: PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);
235: } else {
236: PetscInfo(C,"Empty matrix product\n");
237: }
238: #endif
239: return(0);
240: }
242: PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted(Mat A,Mat B,Mat C)
243: {
245: PetscLogDouble flops=0.0;
246: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
247: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
248: Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
249: PetscInt *ai =a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
250: PetscInt am =A->rmap->n,cm=C->rmap->n;
251: PetscInt i,j,k,anzi,bnzi,cnzi,brow;
252: PetscScalar *aa=a->a,*ba=b->a,*baj,*ca,valtmp;
253: PetscScalar *ab_dense;
254: PetscContainer cab_dense;
257: if (!c->a) { /* first call of MatMatMultNumeric_SeqAIJ_SeqAIJ, allocate ca and matmult_abdense */
258: PetscMalloc1(ci[cm]+1,&ca);
259: c->a = ca;
260: c->free_a = PETSC_TRUE;
261: } else ca = c->a;
263: /* TODO this should be done in the symbolic phase */
264: /* However, this function is so heavily used (sometimes in an hidden way through multnumeric function pointers
265: that is hard to eradicate) */
266: PetscObjectQuery((PetscObject)C,"__PETSc__ab_dense",(PetscObject*)&cab_dense);
267: if (!cab_dense) {
268: PetscMalloc1(B->cmap->N,&ab_dense);
269: PetscContainerCreate(PETSC_COMM_SELF,&cab_dense);
270: PetscContainerSetPointer(cab_dense,ab_dense);
271: PetscContainerSetUserDestroy(cab_dense,PetscContainerUserDestroyDefault);
272: PetscObjectCompose((PetscObject)C,"__PETSc__ab_dense",(PetscObject)cab_dense);
273: PetscObjectDereference((PetscObject)cab_dense);
274: }
275: PetscContainerGetPointer(cab_dense,(void**)&ab_dense);
276: PetscArrayzero(ab_dense,B->cmap->N);
278: /* clean old values in C */
279: PetscArrayzero(ca,ci[cm]);
280: /* Traverse A row-wise. */
281: /* Build the ith row in C by summing over nonzero columns in A, */
282: /* the rows of B corresponding to nonzeros of A. */
283: for (i=0; i<am; i++) {
284: anzi = ai[i+1] - ai[i];
285: for (j=0; j<anzi; j++) {
286: brow = aj[j];
287: bnzi = bi[brow+1] - bi[brow];
288: bjj = bj + bi[brow];
289: baj = ba + bi[brow];
290: /* perform dense axpy */
291: valtmp = aa[j];
292: for (k=0; k<bnzi; k++) {
293: ab_dense[bjj[k]] += valtmp*baj[k];
294: }
295: flops += 2*bnzi;
296: }
297: aj += anzi; aa += anzi;
299: cnzi = ci[i+1] - ci[i];
300: for (k=0; k<cnzi; k++) {
301: ca[k] += ab_dense[cj[k]];
302: ab_dense[cj[k]] = 0.0; /* zero ab_dense */
303: }
304: flops += cnzi;
305: cj += cnzi; ca += cnzi;
306: }
307: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
308: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
309: PetscLogFlops(flops);
310: return(0);
311: }
313: PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,Mat C)
314: {
316: PetscLogDouble flops=0.0;
317: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
318: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
319: Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
320: PetscInt *ai = a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
321: PetscInt am = A->rmap->N,cm=C->rmap->N;
322: PetscInt i,j,k,anzi,bnzi,cnzi,brow;
323: PetscScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a,valtmp;
324: PetscInt nextb;
327: if (!ca) { /* first call of MatMatMultNumeric_SeqAIJ_SeqAIJ, allocate ca and matmult_abdense */
328: PetscMalloc1(ci[cm]+1,&ca);
329: c->a = ca;
330: c->free_a = PETSC_TRUE;
331: }
333: /* clean old values in C */
334: PetscArrayzero(ca,ci[cm]);
335: /* Traverse A row-wise. */
336: /* Build the ith row in C by summing over nonzero columns in A, */
337: /* the rows of B corresponding to nonzeros of A. */
338: for (i=0; i<am; i++) {
339: anzi = ai[i+1] - ai[i];
340: cnzi = ci[i+1] - ci[i];
341: for (j=0; j<anzi; j++) {
342: brow = aj[j];
343: bnzi = bi[brow+1] - bi[brow];
344: bjj = bj + bi[brow];
345: baj = ba + bi[brow];
346: /* perform sparse axpy */
347: valtmp = aa[j];
348: nextb = 0;
349: for (k=0; nextb<bnzi; k++) {
350: if (cj[k] == bjj[nextb]) { /* ccol == bcol */
351: ca[k] += valtmp*baj[nextb++];
352: }
353: }
354: flops += 2*bnzi;
355: }
356: aj += anzi; aa += anzi;
357: cj += cnzi; ca += cnzi;
358: }
359: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
360: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
361: PetscLogFlops(flops);
362: return(0);
363: }
365: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_fast(Mat A,Mat B,PetscReal fill,Mat C)
366: {
367: PetscErrorCode ierr;
368: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
369: PetscInt *ai = a->i,*bi=b->i,*ci,*cj;
370: PetscInt am = A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
371: MatScalar *ca;
372: PetscReal afill;
373: PetscInt i,j,anzi,brow,bnzj,cnzi,*bj,*aj,*lnk,ndouble=0,Crmax;
374: PetscTable ta;
375: PetscFreeSpaceList free_space=NULL,current_space=NULL;
378: /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_fast() */
379: /*-----------------------------------------------------------------------------------------*/
380: /* Allocate arrays for fill computation and free space for accumulating nonzero column */
381: PetscMalloc1(am+2,&ci);
382: ci[0] = 0;
384: /* create and initialize a linked list */
385: PetscTableCreate(bn,bn,&ta);
386: MatRowMergeMax_SeqAIJ(b,bm,ta);
387: PetscTableGetCount(ta,&Crmax);
388: PetscTableDestroy(&ta);
390: PetscLLCondensedCreate_fast(Crmax,&lnk);
392: /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
393: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space);
394: current_space = free_space;
396: /* Determine ci and cj */
397: for (i=0; i<am; i++) {
398: anzi = ai[i+1] - ai[i];
399: aj = a->j + ai[i];
400: for (j=0; j<anzi; j++) {
401: brow = aj[j];
402: bnzj = bi[brow+1] - bi[brow];
403: bj = b->j + bi[brow];
404: /* add non-zero cols of B into the sorted linked list lnk */
405: PetscLLCondensedAddSorted_fast(bnzj,bj,lnk);
406: }
407: cnzi = lnk[1];
409: /* If free space is not available, make more free space */
410: /* Double the amount of total space in the list */
411: if (current_space->local_remaining<cnzi) {
412: PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space);
413: ndouble++;
414: }
416: /* Copy data into free space, then initialize lnk */
417: PetscLLCondensedClean_fast(cnzi,current_space->array,lnk);
419: current_space->array += cnzi;
420: current_space->local_used += cnzi;
421: current_space->local_remaining -= cnzi;
423: ci[i+1] = ci[i] + cnzi;
424: }
426: /* Column indices are in the list of free space */
427: /* Allocate space for cj, initialize cj, and */
428: /* destroy list of free space and other temporary array(s) */
429: PetscMalloc1(ci[am]+1,&cj);
430: PetscFreeSpaceContiguous(&free_space,cj);
431: PetscLLCondensedDestroy_fast(lnk);
433: /* Allocate space for ca */
434: PetscCalloc1(ci[am]+1,&ca);
436: /* put together the new symbolic matrix */
437: MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,ca,((PetscObject)A)->type_name,C);
438: MatSetBlockSizesFromMats(C,A,B);
440: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
441: /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
442: c = (Mat_SeqAIJ*)(C->data);
443: c->free_a = PETSC_TRUE;
444: c->free_ij = PETSC_TRUE;
445: c->nonew = 0;
447: /* slower, less memory */
448: C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable;
450: /* set MatInfo */
451: afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
452: if (afill < 1.0) afill = 1.0;
453: c->maxnz = ci[am];
454: c->nz = ci[am];
455: C->info.mallocs = ndouble;
456: C->info.fill_ratio_given = fill;
457: C->info.fill_ratio_needed = afill;
459: #if defined(PETSC_USE_INFO)
460: if (ci[am]) {
461: PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);
462: PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);
463: } else {
464: PetscInfo(C,"Empty matrix product\n");
465: }
466: #endif
467: return(0);
468: }
470: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,PetscReal fill,Mat C)
471: {
472: PetscErrorCode ierr;
473: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
474: PetscInt *ai = a->i,*bi=b->i,*ci,*cj;
475: PetscInt am = A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
476: MatScalar *ca;
477: PetscReal afill;
478: PetscInt i,j,anzi,brow,bnzj,cnzi,*bj,*aj,*lnk,ndouble=0,Crmax;
479: PetscTable ta;
480: PetscFreeSpaceList free_space=NULL,current_space=NULL;
483: /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_Scalalbe() */
484: /*---------------------------------------------------------------------------------------------*/
485: /* Allocate arrays for fill computation and free space for accumulating nonzero column */
486: PetscMalloc1(am+2,&ci);
487: ci[0] = 0;
489: /* create and initialize a linked list */
490: PetscTableCreate(bn,bn,&ta);
491: MatRowMergeMax_SeqAIJ(b,bm,ta);
492: PetscTableGetCount(ta,&Crmax);
493: PetscTableDestroy(&ta);
494: PetscLLCondensedCreate_Scalable(Crmax,&lnk);
496: /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
497: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space);
498: current_space = free_space;
500: /* Determine ci and cj */
501: for (i=0; i<am; i++) {
502: anzi = ai[i+1] - ai[i];
503: aj = a->j + ai[i];
504: for (j=0; j<anzi; j++) {
505: brow = aj[j];
506: bnzj = bi[brow+1] - bi[brow];
507: bj = b->j + bi[brow];
508: /* add non-zero cols of B into the sorted linked list lnk */
509: PetscLLCondensedAddSorted_Scalable(bnzj,bj,lnk);
510: }
511: cnzi = lnk[0];
513: /* If free space is not available, make more free space */
514: /* Double the amount of total space in the list */
515: if (current_space->local_remaining<cnzi) {
516: PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space);
517: ndouble++;
518: }
520: /* Copy data into free space, then initialize lnk */
521: PetscLLCondensedClean_Scalable(cnzi,current_space->array,lnk);
523: current_space->array += cnzi;
524: current_space->local_used += cnzi;
525: current_space->local_remaining -= cnzi;
527: ci[i+1] = ci[i] + cnzi;
528: }
530: /* Column indices are in the list of free space */
531: /* Allocate space for cj, initialize cj, and */
532: /* destroy list of free space and other temporary array(s) */
533: PetscMalloc1(ci[am]+1,&cj);
534: PetscFreeSpaceContiguous(&free_space,cj);
535: PetscLLCondensedDestroy_Scalable(lnk);
537: /* Allocate space for ca */
538: /*-----------------------*/
539: PetscCalloc1(ci[am]+1,&ca);
541: /* put together the new symbolic matrix */
542: MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,ca,((PetscObject)A)->type_name,C);
543: MatSetBlockSizesFromMats(C,A,B);
545: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
546: /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
547: c = (Mat_SeqAIJ*)(C->data);
548: c->free_a = PETSC_TRUE;
549: c->free_ij = PETSC_TRUE;
550: c->nonew = 0;
552: /* slower, less memory */
553: C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable;
555: /* set MatInfo */
556: afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
557: if (afill < 1.0) afill = 1.0;
558: c->maxnz = ci[am];
559: c->nz = ci[am];
560: C->info.mallocs = ndouble;
561: C->info.fill_ratio_given = fill;
562: C->info.fill_ratio_needed = afill;
564: #if defined(PETSC_USE_INFO)
565: if (ci[am]) {
566: PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);
567: PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);
568: } else {
569: PetscInfo(C,"Empty matrix product\n");
570: }
571: #endif
572: return(0);
573: }
575: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Heap(Mat A,Mat B,PetscReal fill,Mat C)
576: {
577: PetscErrorCode ierr;
578: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
579: const PetscInt *ai=a->i,*bi=b->i,*aj=a->j,*bj=b->j;
580: PetscInt *ci,*cj,*bb;
581: PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
582: PetscReal afill;
583: PetscInt i,j,col,ndouble = 0;
584: PetscFreeSpaceList free_space=NULL,current_space=NULL;
585: PetscHeap h;
588: /* Get ci and cj - by merging sorted rows using a heap */
589: /*---------------------------------------------------------------------------------------------*/
590: /* Allocate arrays for fill computation and free space for accumulating nonzero column */
591: PetscMalloc1(am+2,&ci);
592: ci[0] = 0;
594: /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
595: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space);
596: current_space = free_space;
598: PetscHeapCreate(a->rmax,&h);
599: PetscMalloc1(a->rmax,&bb);
601: /* Determine ci and cj */
602: for (i=0; i<am; i++) {
603: const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */
604: const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */
605: ci[i+1] = ci[i];
606: /* Populate the min heap */
607: for (j=0; j<anzi; j++) {
608: bb[j] = bi[acol[j]]; /* bb points at the start of the row */
609: if (bb[j] < bi[acol[j]+1]) { /* Add if row is nonempty */
610: PetscHeapAdd(h,j,bj[bb[j]++]);
611: }
612: }
613: /* Pick off the min element, adding it to free space */
614: PetscHeapPop(h,&j,&col);
615: while (j >= 0) {
616: if (current_space->local_remaining < 1) { /* double the size, but don't exceed 16 MiB */
617: PetscFreeSpaceGet(PetscMin(PetscIntMultTruncate(2,current_space->total_array_size),16 << 20),¤t_space);
618: ndouble++;
619: }
620: *(current_space->array++) = col;
621: current_space->local_used++;
622: current_space->local_remaining--;
623: ci[i+1]++;
625: /* stash if anything else remains in this row of B */
626: if (bb[j] < bi[acol[j]+1]) {PetscHeapStash(h,j,bj[bb[j]++]);}
627: while (1) { /* pop and stash any other rows of B that also had an entry in this column */
628: PetscInt j2,col2;
629: PetscHeapPeek(h,&j2,&col2);
630: if (col2 != col) break;
631: PetscHeapPop(h,&j2,&col2);
632: if (bb[j2] < bi[acol[j2]+1]) {PetscHeapStash(h,j2,bj[bb[j2]++]);}
633: }
634: /* Put any stashed elements back into the min heap */
635: PetscHeapUnstash(h);
636: PetscHeapPop(h,&j,&col);
637: }
638: }
639: PetscFree(bb);
640: PetscHeapDestroy(&h);
642: /* Column indices are in the list of free space */
643: /* Allocate space for cj, initialize cj, and */
644: /* destroy list of free space and other temporary array(s) */
645: PetscMalloc1(ci[am],&cj);
646: PetscFreeSpaceContiguous(&free_space,cj);
648: /* put together the new symbolic matrix */
649: MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C);
650: MatSetBlockSizesFromMats(C,A,B);
652: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
653: /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
654: c = (Mat_SeqAIJ*)(C->data);
655: c->free_a = PETSC_TRUE;
656: c->free_ij = PETSC_TRUE;
657: c->nonew = 0;
659: C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted;
661: /* set MatInfo */
662: afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
663: if (afill < 1.0) afill = 1.0;
664: c->maxnz = ci[am];
665: c->nz = ci[am];
666: C->info.mallocs = ndouble;
667: C->info.fill_ratio_given = fill;
668: C->info.fill_ratio_needed = afill;
670: #if defined(PETSC_USE_INFO)
671: if (ci[am]) {
672: PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);
673: PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);
674: } else {
675: PetscInfo(C,"Empty matrix product\n");
676: }
677: #endif
678: return(0);
679: }
681: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_BTHeap(Mat A,Mat B,PetscReal fill,Mat C)
682: {
683: PetscErrorCode ierr;
684: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
685: const PetscInt *ai = a->i,*bi=b->i,*aj=a->j,*bj=b->j;
686: PetscInt *ci,*cj,*bb;
687: PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
688: PetscReal afill;
689: PetscInt i,j,col,ndouble = 0;
690: PetscFreeSpaceList free_space=NULL,current_space=NULL;
691: PetscHeap h;
692: PetscBT bt;
695: /* Get ci and cj - using a heap for the sorted rows, but use BT so that each index is only added once */
696: /*---------------------------------------------------------------------------------------------*/
697: /* Allocate arrays for fill computation and free space for accumulating nonzero column */
698: PetscMalloc1(am+2,&ci);
699: ci[0] = 0;
701: /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
702: PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space);
704: current_space = free_space;
706: PetscHeapCreate(a->rmax,&h);
707: PetscMalloc1(a->rmax,&bb);
708: PetscBTCreate(bn,&bt);
710: /* Determine ci and cj */
711: for (i=0; i<am; i++) {
712: const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */
713: const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */
714: const PetscInt *fptr = current_space->array; /* Save beginning of the row so we can clear the BT later */
715: ci[i+1] = ci[i];
716: /* Populate the min heap */
717: for (j=0; j<anzi; j++) {
718: PetscInt brow = acol[j];
719: for (bb[j] = bi[brow]; bb[j] < bi[brow+1]; bb[j]++) {
720: PetscInt bcol = bj[bb[j]];
721: if (!PetscBTLookupSet(bt,bcol)) { /* new entry */
722: PetscHeapAdd(h,j,bcol);
723: bb[j]++;
724: break;
725: }
726: }
727: }
728: /* Pick off the min element, adding it to free space */
729: PetscHeapPop(h,&j,&col);
730: while (j >= 0) {
731: if (current_space->local_remaining < 1) { /* double the size, but don't exceed 16 MiB */
732: fptr = NULL; /* need PetscBTMemzero */
733: PetscFreeSpaceGet(PetscMin(PetscIntMultTruncate(2,current_space->total_array_size),16 << 20),¤t_space);
734: ndouble++;
735: }
736: *(current_space->array++) = col;
737: current_space->local_used++;
738: current_space->local_remaining--;
739: ci[i+1]++;
741: /* stash if anything else remains in this row of B */
742: for (; bb[j] < bi[acol[j]+1]; bb[j]++) {
743: PetscInt bcol = bj[bb[j]];
744: if (!PetscBTLookupSet(bt,bcol)) { /* new entry */
745: PetscHeapAdd(h,j,bcol);
746: bb[j]++;
747: break;
748: }
749: }
750: PetscHeapPop(h,&j,&col);
751: }
752: if (fptr) { /* Clear the bits for this row */
753: for (; fptr<current_space->array; fptr++) {PetscBTClear(bt,*fptr);}
754: } else { /* We reallocated so we don't remember (easily) how to clear only the bits we changed */
755: PetscBTMemzero(bn,bt);
756: }
757: }
758: PetscFree(bb);
759: PetscHeapDestroy(&h);
760: PetscBTDestroy(&bt);
762: /* Column indices are in the list of free space */
763: /* Allocate space for cj, initialize cj, and */
764: /* destroy list of free space and other temporary array(s) */
765: PetscMalloc1(ci[am],&cj);
766: PetscFreeSpaceContiguous(&free_space,cj);
768: /* put together the new symbolic matrix */
769: MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C);
770: MatSetBlockSizesFromMats(C,A,B);
772: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
773: /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
774: c = (Mat_SeqAIJ*)(C->data);
775: c->free_a = PETSC_TRUE;
776: c->free_ij = PETSC_TRUE;
777: c->nonew = 0;
779: C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted;
781: /* set MatInfo */
782: afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
783: if (afill < 1.0) afill = 1.0;
784: c->maxnz = ci[am];
785: c->nz = ci[am];
786: C->info.mallocs = ndouble;
787: C->info.fill_ratio_given = fill;
788: C->info.fill_ratio_needed = afill;
790: #if defined(PETSC_USE_INFO)
791: if (ci[am]) {
792: PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);
793: PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);
794: } else {
795: PetscInfo(C,"Empty matrix product\n");
796: }
797: #endif
798: return(0);
799: }
802: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_RowMerge(Mat A,Mat B,PetscReal fill,Mat C)
803: {
804: PetscErrorCode ierr;
805: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
806: const PetscInt *ai=a->i,*bi=b->i,*aj=a->j,*bj=b->j,*inputi,*inputj,*inputcol,*inputcol_L1;
807: PetscInt *ci,*cj,*outputj,worki_L1[9],worki_L2[9];
808: PetscInt c_maxmem,a_maxrownnz=0,a_rownnz;
809: const PetscInt workcol[8]={0,1,2,3,4,5,6,7};
810: const PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
811: const PetscInt *brow_ptr[8],*brow_end[8];
812: PetscInt window[8];
813: PetscInt window_min,old_window_min,ci_nnz,outputi_nnz=0,L1_nrows,L2_nrows;
814: PetscInt i,k,ndouble=0,L1_rowsleft,rowsleft;
815: PetscReal afill;
816: PetscInt *workj_L1,*workj_L2,*workj_L3;
817: PetscInt L1_nnz,L2_nnz;
819: /* Step 1: Get upper bound on memory required for allocation.
820: Because of the way virtual memory works,
821: only the memory pages that are actually needed will be physically allocated. */
823: PetscMalloc1(am+1,&ci);
824: for (i=0; i<am; i++) {
825: const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */
826: const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */
827: a_rownnz = 0;
828: for (k=0; k<anzi; ++k) {
829: a_rownnz += bi[acol[k]+1] - bi[acol[k]];
830: if (a_rownnz > bn) {
831: a_rownnz = bn;
832: break;
833: }
834: }
835: a_maxrownnz = PetscMax(a_maxrownnz, a_rownnz);
836: }
837: /* temporary work areas for merging rows */
838: PetscMalloc1(a_maxrownnz*8,&workj_L1);
839: PetscMalloc1(a_maxrownnz*8,&workj_L2);
840: PetscMalloc1(a_maxrownnz,&workj_L3);
842: /* This should be enough for almost all matrices. If not, memory is reallocated later. */
843: c_maxmem = 8*(ai[am]+bi[bm]);
844: /* Step 2: Populate pattern for C */
845: PetscMalloc1(c_maxmem,&cj);
847: ci_nnz = 0;
848: ci[0] = 0;
849: worki_L1[0] = 0;
850: worki_L2[0] = 0;
851: for (i=0; i<am; i++) {
852: const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */
853: const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */
854: rowsleft = anzi;
855: inputcol_L1 = acol;
856: L2_nnz = 0;
857: L2_nrows = 1; /* Number of rows to be merged on Level 3. output of L3 already exists -> initial value 1 */
858: worki_L2[1] = 0;
859: outputi_nnz = 0;
861: /* If the number of indices in C so far + the max number of columns in the next row > c_maxmem -> allocate more memory */
862: while (ci_nnz+a_maxrownnz > c_maxmem) {
863: c_maxmem *= 2;
864: ndouble++;
865: PetscRealloc(sizeof(PetscInt)*c_maxmem,&cj);
866: }
868: while (rowsleft) {
869: L1_rowsleft = PetscMin(64, rowsleft); /* In the inner loop max 64 rows of B can be merged */
870: L1_nrows = 0;
871: L1_nnz = 0;
872: inputcol = inputcol_L1;
873: inputi = bi;
874: inputj = bj;
876: /* The following macro is used to specialize for small rows in A.
877: This helps with compiler unrolling, improving performance substantially.
878: Input: inputj inputi inputcol bn
879: Output: outputj outputi_nnz */
880: #define MatMatMultSymbolic_RowMergeMacro(ANNZ) \
881: window_min = bn; \
882: outputi_nnz = 0; \
883: for (k=0; k<ANNZ; ++k) { \
884: brow_ptr[k] = inputj + inputi[inputcol[k]]; \
885: brow_end[k] = inputj + inputi[inputcol[k]+1]; \
886: window[k] = (brow_ptr[k] != brow_end[k]) ? *brow_ptr[k] : bn; \
887: window_min = PetscMin(window[k], window_min); \
888: } \
889: while (window_min < bn) { \
890: outputj[outputi_nnz++] = window_min; \
891: /* advance front and compute new minimum */ \
892: old_window_min = window_min; \
893: window_min = bn; \
894: for (k=0; k<ANNZ; ++k) { \
895: if (window[k] == old_window_min) { \
896: brow_ptr[k]++; \
897: window[k] = (brow_ptr[k] != brow_end[k]) ? *brow_ptr[k] : bn; \
898: } \
899: window_min = PetscMin(window[k], window_min); \
900: } \
901: }
903: /************** L E V E L 1 ***************/
904: /* Merge up to 8 rows of B to L1 work array*/
905: while (L1_rowsleft) {
906: outputi_nnz = 0;
907: if (anzi > 8) outputj = workj_L1 + L1_nnz; /* Level 1 rowmerge*/
908: else outputj = cj + ci_nnz; /* Merge directly to C */
910: switch (L1_rowsleft) {
911: case 1: brow_ptr[0] = inputj + inputi[inputcol[0]];
912: brow_end[0] = inputj + inputi[inputcol[0]+1];
913: for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */
914: inputcol += L1_rowsleft;
915: rowsleft -= L1_rowsleft;
916: L1_rowsleft = 0;
917: break;
918: case 2: MatMatMultSymbolic_RowMergeMacro(2);
919: inputcol += L1_rowsleft;
920: rowsleft -= L1_rowsleft;
921: L1_rowsleft = 0;
922: break;
923: case 3: MatMatMultSymbolic_RowMergeMacro(3);
924: inputcol += L1_rowsleft;
925: rowsleft -= L1_rowsleft;
926: L1_rowsleft = 0;
927: break;
928: case 4: MatMatMultSymbolic_RowMergeMacro(4);
929: inputcol += L1_rowsleft;
930: rowsleft -= L1_rowsleft;
931: L1_rowsleft = 0;
932: break;
933: case 5: MatMatMultSymbolic_RowMergeMacro(5);
934: inputcol += L1_rowsleft;
935: rowsleft -= L1_rowsleft;
936: L1_rowsleft = 0;
937: break;
938: case 6: MatMatMultSymbolic_RowMergeMacro(6);
939: inputcol += L1_rowsleft;
940: rowsleft -= L1_rowsleft;
941: L1_rowsleft = 0;
942: break;
943: case 7: MatMatMultSymbolic_RowMergeMacro(7);
944: inputcol += L1_rowsleft;
945: rowsleft -= L1_rowsleft;
946: L1_rowsleft = 0;
947: break;
948: default: MatMatMultSymbolic_RowMergeMacro(8);
949: inputcol += 8;
950: rowsleft -= 8;
951: L1_rowsleft -= 8;
952: break;
953: }
954: inputcol_L1 = inputcol;
955: L1_nnz += outputi_nnz;
956: worki_L1[++L1_nrows] = L1_nnz;
957: }
959: /********************** L E V E L 2 ************************/
960: /* Merge from L1 work array to either C or to L2 work array */
961: if (anzi > 8) {
962: inputi = worki_L1;
963: inputj = workj_L1;
964: inputcol = workcol;
965: outputi_nnz = 0;
967: if (anzi <= 64) outputj = cj + ci_nnz; /* Merge from L1 work array to C */
968: else outputj = workj_L2 + L2_nnz; /* Merge from L1 work array to L2 work array */
970: switch (L1_nrows) {
971: case 1: brow_ptr[0] = inputj + inputi[inputcol[0]];
972: brow_end[0] = inputj + inputi[inputcol[0]+1];
973: for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */
974: break;
975: case 2: MatMatMultSymbolic_RowMergeMacro(2); break;
976: case 3: MatMatMultSymbolic_RowMergeMacro(3); break;
977: case 4: MatMatMultSymbolic_RowMergeMacro(4); break;
978: case 5: MatMatMultSymbolic_RowMergeMacro(5); break;
979: case 6: MatMatMultSymbolic_RowMergeMacro(6); break;
980: case 7: MatMatMultSymbolic_RowMergeMacro(7); break;
981: case 8: MatMatMultSymbolic_RowMergeMacro(8); break;
982: default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatMult logic error: Not merging 1-8 rows from L1 work array!");
983: }
984: L2_nnz += outputi_nnz;
985: worki_L2[++L2_nrows] = L2_nnz;
987: /************************ L E V E L 3 **********************/
988: /* Merge from L2 work array to either C or to L2 work array */
989: if (anzi > 64 && (L2_nrows == 8 || rowsleft == 0)) {
990: inputi = worki_L2;
991: inputj = workj_L2;
992: inputcol = workcol;
993: outputi_nnz = 0;
994: if (rowsleft) outputj = workj_L3;
995: else outputj = cj + ci_nnz;
996: switch (L2_nrows) {
997: case 1: brow_ptr[0] = inputj + inputi[inputcol[0]];
998: brow_end[0] = inputj + inputi[inputcol[0]+1];
999: for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */
1000: break;
1001: case 2: MatMatMultSymbolic_RowMergeMacro(2); break;
1002: case 3: MatMatMultSymbolic_RowMergeMacro(3); break;
1003: case 4: MatMatMultSymbolic_RowMergeMacro(4); break;
1004: case 5: MatMatMultSymbolic_RowMergeMacro(5); break;
1005: case 6: MatMatMultSymbolic_RowMergeMacro(6); break;
1006: case 7: MatMatMultSymbolic_RowMergeMacro(7); break;
1007: case 8: MatMatMultSymbolic_RowMergeMacro(8); break;
1008: default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatMult logic error: Not merging 1-8 rows from L2 work array!");
1009: }
1010: L2_nrows = 1;
1011: L2_nnz = outputi_nnz;
1012: worki_L2[1] = outputi_nnz;
1013: /* Copy to workj_L2 */
1014: if (rowsleft) {
1015: for (k=0; k<outputi_nnz; ++k) workj_L2[k] = outputj[k];
1016: }
1017: }
1018: }
1019: } /* while (rowsleft) */
1020: #undef MatMatMultSymbolic_RowMergeMacro
1022: /* terminate current row */
1023: ci_nnz += outputi_nnz;
1024: ci[i+1] = ci_nnz;
1025: }
1027: /* Step 3: Create the new symbolic matrix */
1028: MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C);
1029: MatSetBlockSizesFromMats(C,A,B);
1031: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
1032: /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
1033: c = (Mat_SeqAIJ*)(C->data);
1034: c->free_a = PETSC_TRUE;
1035: c->free_ij = PETSC_TRUE;
1036: c->nonew = 0;
1038: C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted;
1040: /* set MatInfo */
1041: afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
1042: if (afill < 1.0) afill = 1.0;
1043: c->maxnz = ci[am];
1044: c->nz = ci[am];
1045: C->info.mallocs = ndouble;
1046: C->info.fill_ratio_given = fill;
1047: C->info.fill_ratio_needed = afill;
1049: #if defined(PETSC_USE_INFO)
1050: if (ci[am]) {
1051: PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);
1052: PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);
1053: } else {
1054: PetscInfo(C,"Empty matrix product\n");
1055: }
1056: #endif
1058: /* Step 4: Free temporary work areas */
1059: PetscFree(workj_L1);
1060: PetscFree(workj_L2);
1061: PetscFree(workj_L3);
1062: return(0);
1063: }
1065: /* concatenate unique entries and then sort */
1066: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Sorted(Mat A,Mat B,PetscReal fill,Mat C)
1067: {
1069: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
1070: const PetscInt *ai = a->i,*bi=b->i,*aj=a->j,*bj=b->j;
1071: PetscInt *ci,*cj;
1072: PetscInt am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
1073: PetscReal afill;
1074: PetscInt i,j,ndouble = 0;
1075: PetscSegBuffer seg,segrow;
1076: char *seen;
1079: PetscMalloc1(am+1,&ci);
1080: ci[0] = 0;
1082: /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
1083: PetscSegBufferCreate(sizeof(PetscInt),(PetscInt)(fill*(ai[am]+bi[bm])),&seg);
1084: PetscSegBufferCreate(sizeof(PetscInt),100,&segrow);
1085: PetscCalloc1(bn,&seen);
1087: /* Determine ci and cj */
1088: for (i=0; i<am; i++) {
1089: const PetscInt anzi = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */
1090: const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */
1091: PetscInt packlen = 0,*PETSC_RESTRICT crow;
1092: /* Pack segrow */
1093: for (j=0; j<anzi; j++) {
1094: PetscInt brow = acol[j],bjstart = bi[brow],bjend = bi[brow+1],k;
1095: for (k=bjstart; k<bjend; k++) {
1096: PetscInt bcol = bj[k];
1097: if (!seen[bcol]) { /* new entry */
1098: PetscInt *PETSC_RESTRICT slot;
1099: PetscSegBufferGetInts(segrow,1,&slot);
1100: *slot = bcol;
1101: seen[bcol] = 1;
1102: packlen++;
1103: }
1104: }
1105: }
1106: PetscSegBufferGetInts(seg,packlen,&crow);
1107: PetscSegBufferExtractTo(segrow,crow);
1108: PetscSortInt(packlen,crow);
1109: ci[i+1] = ci[i] + packlen;
1110: for (j=0; j<packlen; j++) seen[crow[j]] = 0;
1111: }
1112: PetscSegBufferDestroy(&segrow);
1113: PetscFree(seen);
1115: /* Column indices are in the segmented buffer */
1116: PetscSegBufferExtractAlloc(seg,&cj);
1117: PetscSegBufferDestroy(&seg);
1119: /* put together the new symbolic matrix */
1120: MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C);
1121: MatSetBlockSizesFromMats(C,A,B);
1123: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
1124: /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
1125: c = (Mat_SeqAIJ*)(C->data);
1126: c->free_a = PETSC_TRUE;
1127: c->free_ij = PETSC_TRUE;
1128: c->nonew = 0;
1130: C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted;
1132: /* set MatInfo */
1133: afill = (PetscReal)ci[am]/PetscMax(ai[am]+bi[bm],1) + 1.e-5;
1134: if (afill < 1.0) afill = 1.0;
1135: c->maxnz = ci[am];
1136: c->nz = ci[am];
1137: C->info.mallocs = ndouble;
1138: C->info.fill_ratio_given = fill;
1139: C->info.fill_ratio_needed = afill;
1141: #if defined(PETSC_USE_INFO)
1142: if (ci[am]) {
1143: PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);
1144: PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);
1145: } else {
1146: PetscInfo(C,"Empty matrix product\n");
1147: }
1148: #endif
1149: return(0);
1150: }
1152: PetscErrorCode MatDestroy_SeqAIJ_MatMatMultTrans(void *data)
1153: {
1154: PetscErrorCode ierr;
1155: Mat_MatMatTransMult *abt=(Mat_MatMatTransMult *)data;
1158: MatTransposeColoringDestroy(&abt->matcoloring);
1159: MatDestroy(&abt->Bt_den);
1160: MatDestroy(&abt->ABt_den);
1161: PetscFree(abt);
1162: return(0);
1163: }
1165: PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat C)
1166: {
1167: PetscErrorCode ierr;
1168: Mat Bt;
1169: PetscInt *bti,*btj;
1170: Mat_MatMatTransMult *abt;
1171: Mat_Product *product = C->product;
1172: char *alg;
1175: if (!product) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing product struct");
1176: if (product->data) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Extra product struct not empty");
1178: /* create symbolic Bt */
1179: MatGetSymbolicTranspose_SeqAIJ(B,&bti,&btj);
1180: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,B->cmap->n,B->rmap->n,bti,btj,NULL,&Bt);
1181: MatSetBlockSizes(Bt,PetscAbs(A->cmap->bs),PetscAbs(B->cmap->bs));
1182: MatSetType(Bt,((PetscObject)A)->type_name);
1184: /* get symbolic C=A*Bt */
1185: PetscStrallocpy(product->alg,&alg);
1186: MatProductSetAlgorithm(C,"sorted"); /* set algorithm for C = A*Bt */
1187: MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,Bt,fill,C);
1188: MatProductSetAlgorithm(C,alg); /* resume original algorithm for ABt product */
1189: PetscFree(alg);
1191: /* create a supporting struct for reuse intermidiate dense matrices with matcoloring */
1192: PetscNew(&abt);
1194: product->data = abt;
1195: product->destroy = MatDestroy_SeqAIJ_MatMatMultTrans;
1197: C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
1199: abt->usecoloring = PETSC_FALSE;
1200: PetscStrcmp(product->alg,"color",&abt->usecoloring);
1201: if (abt->usecoloring) {
1202: /* Create MatTransposeColoring from symbolic C=A*B^T */
1203: MatTransposeColoring matcoloring;
1204: MatColoring coloring;
1205: ISColoring iscoloring;
1206: Mat Bt_dense,C_dense;
1208: /* inode causes memory problem */
1209: MatSetOption(C,MAT_USE_INODES,PETSC_FALSE);
1211: MatColoringCreate(C,&coloring);
1212: MatColoringSetDistance(coloring,2);
1213: MatColoringSetType(coloring,MATCOLORINGSL);
1214: MatColoringSetFromOptions(coloring);
1215: MatColoringApply(coloring,&iscoloring);
1216: MatColoringDestroy(&coloring);
1217: MatTransposeColoringCreate(C,iscoloring,&matcoloring);
1219: abt->matcoloring = matcoloring;
1221: ISColoringDestroy(&iscoloring);
1223: /* Create Bt_dense and C_dense = A*Bt_dense */
1224: MatCreate(PETSC_COMM_SELF,&Bt_dense);
1225: MatSetSizes(Bt_dense,A->cmap->n,matcoloring->ncolors,A->cmap->n,matcoloring->ncolors);
1226: MatSetType(Bt_dense,MATSEQDENSE);
1227: MatSeqDenseSetPreallocation(Bt_dense,NULL);
1229: Bt_dense->assembled = PETSC_TRUE;
1230: abt->Bt_den = Bt_dense;
1232: MatCreate(PETSC_COMM_SELF,&C_dense);
1233: MatSetSizes(C_dense,A->rmap->n,matcoloring->ncolors,A->rmap->n,matcoloring->ncolors);
1234: MatSetType(C_dense,MATSEQDENSE);
1235: MatSeqDenseSetPreallocation(C_dense,NULL);
1237: Bt_dense->assembled = PETSC_TRUE;
1238: abt->ABt_den = C_dense;
1240: #if defined(PETSC_USE_INFO)
1241: {
1242: Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
1243: PetscInfo7(C,"Use coloring of C=A*B^T; B^T: %D %D, Bt_dense: %D,%D; Cnz %D / (cm*ncolors %D) = %g\n",B->cmap->n,B->rmap->n,Bt_dense->rmap->n,Bt_dense->cmap->n,c->nz,A->rmap->n*matcoloring->ncolors,(PetscReal)(c->nz)/(A->rmap->n*matcoloring->ncolors));
1244: }
1245: #endif
1246: }
1247: /* clean up */
1248: MatDestroy(&Bt);
1249: MatRestoreSymbolicTranspose_SeqAIJ(B,&bti,&btj);
1250: return(0);
1251: }
1253: PetscErrorCode MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
1254: {
1255: PetscErrorCode ierr;
1256: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data;
1257: PetscInt *ai =a->i,*aj=a->j,*bi=b->i,*bj=b->j,anzi,bnzj,nexta,nextb,*acol,*bcol,brow;
1258: PetscInt cm =C->rmap->n,*ci=c->i,*cj=c->j,i,j,cnzi,*ccol;
1259: PetscLogDouble flops=0.0;
1260: MatScalar *aa =a->a,*aval,*ba=b->a,*bval,*ca,*cval;
1261: Mat_MatMatTransMult *abt;
1262: Mat_Product *product = C->product;
1265: if (!product) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing product struct");
1266: abt = (Mat_MatMatTransMult *)product->data;
1267: if (!abt) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing product struct");
1268: /* clear old values in C */
1269: if (!c->a) {
1270: PetscCalloc1(ci[cm]+1,&ca);
1271: c->a = ca;
1272: c->free_a = PETSC_TRUE;
1273: } else {
1274: ca = c->a;
1275: PetscArrayzero(ca,ci[cm]+1);
1276: }
1278: if (abt->usecoloring) {
1279: MatTransposeColoring matcoloring = abt->matcoloring;
1280: Mat Bt_dense,C_dense = abt->ABt_den;
1282: /* Get Bt_dense by Apply MatTransposeColoring to B */
1283: Bt_dense = abt->Bt_den;
1284: MatTransColoringApplySpToDen(matcoloring,B,Bt_dense);
1286: /* C_dense = A*Bt_dense */
1287: MatMatMultNumeric_SeqAIJ_SeqDense(A,Bt_dense,C_dense);
1289: /* Recover C from C_dense */
1290: MatTransColoringApplyDenToSp(matcoloring,C_dense,C);
1291: return(0);
1292: }
1294: for (i=0; i<cm; i++) {
1295: anzi = ai[i+1] - ai[i];
1296: acol = aj + ai[i];
1297: aval = aa + ai[i];
1298: cnzi = ci[i+1] - ci[i];
1299: ccol = cj + ci[i];
1300: cval = ca + ci[i];
1301: for (j=0; j<cnzi; j++) {
1302: brow = ccol[j];
1303: bnzj = bi[brow+1] - bi[brow];
1304: bcol = bj + bi[brow];
1305: bval = ba + bi[brow];
1307: /* perform sparse inner-product c(i,j)=A[i,:]*B[j,:]^T */
1308: nexta = 0; nextb = 0;
1309: while (nexta<anzi && nextb<bnzj) {
1310: while (nexta < anzi && acol[nexta] < bcol[nextb]) nexta++;
1311: if (nexta == anzi) break;
1312: while (nextb < bnzj && acol[nexta] > bcol[nextb]) nextb++;
1313: if (nextb == bnzj) break;
1314: if (acol[nexta] == bcol[nextb]) {
1315: cval[j] += aval[nexta]*bval[nextb];
1316: nexta++; nextb++;
1317: flops += 2;
1318: }
1319: }
1320: }
1321: }
1322: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1323: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1324: PetscLogFlops(flops);
1325: return(0);
1326: }
1328: PetscErrorCode MatDestroy_SeqAIJ_MatTransMatMult(void *data)
1329: {
1330: PetscErrorCode ierr;
1331: Mat_MatTransMatMult *atb = (Mat_MatTransMatMult*)data;
1334: MatDestroy(&atb->At);
1335: if (atb->destroy) {
1336: (*atb->destroy)(atb->data);
1337: }
1338: PetscFree(atb);
1339: return(0);
1340: }
1342: PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat C)
1343: {
1345: Mat At = NULL;
1346: PetscInt *ati,*atj;
1347: Mat_Product *product = C->product;
1348: PetscBool flg,def,square;
1351: MatCheckProduct(C,4);
1352: square = (PetscBool)(A == B && A->symmetric && A->symmetric_set);
1353: /* outerproduct */
1354: PetscStrcmp(product->alg,"outerproduct",&flg);
1355: if (flg) {
1356: /* create symbolic At */
1357: if (!square) {
1358: MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj);
1359: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,A->cmap->n,A->rmap->n,ati,atj,NULL,&At);
1360: MatSetBlockSizes(At,PetscAbs(A->cmap->bs),PetscAbs(B->cmap->bs));
1361: MatSetType(At,((PetscObject)A)->type_name);
1362: }
1363: /* get symbolic C=At*B */
1364: MatProductSetAlgorithm(C,"sorted");
1365: MatMatMultSymbolic_SeqAIJ_SeqAIJ(square ? A : At,B,fill,C);
1367: /* clean up */
1368: if (!square) {
1369: MatDestroy(&At);
1370: MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj);
1371: }
1373: C->ops->mattransposemultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ; /* outerproduct */
1374: MatProductSetAlgorithm(C,"outerproduct");
1375: return(0);
1376: }
1378: /* matmatmult */
1379: PetscStrcmp(product->alg,"default",&def);
1380: PetscStrcmp(product->alg,"at*b",&flg);
1381: if (flg || def) {
1382: Mat_MatTransMatMult *atb;
1384: if (product->data) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Extra product struct not empty");
1385: PetscNew(&atb);
1386: if (!square) {
1387: MatTranspose_SeqAIJ(A,MAT_INITIAL_MATRIX,&At);
1388: }
1389: MatProductSetAlgorithm(C,"sorted");
1390: MatMatMultSymbolic_SeqAIJ_SeqAIJ(square ? A : At,B,fill,C);
1391: MatProductSetAlgorithm(C,"at*b");
1392: product->data = atb;
1393: product->destroy = MatDestroy_SeqAIJ_MatTransMatMult;
1394: atb->At = At;
1395: atb->updateAt = PETSC_FALSE; /* because At is computed here */
1397: C->ops->mattransposemultnumeric = NULL; /* see MatProductNumeric_AtB_SeqAIJ_SeqAIJ */
1398: return(0);
1399: }
1401: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product Algorithm is not supported");
1402: }
1404: PetscErrorCode MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
1405: {
1407: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data;
1408: PetscInt am=A->rmap->n,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb;
1409: PetscInt cm=C->rmap->n,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k;
1410: PetscLogDouble flops=0.0;
1411: MatScalar *aa=a->a,*ba,*ca,*caj;
1414: if (!c->a) {
1415: PetscCalloc1(ci[cm]+1,&ca);
1417: c->a = ca;
1418: c->free_a = PETSC_TRUE;
1419: } else {
1420: ca = c->a;
1421: PetscArrayzero(ca,ci[cm]);
1422: }
1424: /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */
1425: for (i=0; i<am; i++) {
1426: bj = b->j + bi[i];
1427: ba = b->a + bi[i];
1428: bnzi = bi[i+1] - bi[i];
1429: anzi = ai[i+1] - ai[i];
1430: for (j=0; j<anzi; j++) {
1431: nextb = 0;
1432: crow = *aj++;
1433: cjj = cj + ci[crow];
1434: caj = ca + ci[crow];
1435: /* perform sparse axpy operation. Note cjj includes bj. */
1436: for (k=0; nextb<bnzi; k++) {
1437: if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */
1438: caj[k] += (*aa)*(*(ba+nextb));
1439: nextb++;
1440: }
1441: }
1442: flops += 2*bnzi;
1443: aa++;
1444: }
1445: }
1447: /* Assemble the final matrix and clean up */
1448: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1449: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1450: PetscLogFlops(flops);
1451: return(0);
1452: }
1454: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqDense(Mat A,Mat B,PetscReal fill,Mat C)
1455: {
1459: MatMatMultSymbolic_SeqDense_SeqDense(A,B,0.0,C);
1460: C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqDense;
1461: return(0);
1462: }
1464: PETSC_INTERN PetscErrorCode MatMatMultNumericAdd_SeqAIJ_SeqDense(Mat A,Mat B,Mat C,const PetscBool add)
1465: {
1466: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
1467: Mat_SeqDense *bd=(Mat_SeqDense*)B->data;
1468: Mat_SeqDense *cd=(Mat_SeqDense*)C->data;
1469: PetscErrorCode ierr;
1470: PetscScalar *c,r1,r2,r3,r4,*c1,*c2,*c3,*c4;
1471: const PetscScalar *aa,*b,*b1,*b2,*b3,*b4,*av;
1472: const PetscInt *aj;
1473: PetscInt cm=C->rmap->n,cn=B->cmap->n,bm=bd->lda,am=A->rmap->n;
1474: PetscInt clda=cd->lda;
1475: PetscInt am4=4*clda,bm4=4*bm,col,i,j,n;
1478: if (!cm || !cn) return(0);
1479: MatSeqAIJGetArrayRead(A,&av);
1480: if (add) {
1481: MatDenseGetArray(C,&c);
1482: } else {
1483: MatDenseGetArrayWrite(C,&c);
1484: }
1485: MatDenseGetArrayRead(B,&b);
1486: b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm;
1487: c1 = c; c2 = c1 + clda; c3 = c2 + clda; c4 = c3 + clda;
1488: for (col=0; col<(cn/4)*4; col += 4) { /* over columns of C */
1489: for (i=0; i<am; i++) { /* over rows of A in those columns */
1490: r1 = r2 = r3 = r4 = 0.0;
1491: n = a->i[i+1] - a->i[i];
1492: aj = a->j + a->i[i];
1493: aa = av + a->i[i];
1494: for (j=0; j<n; j++) {
1495: const PetscScalar aatmp = aa[j];
1496: const PetscInt ajtmp = aj[j];
1497: r1 += aatmp*b1[ajtmp];
1498: r2 += aatmp*b2[ajtmp];
1499: r3 += aatmp*b3[ajtmp];
1500: r4 += aatmp*b4[ajtmp];
1501: }
1502: if (add) {
1503: c1[i] += r1;
1504: c2[i] += r2;
1505: c3[i] += r3;
1506: c4[i] += r4;
1507: } else {
1508: c1[i] = r1;
1509: c2[i] = r2;
1510: c3[i] = r3;
1511: c4[i] = r4;
1512: }
1513: }
1514: b1 += bm4; b2 += bm4; b3 += bm4; b4 += bm4;
1515: c1 += am4; c2 += am4; c3 += am4; c4 += am4;
1516: }
1517: /* process remaining columns */
1518: if (col != cn) {
1519: PetscInt rc = cn-col;
1521: if (rc == 1) {
1522: for (i=0; i<am; i++) {
1523: r1 = 0.0;
1524: n = a->i[i+1] - a->i[i];
1525: aj = a->j + a->i[i];
1526: aa = av + a->i[i];
1527: for (j=0; j<n; j++) r1 += aa[j]*b1[aj[j]];
1528: if (add) c1[i] += r1;
1529: else c1[i] = r1;
1530: }
1531: } else if (rc == 2) {
1532: for (i=0; i<am; i++) {
1533: r1 = r2 = 0.0;
1534: n = a->i[i+1] - a->i[i];
1535: aj = a->j + a->i[i];
1536: aa = av + a->i[i];
1537: for (j=0; j<n; j++) {
1538: const PetscScalar aatmp = aa[j];
1539: const PetscInt ajtmp = aj[j];
1540: r1 += aatmp*b1[ajtmp];
1541: r2 += aatmp*b2[ajtmp];
1542: }
1543: if (add) {
1544: c1[i] += r1;
1545: c2[i] += r2;
1546: } else {
1547: c1[i] = r1;
1548: c2[i] = r2;
1549: }
1550: }
1551: } else {
1552: for (i=0; i<am; i++) {
1553: r1 = r2 = r3 = 0.0;
1554: n = a->i[i+1] - a->i[i];
1555: aj = a->j + a->i[i];
1556: aa = av + a->i[i];
1557: for (j=0; j<n; j++) {
1558: const PetscScalar aatmp = aa[j];
1559: const PetscInt ajtmp = aj[j];
1560: r1 += aatmp*b1[ajtmp];
1561: r2 += aatmp*b2[ajtmp];
1562: r3 += aatmp*b3[ajtmp];
1563: }
1564: if (add) {
1565: c1[i] += r1;
1566: c2[i] += r2;
1567: c3[i] += r3;
1568: } else {
1569: c1[i] = r1;
1570: c2[i] = r2;
1571: c3[i] = r3;
1572: }
1573: }
1574: }
1575: }
1576: PetscLogFlops(cn*(2.0*a->nz));
1577: if (add) {
1578: MatDenseRestoreArray(C,&c);
1579: } else {
1580: MatDenseRestoreArrayWrite(C,&c);
1581: }
1582: MatDenseRestoreArrayRead(B,&b);
1583: MatSeqAIJRestoreArrayRead(A,&av);
1584: return(0);
1585: }
1587: PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqDense(Mat A,Mat B,Mat C)
1588: {
1592: if (B->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in A %D not equal rows in B %D\n",A->cmap->n,B->rmap->n);
1593: if (A->rmap->n != C->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows in C %D not equal rows in A %D\n",C->rmap->n,A->rmap->n);
1594: if (B->cmap->n != C->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in B %D not equal columns in C %D\n",B->cmap->n,C->cmap->n);
1596: MatMatMultNumericAdd_SeqAIJ_SeqDense(A,B,C,PETSC_FALSE);
1597: return(0);
1598: }
1600: /* ------------------------------------------------------- */
1601: static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense_AB(Mat C)
1602: {
1604: C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqDense;
1605: C->ops->productsymbolic = MatProductSymbolic_AB;
1606: return(0);
1607: }
1609: PETSC_INTERN PetscErrorCode MatTMatTMultSymbolic_SeqAIJ_SeqDense(Mat,Mat,PetscReal,Mat);
1611: static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense_AtB(Mat C)
1612: {
1614: C->ops->transposematmultsymbolic = MatTMatTMultSymbolic_SeqAIJ_SeqDense;
1615: C->ops->productsymbolic = MatProductSymbolic_AtB;
1616: return(0);
1617: }
1619: static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense_ABt(Mat C)
1620: {
1622: C->ops->mattransposemultsymbolic = MatTMatTMultSymbolic_SeqAIJ_SeqDense;
1623: C->ops->productsymbolic = MatProductSymbolic_ABt;
1624: return(0);
1625: }
1627: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense(Mat C)
1628: {
1630: Mat_Product *product = C->product;
1633: switch (product->type) {
1634: case MATPRODUCT_AB:
1635: MatProductSetFromOptions_SeqAIJ_SeqDense_AB(C);
1636: break;
1637: case MATPRODUCT_AtB:
1638: MatProductSetFromOptions_SeqAIJ_SeqDense_AtB(C);
1639: break;
1640: case MATPRODUCT_ABt:
1641: MatProductSetFromOptions_SeqAIJ_SeqDense_ABt(C);
1642: break;
1643: default:
1644: break;
1645: }
1646: return(0);
1647: }
1648: /* ------------------------------------------------------- */
1649: static PetscErrorCode MatProductSetFromOptions_SeqXBAIJ_SeqDense_AB(Mat C)
1650: {
1652: Mat_Product *product = C->product;
1653: Mat A = product->A;
1654: PetscBool baij;
1657: PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&baij);
1658: if (!baij) { /* A is seqsbaij */
1659: PetscBool sbaij;
1660: PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&sbaij);
1661: if (!sbaij) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_ARG_WRONGSTATE,"Mat must be either seqbaij or seqsbaij format");
1663: C->ops->matmultsymbolic = MatMatMultSymbolic_SeqSBAIJ_SeqDense;
1664: } else { /* A is seqbaij */
1665: C->ops->matmultsymbolic = MatMatMultSymbolic_SeqBAIJ_SeqDense;
1666: }
1668: C->ops->productsymbolic = MatProductSymbolic_AB;
1669: return(0);
1670: }
1672: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqXBAIJ_SeqDense(Mat C)
1673: {
1675: Mat_Product *product = C->product;
1678: MatCheckProduct(C,1);
1679: if (!product->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing A");
1680: if (product->type == MATPRODUCT_AB || (product->type == MATPRODUCT_AtB && product->A->symmetric)) {
1681: MatProductSetFromOptions_SeqXBAIJ_SeqDense_AB(C);
1682: }
1683: return(0);
1684: }
1686: /* ------------------------------------------------------- */
1687: static PetscErrorCode MatProductSetFromOptions_SeqDense_SeqAIJ_AB(Mat C)
1688: {
1690: C->ops->matmultsymbolic = MatMatMultSymbolic_SeqDense_SeqAIJ;
1691: C->ops->productsymbolic = MatProductSymbolic_AB;
1692: return(0);
1693: }
1695: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqDense_SeqAIJ(Mat C)
1696: {
1698: Mat_Product *product = C->product;
1701: if (product->type == MATPRODUCT_AB) {
1702: MatProductSetFromOptions_SeqDense_SeqAIJ_AB(C);
1703: }
1704: return(0);
1705: }
1706: /* ------------------------------------------------------- */
1708: PetscErrorCode MatTransColoringApplySpToDen_SeqAIJ(MatTransposeColoring coloring,Mat B,Mat Btdense)
1709: {
1711: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
1712: Mat_SeqDense *btdense = (Mat_SeqDense*)Btdense->data;
1713: PetscInt *bi = b->i,*bj=b->j;
1714: PetscInt m = Btdense->rmap->n,n=Btdense->cmap->n,j,k,l,col,anz,*btcol,brow,ncolumns;
1715: MatScalar *btval,*btval_den,*ba=b->a;
1716: PetscInt *columns=coloring->columns,*colorforcol=coloring->colorforcol,ncolors=coloring->ncolors;
1719: btval_den=btdense->v;
1720: PetscArrayzero(btval_den,m*n);
1721: for (k=0; k<ncolors; k++) {
1722: ncolumns = coloring->ncolumns[k];
1723: for (l=0; l<ncolumns; l++) { /* insert a row of B to a column of Btdense */
1724: col = *(columns + colorforcol[k] + l);
1725: btcol = bj + bi[col];
1726: btval = ba + bi[col];
1727: anz = bi[col+1] - bi[col];
1728: for (j=0; j<anz; j++) {
1729: brow = btcol[j];
1730: btval_den[brow] = btval[j];
1731: }
1732: }
1733: btval_den += m;
1734: }
1735: return(0);
1736: }
1738: PetscErrorCode MatTransColoringApplyDenToSp_SeqAIJ(MatTransposeColoring matcoloring,Mat Cden,Mat Csp)
1739: {
1740: PetscErrorCode ierr;
1741: Mat_SeqAIJ *csp = (Mat_SeqAIJ*)Csp->data;
1742: const PetscScalar *ca_den,*ca_den_ptr;
1743: PetscScalar *ca=csp->a;
1744: PetscInt k,l,m=Cden->rmap->n,ncolors=matcoloring->ncolors;
1745: PetscInt brows=matcoloring->brows,*den2sp=matcoloring->den2sp;
1746: PetscInt nrows,*row,*idx;
1747: PetscInt *rows=matcoloring->rows,*colorforrow=matcoloring->colorforrow;
1750: MatDenseGetArrayRead(Cden,&ca_den);
1752: if (brows > 0) {
1753: PetscInt *lstart,row_end,row_start;
1754: lstart = matcoloring->lstart;
1755: PetscArrayzero(lstart,ncolors);
1757: row_end = brows;
1758: if (row_end > m) row_end = m;
1759: for (row_start=0; row_start<m; row_start+=brows) { /* loop over row blocks of Csp */
1760: ca_den_ptr = ca_den;
1761: for (k=0; k<ncolors; k++) { /* loop over colors (columns of Cden) */
1762: nrows = matcoloring->nrows[k];
1763: row = rows + colorforrow[k];
1764: idx = den2sp + colorforrow[k];
1765: for (l=lstart[k]; l<nrows; l++) {
1766: if (row[l] >= row_end) {
1767: lstart[k] = l;
1768: break;
1769: } else {
1770: ca[idx[l]] = ca_den_ptr[row[l]];
1771: }
1772: }
1773: ca_den_ptr += m;
1774: }
1775: row_end += brows;
1776: if (row_end > m) row_end = m;
1777: }
1778: } else { /* non-blocked impl: loop over columns of Csp - slow if Csp is large */
1779: ca_den_ptr = ca_den;
1780: for (k=0; k<ncolors; k++) {
1781: nrows = matcoloring->nrows[k];
1782: row = rows + colorforrow[k];
1783: idx = den2sp + colorforrow[k];
1784: for (l=0; l<nrows; l++) {
1785: ca[idx[l]] = ca_den_ptr[row[l]];
1786: }
1787: ca_den_ptr += m;
1788: }
1789: }
1791: MatDenseRestoreArrayRead(Cden,&ca_den);
1792: #if defined(PETSC_USE_INFO)
1793: if (matcoloring->brows > 0) {
1794: PetscInfo1(Csp,"Loop over %D row blocks for den2sp\n",brows);
1795: } else {
1796: PetscInfo(Csp,"Loop over colors/columns of Cden, inefficient for large sparse matrix product \n");
1797: }
1798: #endif
1799: return(0);
1800: }
1802: PetscErrorCode MatTransposeColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatTransposeColoring c)
1803: {
1805: PetscInt i,n,nrows,Nbs,j,k,m,ncols,col,cm;
1806: const PetscInt *is,*ci,*cj,*row_idx;
1807: PetscInt nis = iscoloring->n,*rowhit,bs = 1;
1808: IS *isa;
1809: Mat_SeqAIJ *csp = (Mat_SeqAIJ*)mat->data;
1810: PetscInt *colorforrow,*rows,*rows_i,*idxhit,*spidx,*den2sp,*den2sp_i;
1811: PetscInt *colorforcol,*columns,*columns_i,brows;
1812: PetscBool flg;
1815: ISColoringGetIS(iscoloring,PETSC_USE_POINTER,PETSC_IGNORE,&isa);
1817: /* bs >1 is not being tested yet! */
1818: Nbs = mat->cmap->N/bs;
1819: c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */
1820: c->N = Nbs;
1821: c->m = c->M;
1822: c->rstart = 0;
1823: c->brows = 100;
1825: c->ncolors = nis;
1826: PetscMalloc3(nis,&c->ncolumns,nis,&c->nrows,nis+1,&colorforrow);
1827: PetscMalloc1(csp->nz+1,&rows);
1828: PetscMalloc1(csp->nz+1,&den2sp);
1830: brows = c->brows;
1831: PetscOptionsGetInt(NULL,NULL,"-matden2sp_brows",&brows,&flg);
1832: if (flg) c->brows = brows;
1833: if (brows > 0) {
1834: PetscMalloc1(nis+1,&c->lstart);
1835: }
1837: colorforrow[0] = 0;
1838: rows_i = rows;
1839: den2sp_i = den2sp;
1841: PetscMalloc1(nis+1,&colorforcol);
1842: PetscMalloc1(Nbs+1,&columns);
1844: colorforcol[0] = 0;
1845: columns_i = columns;
1847: /* get column-wise storage of mat */
1848: MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);
1850: cm = c->m;
1851: PetscMalloc1(cm+1,&rowhit);
1852: PetscMalloc1(cm+1,&idxhit);
1853: for (i=0; i<nis; i++) { /* loop over color */
1854: ISGetLocalSize(isa[i],&n);
1855: ISGetIndices(isa[i],&is);
1857: c->ncolumns[i] = n;
1858: if (n) {
1859: PetscArraycpy(columns_i,is,n);
1860: }
1861: colorforcol[i+1] = colorforcol[i] + n;
1862: columns_i += n;
1864: /* fast, crude version requires O(N*N) work */
1865: PetscArrayzero(rowhit,cm);
1867: for (j=0; j<n; j++) { /* loop over columns*/
1868: col = is[j];
1869: row_idx = cj + ci[col];
1870: m = ci[col+1] - ci[col];
1871: for (k=0; k<m; k++) { /* loop over columns marking them in rowhit */
1872: idxhit[*row_idx] = spidx[ci[col] + k];
1873: rowhit[*row_idx++] = col + 1;
1874: }
1875: }
1876: /* count the number of hits */
1877: nrows = 0;
1878: for (j=0; j<cm; j++) {
1879: if (rowhit[j]) nrows++;
1880: }
1881: c->nrows[i] = nrows;
1882: colorforrow[i+1] = colorforrow[i] + nrows;
1884: nrows = 0;
1885: for (j=0; j<cm; j++) { /* loop over rows */
1886: if (rowhit[j]) {
1887: rows_i[nrows] = j;
1888: den2sp_i[nrows] = idxhit[j];
1889: nrows++;
1890: }
1891: }
1892: den2sp_i += nrows;
1894: ISRestoreIndices(isa[i],&is);
1895: rows_i += nrows;
1896: }
1897: MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);
1898: PetscFree(rowhit);
1899: ISColoringRestoreIS(iscoloring,PETSC_USE_POINTER,&isa);
1900: if (csp->nz != colorforrow[nis]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"csp->nz %d != colorforrow[nis] %d",csp->nz,colorforrow[nis]);
1902: c->colorforrow = colorforrow;
1903: c->rows = rows;
1904: c->den2sp = den2sp;
1905: c->colorforcol = colorforcol;
1906: c->columns = columns;
1908: PetscFree(idxhit);
1909: return(0);
1910: }
1912: /* --------------------------------------------------------------- */
1913: static PetscErrorCode MatProductNumeric_AtB_SeqAIJ_SeqAIJ(Mat C)
1914: {
1916: Mat_Product *product = C->product;
1917: Mat A=product->A,B=product->B;
1920: if (C->ops->mattransposemultnumeric) {
1921: /* Alg: "outerproduct" */
1922: (*C->ops->mattransposemultnumeric)(A,B,C);
1923: } else {
1924: /* Alg: "matmatmult" -- C = At*B */
1925: Mat_MatTransMatMult *atb = (Mat_MatTransMatMult *)product->data;
1926: Mat At;
1928: if (!atb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing product struct");
1929: At = atb->At;
1930: if (atb->updateAt && At) { /* At is computed in MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ() */
1931: MatTranspose_SeqAIJ(A,MAT_REUSE_MATRIX,&At);
1932: }
1933: MatMatMultNumeric_SeqAIJ_SeqAIJ(At ? At : A,B,C);
1934: atb->updateAt = PETSC_TRUE;
1935: }
1936: return(0);
1937: }
1939: static PetscErrorCode MatProductSymbolic_AtB_SeqAIJ_SeqAIJ(Mat C)
1940: {
1942: Mat_Product *product = C->product;
1943: Mat A=product->A,B=product->B;
1944: PetscReal fill=product->fill;
1947: MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);
1949: C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJ_SeqAIJ;
1950: return(0);
1951: }
1953: /* --------------------------------------------------------------- */
1954: static PetscErrorCode MatProductSetFromOptions_SeqAIJ_AB(Mat C)
1955: {
1957: Mat_Product *product = C->product;
1958: PetscInt alg = 0; /* default algorithm */
1959: PetscBool flg = PETSC_FALSE;
1960: #if !defined(PETSC_HAVE_HYPRE)
1961: const char *algTypes[7] = {"sorted","scalable","scalable_fast","heap","btheap","llcondensed","rowmerge"};
1962: PetscInt nalg = 7;
1963: #else
1964: const char *algTypes[8] = {"sorted","scalable","scalable_fast","heap","btheap","llcondensed","rowmerge","hypre"};
1965: PetscInt nalg = 8;
1966: #endif
1969: /* Set default algorithm */
1970: PetscStrcmp(C->product->alg,"default",&flg);
1971: if (flg) {
1972: MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
1973: }
1975: /* Get runtime option */
1976: if (product->api_user) {
1977: PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");
1978: PetscOptionsEList("-matmatmult_via","Algorithmic approach","MatMatMult",algTypes,nalg,algTypes[0],&alg,&flg);
1979: PetscOptionsEnd();
1980: } else {
1981: PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");
1982: PetscOptionsEList("-matproduct_ab_via","Algorithmic approach","MatProduct_AB",algTypes,nalg,algTypes[0],&alg,&flg);
1983: PetscOptionsEnd();
1984: }
1985: if (flg) {
1986: MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
1987: }
1989: C->ops->productsymbolic = MatProductSymbolic_AB;
1990: C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
1991: return(0);
1992: }
1994: static PetscErrorCode MatProductSetFromOptions_SeqAIJ_AtB(Mat C)
1995: {
1997: Mat_Product *product = C->product;
1998: PetscInt alg = 0; /* default algorithm */
1999: PetscBool flg = PETSC_FALSE;
2000: const char *algTypes[3] = {"default","at*b","outerproduct"};
2001: PetscInt nalg = 3;
2004: /* Get runtime option */
2005: if (product->api_user) {
2006: PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatTransposeMatMult","Mat");
2007: PetscOptionsEList("-mattransposematmult_via","Algorithmic approach","MatTransposeMatMult",algTypes,nalg,algTypes[alg],&alg,&flg);
2008: PetscOptionsEnd();
2009: } else {
2010: PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AtB","Mat");
2011: PetscOptionsEList("-matproduct_atb_via","Algorithmic approach","MatProduct_AtB",algTypes,nalg,algTypes[alg],&alg,&flg);
2012: PetscOptionsEnd();
2013: }
2014: if (flg) {
2015: MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
2016: }
2018: C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJ_SeqAIJ;
2019: return(0);
2020: }
2022: static PetscErrorCode MatProductSetFromOptions_SeqAIJ_ABt(Mat C)
2023: {
2025: Mat_Product *product = C->product;
2026: PetscInt alg = 0; /* default algorithm */
2027: PetscBool flg = PETSC_FALSE;
2028: const char *algTypes[2] = {"default","color"};
2029: PetscInt nalg = 2;
2032: /* Set default algorithm */
2033: PetscStrcmp(C->product->alg,"default",&flg);
2034: if (!flg) {
2035: alg = 1;
2036: MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
2037: }
2039: /* Get runtime option */
2040: if (product->api_user) {
2041: PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");
2042: PetscOptionsEList("-matmattransmult_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);
2043: PetscOptionsEnd();
2044: } else {
2045: PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");
2046: PetscOptionsEList("-matproduct_abt_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);
2047: PetscOptionsEnd();
2048: }
2049: if (flg) {
2050: MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
2051: }
2053: C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
2054: C->ops->productsymbolic = MatProductSymbolic_ABt;
2055: return(0);
2056: }
2058: static PetscErrorCode MatProductSetFromOptions_SeqAIJ_PtAP(Mat C)
2059: {
2061: Mat_Product *product = C->product;
2062: PetscBool flg = PETSC_FALSE;
2063: PetscInt alg = 0; /* default algorithm -- alg=1 should be default!!! */
2064: #if !defined(PETSC_HAVE_HYPRE)
2065: const char *algTypes[2] = {"scalable","rap"};
2066: PetscInt nalg = 2;
2067: #else
2068: const char *algTypes[3] = {"scalable","rap","hypre"};
2069: PetscInt nalg = 3;
2070: #endif
2073: /* Set default algorithm */
2074: PetscStrcmp(product->alg,"default",&flg);
2075: if (flg) {
2076: MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
2077: }
2079: /* Get runtime option */
2080: if (product->api_user) {
2081: PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");
2082: PetscOptionsEList("-matptap_via","Algorithmic approach","MatPtAP",algTypes,nalg,algTypes[0],&alg,&flg);
2083: PetscOptionsEnd();
2084: } else {
2085: PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");
2086: PetscOptionsEList("-matproduct_ptap_via","Algorithmic approach","MatProduct_PtAP",algTypes,nalg,algTypes[0],&alg,&flg);
2087: PetscOptionsEnd();
2088: }
2089: if (flg) {
2090: MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
2091: }
2093: C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqAIJ;
2094: return(0);
2095: }
2097: static PetscErrorCode MatProductSetFromOptions_SeqAIJ_RARt(Mat C)
2098: {
2100: Mat_Product *product = C->product;
2101: PetscBool flg = PETSC_FALSE;
2102: PetscInt alg = 0; /* default algorithm */
2103: const char *algTypes[3] = {"r*a*rt","r*art","coloring_rart"};
2104: PetscInt nalg = 3;
2107: /* Set default algorithm */
2108: PetscStrcmp(product->alg,"default",&flg);
2109: if (flg) {
2110: MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
2111: }
2113: /* Get runtime option */
2114: if (product->api_user) {
2115: PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatRARt","Mat");
2116: PetscOptionsEList("-matrart_via","Algorithmic approach","MatRARt",algTypes,nalg,algTypes[0],&alg,&flg);
2117: PetscOptionsEnd();
2118: } else {
2119: PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_RARt","Mat");
2120: PetscOptionsEList("-matproduct_rart_via","Algorithmic approach","MatProduct_RARt",algTypes,nalg,algTypes[0],&alg,&flg);
2121: PetscOptionsEnd();
2122: }
2123: if (flg) {
2124: MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
2125: }
2127: C->ops->productsymbolic = MatProductSymbolic_RARt_SeqAIJ_SeqAIJ;
2128: return(0);
2129: }
2131: /* ABC = A*B*C = A*(B*C); ABC's algorithm must be chosen from AB's algorithm */
2132: static PetscErrorCode MatProductSetFromOptions_SeqAIJ_ABC(Mat C)
2133: {
2135: Mat_Product *product = C->product;
2136: PetscInt alg = 0; /* default algorithm */
2137: PetscBool flg = PETSC_FALSE;
2138: const char *algTypes[7] = {"sorted","scalable","scalable_fast","heap","btheap","llcondensed","rowmerge"};
2139: PetscInt nalg = 7;
2142: /* Set default algorithm */
2143: PetscStrcmp(product->alg,"default",&flg);
2144: if (flg) {
2145: MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
2146: }
2148: /* Get runtime option */
2149: if (product->api_user) {
2150: PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMatMult","Mat");
2151: PetscOptionsEList("-matmatmatmult_via","Algorithmic approach","MatMatMatMult",algTypes,nalg,algTypes[alg],&alg,&flg);
2152: PetscOptionsEnd();
2153: } else {
2154: PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABC","Mat");
2155: PetscOptionsEList("-matproduct_abc_via","Algorithmic approach","MatProduct_ABC",algTypes,nalg,algTypes[alg],&alg,&flg);
2156: PetscOptionsEnd();
2157: }
2158: if (flg) {
2159: MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
2160: }
2162: C->ops->matmatmultsymbolic = MatMatMatMultSymbolic_SeqAIJ_SeqAIJ_SeqAIJ;
2163: C->ops->productsymbolic = MatProductSymbolic_ABC;
2164: return(0);
2165: }
2167: PetscErrorCode MatProductSetFromOptions_SeqAIJ(Mat C)
2168: {
2170: Mat_Product *product = C->product;
2173: switch (product->type) {
2174: case MATPRODUCT_AB:
2175: MatProductSetFromOptions_SeqAIJ_AB(C);
2176: break;
2177: case MATPRODUCT_AtB:
2178: MatProductSetFromOptions_SeqAIJ_AtB(C);
2179: break;
2180: case MATPRODUCT_ABt:
2181: MatProductSetFromOptions_SeqAIJ_ABt(C);
2182: break;
2183: case MATPRODUCT_PtAP:
2184: MatProductSetFromOptions_SeqAIJ_PtAP(C);
2185: break;
2186: case MATPRODUCT_RARt:
2187: MatProductSetFromOptions_SeqAIJ_RARt(C);
2188: break;
2189: case MATPRODUCT_ABC:
2190: MatProductSetFromOptions_SeqAIJ_ABC(C);
2191: break;
2192: default:
2193: break;
2194: }
2195: return(0);
2196: }