Actual source code: matmatmult.c

petsc-3.13.6 2020-09-29
Report Typos and Errors

  2: /*
  3:   Defines matrix-matrix product routines for pairs of SeqAIJ matrices
  4:           C = A * B
  5: */

  7:  #include <../src/mat/impls/aij/seq/aij.h>
  8:  #include <../src/mat/utils/freespace.h>
  9:  #include <petscbt.h>
 10:  #include <petsc/private/isimpl.h>
 11:  #include <../src/mat/impls/dense/seq/dense.h>

 13: PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
 14: {

 18:   if (C->ops->matmultnumeric) {
 19:     (*C->ops->matmultnumeric)(A,B,C);
 20:   } else {
 21:     MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted(A,B,C);
 22:   }
 23:   return(0);
 24: }

 26: /* Modified from MatCreateSeqAIJWithArrays() */
 27: PETSC_INTERN PetscErrorCode MatSetSeqAIJWithArrays_private(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],MatType mtype,Mat mat)
 28: {
 30:   PetscInt       ii;
 31:   Mat_SeqAIJ     *aij;
 32:   PetscBool      isseqaij;

 35:   if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
 36:   MatSetSizes(mat,m,n,m,n);

 38:   if (!mtype) {
 39:     PetscObjectBaseTypeCompare((PetscObject)mat,MATSEQAIJ,&isseqaij);
 40:     if (!isseqaij) { MatSetType(mat,MATSEQAIJ); }
 41:   } else {
 42:     MatSetType(mat,mtype);
 43:   }
 44:   MatSeqAIJSetPreallocation_SeqAIJ(mat,MAT_SKIP_ALLOCATION,0);
 45:   aij  = (Mat_SeqAIJ*)(mat)->data;
 46:   PetscMalloc1(m,&aij->imax);
 47:   PetscMalloc1(m,&aij->ilen);

 49:   aij->i            = i;
 50:   aij->j            = j;
 51:   aij->a            = a;
 52:   aij->singlemalloc = PETSC_FALSE;
 53:   aij->nonew        = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
 54:   aij->free_a       = PETSC_FALSE;
 55:   aij->free_ij      = PETSC_FALSE;

 57:   for (ii=0; ii<m; ii++) {
 58:     aij->ilen[ii] = aij->imax[ii] = i[ii+1] - i[ii];
 59:   }

 61:   return(0);
 62: }

 64: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat C)
 65: {
 66:   PetscErrorCode      ierr;
 67:   Mat_Product         *product = C->product;
 68:   MatProductAlgorithm alg;
 69:   PetscBool           flg;

 72:   if (product) {
 73:     alg = product->alg;
 74:   } else {
 75:     alg = "sorted";
 76:   }
 77:   /* sorted */
 78:   PetscStrcmp(alg,"sorted",&flg);
 79:   if (flg) {
 80:     MatMatMultSymbolic_SeqAIJ_SeqAIJ_Sorted(A,B,fill,C);
 81:     return(0);
 82:   }

 84:   /* scalable */
 85:   PetscStrcmp(alg,"scalable",&flg);
 86:   if (flg) {
 87:     MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(A,B,fill,C);
 88:     return(0);
 89:   }

 91:   /* scalable_fast */
 92:   PetscStrcmp(alg,"scalable_fast",&flg);
 93:   if (flg) {
 94:     MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_fast(A,B,fill,C);
 95:     return(0);
 96:   }

 98:   /* heap */
 99:   PetscStrcmp(alg,"heap",&flg);
100:   if (flg) {
101:     MatMatMultSymbolic_SeqAIJ_SeqAIJ_Heap(A,B,fill,C);
102:     return(0);
103:   }

105:   /* btheap */
106:   PetscStrcmp(alg,"btheap",&flg);
107:   if (flg) {
108:     MatMatMultSymbolic_SeqAIJ_SeqAIJ_BTHeap(A,B,fill,C);
109:     return(0);
110:   }

112:   /* llcondensed */
113:   PetscStrcmp(alg,"llcondensed",&flg);
114:   if (flg) {
115:     MatMatMultSymbolic_SeqAIJ_SeqAIJ_LLCondensed(A,B,fill,C);
116:     return(0);
117:   }

119:   /* rowmerge */
120:   PetscStrcmp(alg,"rowmerge",&flg);
121:   if (flg) {
122:     MatMatMultSymbolic_SeqAIJ_SeqAIJ_RowMerge(A,B,fill,C);
123:     return(0);
124:   }

126: #if defined(PETSC_HAVE_HYPRE)
127:   PetscStrcmp(alg,"hypre",&flg);
128:   if (flg) {
129:     MatMatMultSymbolic_AIJ_AIJ_wHYPRE(A,B,fill,C);
130:     return(0);
131:   }
132: #endif

134:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product Algorithm is not supported");
135:   return(0);
136: }

138: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_LLCondensed(Mat A,Mat B,PetscReal fill,Mat C)
139: {
140:   PetscErrorCode     ierr;
141:   Mat_SeqAIJ         *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
142:   PetscInt           *ai=a->i,*bi=b->i,*ci,*cj;
143:   PetscInt           am =A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
144:   PetscReal          afill;
145:   PetscInt           i,j,anzi,brow,bnzj,cnzi,*bj,*aj,*lnk,ndouble=0,Crmax;
146:   PetscTable         ta;
147:   PetscBT            lnkbt;
148:   PetscFreeSpaceList free_space=NULL,current_space=NULL;

151:   /* Get ci and cj */
152:   /*---------------*/
153:   /* Allocate ci array, arrays for fill computation and */
154:   /* free space for accumulating nonzero column info */
155:   PetscMalloc1(am+2,&ci);
156:   ci[0] = 0;

158:   /* create and initialize a linked list */
159:   PetscTableCreate(bn,bn,&ta);
160:   MatRowMergeMax_SeqAIJ(b,bm,ta);
161:   PetscTableGetCount(ta,&Crmax);
162:   PetscTableDestroy(&ta);

164:   PetscLLCondensedCreate(Crmax,bn,&lnk,&lnkbt);

166:   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
167:   PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space);

169:   current_space = free_space;

171:   /* Determine ci and cj */
172:   for (i=0; i<am; i++) {
173:     anzi = ai[i+1] - ai[i];
174:     aj   = a->j + ai[i];
175:     for (j=0; j<anzi; j++) {
176:       brow = aj[j];
177:       bnzj = bi[brow+1] - bi[brow];
178:       bj   = b->j + bi[brow];
179:       /* add non-zero cols of B into the sorted linked list lnk */
180:       PetscLLCondensedAddSorted(bnzj,bj,lnk,lnkbt);
181:     }
182:     cnzi = lnk[0];

184:     /* If free space is not available, make more free space */
185:     /* Double the amount of total space in the list */
186:     if (current_space->local_remaining<cnzi) {
187:       PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),&current_space);
188:       ndouble++;
189:     }

191:     /* Copy data into free space, then initialize lnk */
192:     PetscLLCondensedClean(bn,cnzi,current_space->array,lnk,lnkbt);

194:     current_space->array           += cnzi;
195:     current_space->local_used      += cnzi;
196:     current_space->local_remaining -= cnzi;

198:     ci[i+1] = ci[i] + cnzi;
199:   }

201:   /* Column indices are in the list of free space */
202:   /* Allocate space for cj, initialize cj, and */
203:   /* destroy list of free space and other temporary array(s) */
204:   PetscMalloc1(ci[am]+1,&cj);
205:   PetscFreeSpaceContiguous(&free_space,cj);
206:   PetscLLCondensedDestroy(lnk,lnkbt);

208:   /* put together the new symbolic matrix */
209:   MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C);
210:   MatSetBlockSizesFromMats(C,A,B);

212:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
213:   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
214:   c                         = (Mat_SeqAIJ*)(C->data);
215:   c->free_a                 = PETSC_FALSE;
216:   c->free_ij                = PETSC_TRUE;
217:   c->nonew                  = 0;

219:   /* fast, needs non-scalable O(bn) array 'abdense' */
220:   C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted;

222:   /* set MatInfo */
223:   afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
224:   if (afill < 1.0) afill = 1.0;
225:   c->maxnz                     = ci[am];
226:   c->nz                        = ci[am];
227:   C->info.mallocs           = ndouble;
228:   C->info.fill_ratio_given  = fill;
229:   C->info.fill_ratio_needed = afill;

231: #if defined(PETSC_USE_INFO)
232:   if (ci[am]) {
233:     PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);
234:     PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);
235:   } else {
236:     PetscInfo(C,"Empty matrix product\n");
237:   }
238: #endif
239:   return(0);
240: }

242: PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted(Mat A,Mat B,Mat C)
243: {
245:   PetscLogDouble flops=0.0;
246:   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)A->data;
247:   Mat_SeqAIJ     *b   = (Mat_SeqAIJ*)B->data;
248:   Mat_SeqAIJ     *c   = (Mat_SeqAIJ*)C->data;
249:   PetscInt       *ai  =a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
250:   PetscInt       am   =A->rmap->n,cm=C->rmap->n;
251:   PetscInt       i,j,k,anzi,bnzi,cnzi,brow;
252:   PetscScalar    *aa=a->a,*ba=b->a,*baj,*ca,valtmp;
253:   PetscScalar    *ab_dense;

256:   if (!c->a) { /* first call of MatMatMultNumeric_SeqAIJ_SeqAIJ, allocate ca and matmult_abdense */
257:     PetscMalloc1(ci[cm]+1,&ca);
258:     c->a      = ca;
259:     c->free_a = PETSC_TRUE;
260:   } else {
261:     ca        = c->a;
262:   }
263:   if (!c->matmult_abdense) {
264:     PetscCalloc1(B->cmap->N,&ab_dense);
265:     c->matmult_abdense = ab_dense;
266:   } else {
267:     ab_dense = c->matmult_abdense;
268:   }

270:   /* clean old values in C */
271:   PetscArrayzero(ca,ci[cm]);
272:   /* Traverse A row-wise. */
273:   /* Build the ith row in C by summing over nonzero columns in A, */
274:   /* the rows of B corresponding to nonzeros of A. */
275:   for (i=0; i<am; i++) {
276:     anzi = ai[i+1] - ai[i];
277:     for (j=0; j<anzi; j++) {
278:       brow = aj[j];
279:       bnzi = bi[brow+1] - bi[brow];
280:       bjj  = bj + bi[brow];
281:       baj  = ba + bi[brow];
282:       /* perform dense axpy */
283:       valtmp = aa[j];
284:       for (k=0; k<bnzi; k++) {
285:         ab_dense[bjj[k]] += valtmp*baj[k];
286:       }
287:       flops += 2*bnzi;
288:     }
289:     aj += anzi; aa += anzi;

291:     cnzi = ci[i+1] - ci[i];
292:     for (k=0; k<cnzi; k++) {
293:       ca[k]          += ab_dense[cj[k]];
294:       ab_dense[cj[k]] = 0.0; /* zero ab_dense */
295:     }
296:     flops += cnzi;
297:     cj    += cnzi; ca += cnzi;
298:   }
299:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
300:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
301:   PetscLogFlops(flops);
302:   return(0);
303: }

305: PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,Mat C)
306: {
308:   PetscLogDouble flops=0.0;
309:   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)A->data;
310:   Mat_SeqAIJ     *b   = (Mat_SeqAIJ*)B->data;
311:   Mat_SeqAIJ     *c   = (Mat_SeqAIJ*)C->data;
312:   PetscInt       *ai  = a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
313:   PetscInt       am   = A->rmap->N,cm=C->rmap->N;
314:   PetscInt       i,j,k,anzi,bnzi,cnzi,brow;
315:   PetscScalar    *aa=a->a,*ba=b->a,*baj,*ca=c->a,valtmp;
316:   PetscInt       nextb;

319:   if (!ca) { /* first call of MatMatMultNumeric_SeqAIJ_SeqAIJ, allocate ca and matmult_abdense */
320:     PetscMalloc1(ci[cm]+1,&ca);
321:     c->a      = ca;
322:     c->free_a = PETSC_TRUE;
323:   }

325:   /* clean old values in C */
326:   PetscArrayzero(ca,ci[cm]);
327:   /* Traverse A row-wise. */
328:   /* Build the ith row in C by summing over nonzero columns in A, */
329:   /* the rows of B corresponding to nonzeros of A. */
330:   for (i=0; i<am; i++) {
331:     anzi = ai[i+1] - ai[i];
332:     cnzi = ci[i+1] - ci[i];
333:     for (j=0; j<anzi; j++) {
334:       brow = aj[j];
335:       bnzi = bi[brow+1] - bi[brow];
336:       bjj  = bj + bi[brow];
337:       baj  = ba + bi[brow];
338:       /* perform sparse axpy */
339:       valtmp = aa[j];
340:       nextb  = 0;
341:       for (k=0; nextb<bnzi; k++) {
342:         if (cj[k] == bjj[nextb]) { /* ccol == bcol */
343:           ca[k] += valtmp*baj[nextb++];
344:         }
345:       }
346:       flops += 2*bnzi;
347:     }
348:     aj += anzi; aa += anzi;
349:     cj += cnzi; ca += cnzi;
350:   }

352:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
353:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
354:   PetscLogFlops(flops);
355:   return(0);
356: }

358: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable_fast(Mat A,Mat B,PetscReal fill,Mat C)
359: {
360:   PetscErrorCode     ierr;
361:   Mat_SeqAIJ         *a  = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
362:   PetscInt           *ai = a->i,*bi=b->i,*ci,*cj;
363:   PetscInt           am  = A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
364:   MatScalar          *ca;
365:   PetscReal          afill;
366:   PetscInt           i,j,anzi,brow,bnzj,cnzi,*bj,*aj,*lnk,ndouble=0,Crmax;
367:   PetscTable         ta;
368:   PetscFreeSpaceList free_space=NULL,current_space=NULL;

371:   /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_fast() */
372:   /*-----------------------------------------------------------------------------------------*/
373:   /* Allocate arrays for fill computation and free space for accumulating nonzero column */
374:   PetscMalloc1(am+2,&ci);
375:   ci[0] = 0;

377:   /* create and initialize a linked list */
378:   PetscTableCreate(bn,bn,&ta);
379:   MatRowMergeMax_SeqAIJ(b,bm,ta);
380:   PetscTableGetCount(ta,&Crmax);
381:   PetscTableDestroy(&ta);

383:   PetscLLCondensedCreate_fast(Crmax,&lnk);

385:   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
386:   PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space);
387:   current_space = free_space;

389:   /* Determine ci and cj */
390:   for (i=0; i<am; i++) {
391:     anzi = ai[i+1] - ai[i];
392:     aj   = a->j + ai[i];
393:     for (j=0; j<anzi; j++) {
394:       brow = aj[j];
395:       bnzj = bi[brow+1] - bi[brow];
396:       bj   = b->j + bi[brow];
397:       /* add non-zero cols of B into the sorted linked list lnk */
398:       PetscLLCondensedAddSorted_fast(bnzj,bj,lnk);
399:     }
400:     cnzi = lnk[1];

402:     /* If free space is not available, make more free space */
403:     /* Double the amount of total space in the list */
404:     if (current_space->local_remaining<cnzi) {
405:       PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),&current_space);
406:       ndouble++;
407:     }

409:     /* Copy data into free space, then initialize lnk */
410:     PetscLLCondensedClean_fast(cnzi,current_space->array,lnk);

412:     current_space->array           += cnzi;
413:     current_space->local_used      += cnzi;
414:     current_space->local_remaining -= cnzi;

416:     ci[i+1] = ci[i] + cnzi;
417:   }

419:   /* Column indices are in the list of free space */
420:   /* Allocate space for cj, initialize cj, and */
421:   /* destroy list of free space and other temporary array(s) */
422:   PetscMalloc1(ci[am]+1,&cj);
423:   PetscFreeSpaceContiguous(&free_space,cj);
424:   PetscLLCondensedDestroy_fast(lnk);

426:   /* Allocate space for ca */
427:   PetscCalloc1(ci[am]+1,&ca);

429:   /* put together the new symbolic matrix */
430:   MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,ca,((PetscObject)A)->type_name,C);
431:   MatSetBlockSizesFromMats(C,A,B);

433:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
434:   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
435:   c          = (Mat_SeqAIJ*)(C->data);
436:   c->free_a  = PETSC_TRUE;
437:   c->free_ij = PETSC_TRUE;
438:   c->nonew   = 0;

440:   /* slower, less memory */
441:   C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable;

443:   /* set MatInfo */
444:   afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
445:   if (afill < 1.0) afill = 1.0;
446:   c->maxnz                     = ci[am];
447:   c->nz                        = ci[am];
448:   C->info.mallocs           = ndouble;
449:   C->info.fill_ratio_given  = fill;
450:   C->info.fill_ratio_needed = afill;

452: #if defined(PETSC_USE_INFO)
453:   if (ci[am]) {
454:     PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);
455:     PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);
456:   } else {
457:     PetscInfo(C,"Empty matrix product\n");
458:   }
459: #endif
460:   return(0);
461: }

463: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Scalable(Mat A,Mat B,PetscReal fill,Mat C)
464: {
465:   PetscErrorCode     ierr;
466:   Mat_SeqAIJ         *a  = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
467:   PetscInt           *ai = a->i,*bi=b->i,*ci,*cj;
468:   PetscInt           am  = A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
469:   MatScalar          *ca;
470:   PetscReal          afill;
471:   PetscInt           i,j,anzi,brow,bnzj,cnzi,*bj,*aj,*lnk,ndouble=0,Crmax;
472:   PetscTable         ta;
473:   PetscFreeSpaceList free_space=NULL,current_space=NULL;

476:   /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_Scalalbe() */
477:   /*---------------------------------------------------------------------------------------------*/
478:   /* Allocate arrays for fill computation and free space for accumulating nonzero column */
479:   PetscMalloc1(am+2,&ci);
480:   ci[0] = 0;

482:   /* create and initialize a linked list */
483:   PetscTableCreate(bn,bn,&ta);
484:   MatRowMergeMax_SeqAIJ(b,bm,ta);
485:   PetscTableGetCount(ta,&Crmax);
486:   PetscTableDestroy(&ta);
487:   PetscLLCondensedCreate_Scalable(Crmax,&lnk);

489:   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
490:   PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space);
491:   current_space = free_space;

493:   /* Determine ci and cj */
494:   for (i=0; i<am; i++) {
495:     anzi = ai[i+1] - ai[i];
496:     aj   = a->j + ai[i];
497:     for (j=0; j<anzi; j++) {
498:       brow = aj[j];
499:       bnzj = bi[brow+1] - bi[brow];
500:       bj   = b->j + bi[brow];
501:       /* add non-zero cols of B into the sorted linked list lnk */
502:       PetscLLCondensedAddSorted_Scalable(bnzj,bj,lnk);
503:     }
504:     cnzi = lnk[0];

506:     /* If free space is not available, make more free space */
507:     /* Double the amount of total space in the list */
508:     if (current_space->local_remaining<cnzi) {
509:       PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),&current_space);
510:       ndouble++;
511:     }

513:     /* Copy data into free space, then initialize lnk */
514:     PetscLLCondensedClean_Scalable(cnzi,current_space->array,lnk);

516:     current_space->array           += cnzi;
517:     current_space->local_used      += cnzi;
518:     current_space->local_remaining -= cnzi;

520:     ci[i+1] = ci[i] + cnzi;
521:   }

523:   /* Column indices are in the list of free space */
524:   /* Allocate space for cj, initialize cj, and */
525:   /* destroy list of free space and other temporary array(s) */
526:   PetscMalloc1(ci[am]+1,&cj);
527:   PetscFreeSpaceContiguous(&free_space,cj);
528:   PetscLLCondensedDestroy_Scalable(lnk);

530:   /* Allocate space for ca */
531:   /*-----------------------*/
532:   PetscCalloc1(ci[am]+1,&ca);

534:   /* put together the new symbolic matrix */
535:   MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,ca,((PetscObject)A)->type_name,C);
536:   MatSetBlockSizesFromMats(C,A,B);

538:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
539:   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
540:   c          = (Mat_SeqAIJ*)(C->data);
541:   c->free_a  = PETSC_TRUE;
542:   c->free_ij = PETSC_TRUE;
543:   c->nonew   = 0;

545:   /* slower, less memory */
546:   C->ops->matmultnumeric    = MatMatMultNumeric_SeqAIJ_SeqAIJ_Scalable;

548:   /* set MatInfo */
549:   afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
550:   if (afill < 1.0) afill = 1.0;
551:   c->maxnz                     = ci[am];
552:   c->nz                        = ci[am];
553:   C->info.mallocs           = ndouble;
554:   C->info.fill_ratio_given  = fill;
555:   C->info.fill_ratio_needed = afill;

557: #if defined(PETSC_USE_INFO)
558:   if (ci[am]) {
559:     PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);
560:     PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);
561:   } else {
562:     PetscInfo(C,"Empty matrix product\n");
563:   }
564: #endif
565:   return(0);
566: }

568: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Heap(Mat A,Mat B,PetscReal fill,Mat C)
569: {
570:   PetscErrorCode     ierr;
571:   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
572:   const PetscInt     *ai=a->i,*bi=b->i,*aj=a->j,*bj=b->j;
573:   PetscInt           *ci,*cj,*bb;
574:   PetscInt           am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
575:   PetscReal          afill;
576:   PetscInt           i,j,col,ndouble = 0;
577:   PetscFreeSpaceList free_space=NULL,current_space=NULL;
578:   PetscHeap          h;

581:   /* Get ci and cj - by merging sorted rows using a heap */
582:   /*---------------------------------------------------------------------------------------------*/
583:   /* Allocate arrays for fill computation and free space for accumulating nonzero column */
584:   PetscMalloc1(am+2,&ci);
585:   ci[0] = 0;

587:   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
588:   PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space);
589:   current_space = free_space;

591:   PetscHeapCreate(a->rmax,&h);
592:   PetscMalloc1(a->rmax,&bb);

594:   /* Determine ci and cj */
595:   for (i=0; i<am; i++) {
596:     const PetscInt anzi  = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */
597:     const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */
598:     ci[i+1] = ci[i];
599:     /* Populate the min heap */
600:     for (j=0; j<anzi; j++) {
601:       bb[j] = bi[acol[j]];         /* bb points at the start of the row */
602:       if (bb[j] < bi[acol[j]+1]) { /* Add if row is nonempty */
603:         PetscHeapAdd(h,j,bj[bb[j]++]);
604:       }
605:     }
606:     /* Pick off the min element, adding it to free space */
607:     PetscHeapPop(h,&j,&col);
608:     while (j >= 0) {
609:       if (current_space->local_remaining < 1) { /* double the size, but don't exceed 16 MiB */
610:         PetscFreeSpaceGet(PetscMin(PetscIntMultTruncate(2,current_space->total_array_size),16 << 20),&current_space);
611:         ndouble++;
612:       }
613:       *(current_space->array++) = col;
614:       current_space->local_used++;
615:       current_space->local_remaining--;
616:       ci[i+1]++;

618:       /* stash if anything else remains in this row of B */
619:       if (bb[j] < bi[acol[j]+1]) {PetscHeapStash(h,j,bj[bb[j]++]);}
620:       while (1) {               /* pop and stash any other rows of B that also had an entry in this column */
621:         PetscInt j2,col2;
622:         PetscHeapPeek(h,&j2,&col2);
623:         if (col2 != col) break;
624:         PetscHeapPop(h,&j2,&col2);
625:         if (bb[j2] < bi[acol[j2]+1]) {PetscHeapStash(h,j2,bj[bb[j2]++]);}
626:       }
627:       /* Put any stashed elements back into the min heap */
628:       PetscHeapUnstash(h);
629:       PetscHeapPop(h,&j,&col);
630:     }
631:   }
632:   PetscFree(bb);
633:   PetscHeapDestroy(&h);

635:   /* Column indices are in the list of free space */
636:   /* Allocate space for cj, initialize cj, and */
637:   /* destroy list of free space and other temporary array(s) */
638:   PetscMalloc1(ci[am],&cj);
639:   PetscFreeSpaceContiguous(&free_space,cj);

641:   /* put together the new symbolic matrix */
642:   MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C);
643:   MatSetBlockSizesFromMats(C,A,B);

645:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
646:   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
647:   c          = (Mat_SeqAIJ*)(C->data);
648:   c->free_a  = PETSC_TRUE;
649:   c->free_ij = PETSC_TRUE;
650:   c->nonew   = 0;

652:   C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted;

654:   /* set MatInfo */
655:   afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
656:   if (afill < 1.0) afill = 1.0;
657:   c->maxnz                     = ci[am];
658:   c->nz                        = ci[am];
659:   C->info.mallocs           = ndouble;
660:   C->info.fill_ratio_given  = fill;
661:   C->info.fill_ratio_needed = afill;

663: #if defined(PETSC_USE_INFO)
664:   if (ci[am]) {
665:     PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);
666:     PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);
667:   } else {
668:     PetscInfo(C,"Empty matrix product\n");
669:   }
670: #endif
671:   return(0);
672: }

674: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_BTHeap(Mat A,Mat B,PetscReal fill,Mat C)
675: {
676:   PetscErrorCode     ierr;
677:   Mat_SeqAIJ         *a  = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
678:   const PetscInt     *ai = a->i,*bi=b->i,*aj=a->j,*bj=b->j;
679:   PetscInt           *ci,*cj,*bb;
680:   PetscInt           am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
681:   PetscReal          afill;
682:   PetscInt           i,j,col,ndouble = 0;
683:   PetscFreeSpaceList free_space=NULL,current_space=NULL;
684:   PetscHeap          h;
685:   PetscBT            bt;

688:   /* Get ci and cj - using a heap for the sorted rows, but use BT so that each index is only added once */
689:   /*---------------------------------------------------------------------------------------------*/
690:   /* Allocate arrays for fill computation and free space for accumulating nonzero column */
691:   PetscMalloc1(am+2,&ci);
692:   ci[0] = 0;

694:   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
695:   PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(ai[am],bi[bm])),&free_space);

697:   current_space = free_space;

699:   PetscHeapCreate(a->rmax,&h);
700:   PetscMalloc1(a->rmax,&bb);
701:   PetscBTCreate(bn,&bt);

703:   /* Determine ci and cj */
704:   for (i=0; i<am; i++) {
705:     const PetscInt anzi  = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */
706:     const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */
707:     const PetscInt *fptr = current_space->array; /* Save beginning of the row so we can clear the BT later */
708:     ci[i+1] = ci[i];
709:     /* Populate the min heap */
710:     for (j=0; j<anzi; j++) {
711:       PetscInt brow = acol[j];
712:       for (bb[j] = bi[brow]; bb[j] < bi[brow+1]; bb[j]++) {
713:         PetscInt bcol = bj[bb[j]];
714:         if (!PetscBTLookupSet(bt,bcol)) { /* new entry */
715:           PetscHeapAdd(h,j,bcol);
716:           bb[j]++;
717:           break;
718:         }
719:       }
720:     }
721:     /* Pick off the min element, adding it to free space */
722:     PetscHeapPop(h,&j,&col);
723:     while (j >= 0) {
724:       if (current_space->local_remaining < 1) { /* double the size, but don't exceed 16 MiB */
725:         fptr = NULL;                      /* need PetscBTMemzero */
726:         PetscFreeSpaceGet(PetscMin(PetscIntMultTruncate(2,current_space->total_array_size),16 << 20),&current_space);
727:         ndouble++;
728:       }
729:       *(current_space->array++) = col;
730:       current_space->local_used++;
731:       current_space->local_remaining--;
732:       ci[i+1]++;

734:       /* stash if anything else remains in this row of B */
735:       for (; bb[j] < bi[acol[j]+1]; bb[j]++) {
736:         PetscInt bcol = bj[bb[j]];
737:         if (!PetscBTLookupSet(bt,bcol)) { /* new entry */
738:           PetscHeapAdd(h,j,bcol);
739:           bb[j]++;
740:           break;
741:         }
742:       }
743:       PetscHeapPop(h,&j,&col);
744:     }
745:     if (fptr) {                 /* Clear the bits for this row */
746:       for (; fptr<current_space->array; fptr++) {PetscBTClear(bt,*fptr);}
747:     } else {                    /* We reallocated so we don't remember (easily) how to clear only the bits we changed */
748:       PetscBTMemzero(bn,bt);
749:     }
750:   }
751:   PetscFree(bb);
752:   PetscHeapDestroy(&h);
753:   PetscBTDestroy(&bt);

755:   /* Column indices are in the list of free space */
756:   /* Allocate space for cj, initialize cj, and */
757:   /* destroy list of free space and other temporary array(s) */
758:   PetscMalloc1(ci[am],&cj);
759:   PetscFreeSpaceContiguous(&free_space,cj);

761:   /* put together the new symbolic matrix */
762:   MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C);
763:   MatSetBlockSizesFromMats(C,A,B);

765:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
766:   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
767:   c          = (Mat_SeqAIJ*)(C->data);
768:   c->free_a  = PETSC_TRUE;
769:   c->free_ij = PETSC_TRUE;
770:   c->nonew   = 0;

772:   C->ops->matmultnumeric        = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted;

774:   /* set MatInfo */
775:   afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
776:   if (afill < 1.0) afill = 1.0;
777:   c->maxnz                     = ci[am];
778:   c->nz                        = ci[am];
779:   C->info.mallocs           = ndouble;
780:   C->info.fill_ratio_given  = fill;
781:   C->info.fill_ratio_needed = afill;

783: #if defined(PETSC_USE_INFO)
784:   if (ci[am]) {
785:     PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);
786:     PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);
787:   } else {
788:     PetscInfo(C,"Empty matrix product\n");
789:   }
790: #endif
791:   return(0);
792: }


795: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_RowMerge(Mat A,Mat B,PetscReal fill,Mat C)
796: {
797:   PetscErrorCode     ierr;
798:   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
799:   const PetscInt     *ai=a->i,*bi=b->i,*aj=a->j,*bj=b->j,*inputi,*inputj,*inputcol,*inputcol_L1;
800:   PetscInt           *ci,*cj,*outputj,worki_L1[9],worki_L2[9];
801:   PetscInt           c_maxmem,a_maxrownnz=0,a_rownnz;
802:   const PetscInt     workcol[8]={0,1,2,3,4,5,6,7};
803:   const PetscInt     am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
804:   const PetscInt     *brow_ptr[8],*brow_end[8];
805:   PetscInt           window[8];
806:   PetscInt           window_min,old_window_min,ci_nnz,outputi_nnz=0,L1_nrows,L2_nrows;
807:   PetscInt           i,k,ndouble=0,L1_rowsleft,rowsleft;
808:   PetscReal          afill;
809:   PetscInt           *workj_L1,*workj_L2,*workj_L3;
810:   PetscInt           L1_nnz,L2_nnz;

812:   /* Step 1: Get upper bound on memory required for allocation.
813:              Because of the way virtual memory works,
814:              only the memory pages that are actually needed will be physically allocated. */
816:   PetscMalloc1(am+1,&ci);
817:   for (i=0; i<am; i++) {
818:     const PetscInt anzi  = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */
819:     const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */
820:     a_rownnz = 0;
821:     for (k=0; k<anzi; ++k) {
822:       a_rownnz += bi[acol[k]+1] - bi[acol[k]];
823:       if (a_rownnz > bn) {
824:         a_rownnz = bn;
825:         break;
826:       }
827:     }
828:     a_maxrownnz = PetscMax(a_maxrownnz, a_rownnz);
829:   }
830:   /* temporary work areas for merging rows */
831:   PetscMalloc1(a_maxrownnz*8,&workj_L1);
832:   PetscMalloc1(a_maxrownnz*8,&workj_L2);
833:   PetscMalloc1(a_maxrownnz,&workj_L3);

835:   /* This should be enough for almost all matrices. If not, memory is reallocated later. */
836:   c_maxmem = 8*(ai[am]+bi[bm]);
837:   /* Step 2: Populate pattern for C */
838:   PetscMalloc1(c_maxmem,&cj);

840:   ci_nnz       = 0;
841:   ci[0]        = 0;
842:   worki_L1[0]  = 0;
843:   worki_L2[0]  = 0;
844:   for (i=0; i<am; i++) {
845:     const PetscInt anzi  = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */
846:     const PetscInt *acol = aj + ai[i];      /* column indices of nonzero entries in this row */
847:     rowsleft             = anzi;
848:     inputcol_L1          = acol;
849:     L2_nnz               = 0;
850:     L2_nrows             = 1;  /* Number of rows to be merged on Level 3. output of L3 already exists -> initial value 1   */
851:     worki_L2[1]          = 0;
852:     outputi_nnz          = 0;

854:     /* If the number of indices in C so far + the max number of columns in the next row > c_maxmem  -> allocate more memory */
855:     while (ci_nnz+a_maxrownnz > c_maxmem) {
856:       c_maxmem *= 2;
857:       ndouble++;
858:       PetscRealloc(sizeof(PetscInt)*c_maxmem,&cj);
859:     }

861:     while (rowsleft) {
862:       L1_rowsleft = PetscMin(64, rowsleft); /* In the inner loop max 64 rows of B can be merged */
863:       L1_nrows    = 0;
864:       L1_nnz      = 0;
865:       inputcol    = inputcol_L1;
866:       inputi      = bi;
867:       inputj      = bj;

869:       /* The following macro is used to specialize for small rows in A.
870:          This helps with compiler unrolling, improving performance substantially.
871:           Input:  inputj   inputi  inputcol  bn
872:           Output: outputj  outputi_nnz                       */
873:        #define MatMatMultSymbolic_RowMergeMacro(ANNZ)                        \
874:          window_min  = bn;                                                   \
875:          outputi_nnz = 0;                                                    \
876:          for (k=0; k<ANNZ; ++k) {                                            \
877:            brow_ptr[k] = inputj + inputi[inputcol[k]];                       \
878:            brow_end[k] = inputj + inputi[inputcol[k]+1];                     \
879:            window[k]   = (brow_ptr[k] != brow_end[k]) ? *brow_ptr[k] : bn;   \
880:            window_min  = PetscMin(window[k], window_min);                    \
881:          }                                                                   \
882:          while (window_min < bn) {                                           \
883:            outputj[outputi_nnz++] = window_min;                              \
884:            /* advance front and compute new minimum */                       \
885:            old_window_min = window_min;                                      \
886:            window_min = bn;                                                  \
887:            for (k=0; k<ANNZ; ++k) {                                          \
888:              if (window[k] == old_window_min) {                              \
889:                brow_ptr[k]++;                                                \
890:                window[k] = (brow_ptr[k] != brow_end[k]) ? *brow_ptr[k] : bn; \
891:              }                                                               \
892:              window_min = PetscMin(window[k], window_min);                   \
893:            }                                                                 \
894:          }

896:       /************** L E V E L  1 ***************/
897:       /* Merge up to 8 rows of B to L1 work array*/
898:       while (L1_rowsleft) {
899:         outputi_nnz = 0;
900:         if (anzi > 8)  outputj = workj_L1 + L1_nnz;     /* Level 1 rowmerge*/
901:         else           outputj = cj + ci_nnz;           /* Merge directly to C */

903:         switch (L1_rowsleft) {
904:         case 1:  brow_ptr[0] = inputj + inputi[inputcol[0]];
905:                  brow_end[0] = inputj + inputi[inputcol[0]+1];
906:                  for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */
907:                  inputcol    += L1_rowsleft;
908:                  rowsleft    -= L1_rowsleft;
909:                  L1_rowsleft  = 0;
910:                  break;
911:         case 2:  MatMatMultSymbolic_RowMergeMacro(2);
912:                  inputcol    += L1_rowsleft;
913:                  rowsleft    -= L1_rowsleft;
914:                  L1_rowsleft  = 0;
915:                  break;
916:         case 3: MatMatMultSymbolic_RowMergeMacro(3);
917:                  inputcol    += L1_rowsleft;
918:                  rowsleft    -= L1_rowsleft;
919:                  L1_rowsleft  = 0;
920:                  break;
921:         case 4:  MatMatMultSymbolic_RowMergeMacro(4);
922:                  inputcol    += L1_rowsleft;
923:                  rowsleft    -= L1_rowsleft;
924:                  L1_rowsleft  = 0;
925:                  break;
926:         case 5:  MatMatMultSymbolic_RowMergeMacro(5);
927:                  inputcol    += L1_rowsleft;
928:                  rowsleft    -= L1_rowsleft;
929:                  L1_rowsleft  = 0;
930:                  break;
931:         case 6:  MatMatMultSymbolic_RowMergeMacro(6);
932:                  inputcol    += L1_rowsleft;
933:                  rowsleft    -= L1_rowsleft;
934:                  L1_rowsleft  = 0;
935:                  break;
936:         case 7:  MatMatMultSymbolic_RowMergeMacro(7);
937:                  inputcol    += L1_rowsleft;
938:                  rowsleft    -= L1_rowsleft;
939:                  L1_rowsleft  = 0;
940:                  break;
941:         default: MatMatMultSymbolic_RowMergeMacro(8);
942:                  inputcol    += 8;
943:                  rowsleft    -= 8;
944:                  L1_rowsleft -= 8;
945:                  break;
946:         }
947:         inputcol_L1           = inputcol;
948:         L1_nnz               += outputi_nnz;
949:         worki_L1[++L1_nrows]  = L1_nnz;
950:       }

952:       /********************** L E V E L  2 ************************/
953:       /* Merge from L1 work array to either C or to L2 work array */
954:       if (anzi > 8) {
955:         inputi      = worki_L1;
956:         inputj      = workj_L1;
957:         inputcol    = workcol;
958:         outputi_nnz = 0;

960:         if (anzi <= 64) outputj = cj + ci_nnz;        /* Merge from L1 work array to C */
961:         else            outputj = workj_L2 + L2_nnz;  /* Merge from L1 work array to L2 work array */

963:         switch (L1_nrows) {
964:         case 1:  brow_ptr[0] = inputj + inputi[inputcol[0]];
965:                  brow_end[0] = inputj + inputi[inputcol[0]+1];
966:                  for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */
967:                  break;
968:         case 2:  MatMatMultSymbolic_RowMergeMacro(2); break;
969:         case 3:  MatMatMultSymbolic_RowMergeMacro(3); break;
970:         case 4:  MatMatMultSymbolic_RowMergeMacro(4); break;
971:         case 5:  MatMatMultSymbolic_RowMergeMacro(5); break;
972:         case 6:  MatMatMultSymbolic_RowMergeMacro(6); break;
973:         case 7:  MatMatMultSymbolic_RowMergeMacro(7); break;
974:         case 8:  MatMatMultSymbolic_RowMergeMacro(8); break;
975:         default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatMult logic error: Not merging 1-8 rows from L1 work array!");
976:         }
977:         L2_nnz               += outputi_nnz;
978:         worki_L2[++L2_nrows]  = L2_nnz;

980:         /************************ L E V E L  3 **********************/
981:         /* Merge from L2 work array to either C or to L2 work array */
982:         if (anzi > 64 && (L2_nrows == 8 || rowsleft == 0)) {
983:           inputi      = worki_L2;
984:           inputj      = workj_L2;
985:           inputcol    = workcol;
986:           outputi_nnz = 0;
987:           if (rowsleft) outputj = workj_L3;
988:           else          outputj = cj + ci_nnz;
989:           switch (L2_nrows) {
990:           case 1:  brow_ptr[0] = inputj + inputi[inputcol[0]];
991:                    brow_end[0] = inputj + inputi[inputcol[0]+1];
992:                    for (; brow_ptr[0] != brow_end[0]; ++brow_ptr[0]) outputj[outputi_nnz++] = *brow_ptr[0]; /* copy row in b over */
993:                    break;
994:           case 2:  MatMatMultSymbolic_RowMergeMacro(2); break;
995:           case 3:  MatMatMultSymbolic_RowMergeMacro(3); break;
996:           case 4:  MatMatMultSymbolic_RowMergeMacro(4); break;
997:           case 5:  MatMatMultSymbolic_RowMergeMacro(5); break;
998:           case 6:  MatMatMultSymbolic_RowMergeMacro(6); break;
999:           case 7:  MatMatMultSymbolic_RowMergeMacro(7); break;
1000:           case 8:  MatMatMultSymbolic_RowMergeMacro(8); break;
1001:           default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatMult logic error: Not merging 1-8 rows from L2 work array!");
1002:           }
1003:           L2_nrows    = 1;
1004:           L2_nnz      = outputi_nnz;
1005:           worki_L2[1] = outputi_nnz;
1006:           /* Copy to workj_L2 */
1007:           if (rowsleft) {
1008:             for (k=0; k<outputi_nnz; ++k)  workj_L2[k] = outputj[k];
1009:           }
1010:         }
1011:       }
1012:     }  /* while (rowsleft) */
1013: #undef MatMatMultSymbolic_RowMergeMacro

1015:     /* terminate current row */
1016:     ci_nnz += outputi_nnz;
1017:     ci[i+1] = ci_nnz;
1018:   }

1020:   /* Step 3: Create the new symbolic matrix */
1021:   MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C);
1022:   MatSetBlockSizesFromMats(C,A,B);

1024:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
1025:   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
1026:   c          = (Mat_SeqAIJ*)(C->data);
1027:   c->free_a  = PETSC_TRUE;
1028:   c->free_ij = PETSC_TRUE;
1029:   c->nonew   = 0;

1031:   C->ops->matmultnumeric        = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted;

1033:   /* set MatInfo */
1034:   afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
1035:   if (afill < 1.0) afill = 1.0;
1036:   c->maxnz                     = ci[am];
1037:   c->nz                        = ci[am];
1038:   C->info.mallocs           = ndouble;
1039:   C->info.fill_ratio_given  = fill;
1040:   C->info.fill_ratio_needed = afill;

1042: #if defined(PETSC_USE_INFO)
1043:   if (ci[am]) {
1044:     PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);
1045:     PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);
1046:   } else {
1047:     PetscInfo(C,"Empty matrix product\n");
1048:   }
1049: #endif

1051:   /* Step 4: Free temporary work areas */
1052:   PetscFree(workj_L1);
1053:   PetscFree(workj_L2);
1054:   PetscFree(workj_L3);
1055:   return(0);
1056: }

1058: /* concatenate unique entries and then sort */
1059: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_Sorted(Mat A,Mat B,PetscReal fill,Mat C)
1060: {
1061:   PetscErrorCode     ierr;
1062:   Mat_SeqAIJ         *a  = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
1063:   const PetscInt     *ai = a->i,*bi=b->i,*aj=a->j,*bj=b->j;
1064:   PetscInt           *ci,*cj;
1065:   PetscInt           am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
1066:   PetscReal          afill;
1067:   PetscInt           i,j,ndouble = 0;
1068:   PetscSegBuffer     seg,segrow;
1069:   char               *seen;

1072:   PetscMalloc1(am+1,&ci);
1073:   ci[0] = 0;

1075:   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
1076:   PetscSegBufferCreate(sizeof(PetscInt),(PetscInt)(fill*(ai[am]+bi[bm])),&seg);
1077:   PetscSegBufferCreate(sizeof(PetscInt),100,&segrow);
1078:   PetscCalloc1(bn,&seen);

1080:   /* Determine ci and cj */
1081:   for (i=0; i<am; i++) {
1082:     const PetscInt anzi  = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */
1083:     const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */
1084:     PetscInt packlen = 0,*PETSC_RESTRICT crow;
1085:     /* Pack segrow */
1086:     for (j=0; j<anzi; j++) {
1087:       PetscInt brow = acol[j],bjstart = bi[brow],bjend = bi[brow+1],k;
1088:       for (k=bjstart; k<bjend; k++) {
1089:         PetscInt bcol = bj[k];
1090:         if (!seen[bcol]) { /* new entry */
1091:           PetscInt *PETSC_RESTRICT slot;
1092:           PetscSegBufferGetInts(segrow,1,&slot);
1093:           *slot = bcol;
1094:           seen[bcol] = 1;
1095:           packlen++;
1096:         }
1097:       }
1098:     }
1099:     PetscSegBufferGetInts(seg,packlen,&crow);
1100:     PetscSegBufferExtractTo(segrow,crow);
1101:     PetscSortInt(packlen,crow);
1102:     ci[i+1] = ci[i] + packlen;
1103:     for (j=0; j<packlen; j++) seen[crow[j]] = 0;
1104:   }
1105:   PetscSegBufferDestroy(&segrow);
1106:   PetscFree(seen);

1108:   /* Column indices are in the segmented buffer */
1109:   PetscSegBufferExtractAlloc(seg,&cj);
1110:   PetscSegBufferDestroy(&seg);

1112:   /* put together the new symbolic matrix */
1113:   MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,((PetscObject)A)->type_name,C);
1114:   MatSetBlockSizesFromMats(C,A,B);

1116:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
1117:   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
1118:   c          = (Mat_SeqAIJ*)(C->data);
1119:   c->free_a  = PETSC_TRUE;
1120:   c->free_ij = PETSC_TRUE;
1121:   c->nonew   = 0;

1123:   C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ_Sorted;

1125:   /* set MatInfo */
1126:   afill = (PetscReal)ci[am]/PetscMax(ai[am]+bi[bm],1) + 1.e-5;
1127:   if (afill < 1.0) afill = 1.0;
1128:   c->maxnz                  = ci[am];
1129:   c->nz                     = ci[am];
1130:   C->info.mallocs           = ndouble;
1131:   C->info.fill_ratio_given  = fill;
1132:   C->info.fill_ratio_needed = afill;

1134: #if defined(PETSC_USE_INFO)
1135:   if (ci[am]) {
1136:     PetscInfo3(C,"Reallocs %D; Fill ratio: given %g needed %g.\n",ndouble,(double)fill,(double)afill);
1137:     PetscInfo1(C,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill);
1138:   } else {
1139:     PetscInfo(C,"Empty matrix product\n");
1140:   }
1141: #endif
1142:   return(0);
1143: }

1145: PetscErrorCode MatDestroy_SeqAIJ_MatMatMultTrans(Mat A)
1146: {
1147:   PetscErrorCode      ierr;
1148:   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)A->data;
1149:   Mat_MatMatTransMult *abt=a->abt;

1152:   (abt->destroy)(A);
1153:   MatTransposeColoringDestroy(&abt->matcoloring);
1154:   MatDestroy(&abt->Bt_den);
1155:   MatDestroy(&abt->ABt_den);
1156:   PetscFree(abt);
1157:   return(0);
1158: }

1160: PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat C)
1161: {
1162:   PetscErrorCode      ierr;
1163:   Mat                 Bt;
1164:   PetscInt            *bti,*btj;
1165:   Mat_MatMatTransMult *abt;
1166:   Mat_SeqAIJ          *c;
1167:   Mat_Product         *product = C->product;
1168:   MatProductAlgorithm alg = product->alg;

1171:   /* create symbolic Bt */
1172:   MatGetSymbolicTranspose_SeqAIJ(B,&bti,&btj);
1173:   MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,B->cmap->n,B->rmap->n,bti,btj,NULL,&Bt);
1174:   MatSetBlockSizes(Bt,PetscAbs(A->cmap->bs),PetscAbs(B->cmap->bs));
1175:   MatSetType(Bt,((PetscObject)A)->type_name);

1177:   /* get symbolic C=A*Bt */
1178:   MatProductSetAlgorithm(C,"sorted"); /* set algorithm for C = A*Bt */
1179:   MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,Bt,fill,C);
1180:   MatProductSetAlgorithm(C,alg); /* resume original algorithm for ABt product */

1182:   /* create a supporting struct for reuse intermidiate dense matrices with matcoloring */
1183:   PetscNew(&abt);
1184:   c      = (Mat_SeqAIJ*)C->data;
1185:   c->abt = abt;

1187:   abt->usecoloring = PETSC_FALSE;
1188:   abt->destroy     = C->ops->destroy;
1189:   C->ops->destroy  = MatDestroy_SeqAIJ_MatMatMultTrans;
1190:   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;

1192:   abt->usecoloring = PETSC_FALSE;
1193:   PetscStrcmp(product->alg,"color",&abt->usecoloring);
1194:   if (abt->usecoloring) {
1195:     /* Create MatTransposeColoring from symbolic C=A*B^T */
1196:     MatTransposeColoring matcoloring;
1197:     MatColoring          coloring;
1198:     ISColoring           iscoloring;
1199:     Mat                  Bt_dense,C_dense;

1201:     /* inode causes memory problem */
1202:     MatSetOption(C,MAT_USE_INODES,PETSC_FALSE);

1204:     MatColoringCreate(C,&coloring);
1205:     MatColoringSetDistance(coloring,2);
1206:     MatColoringSetType(coloring,MATCOLORINGSL);
1207:     MatColoringSetFromOptions(coloring);
1208:     MatColoringApply(coloring,&iscoloring);
1209:     MatColoringDestroy(&coloring);
1210:     MatTransposeColoringCreate(C,iscoloring,&matcoloring);

1212:     abt->matcoloring = matcoloring;

1214:     ISColoringDestroy(&iscoloring);

1216:     /* Create Bt_dense and C_dense = A*Bt_dense */
1217:     MatCreate(PETSC_COMM_SELF,&Bt_dense);
1218:     MatSetSizes(Bt_dense,A->cmap->n,matcoloring->ncolors,A->cmap->n,matcoloring->ncolors);
1219:     MatSetType(Bt_dense,MATSEQDENSE);
1220:     MatSeqDenseSetPreallocation(Bt_dense,NULL);

1222:     Bt_dense->assembled = PETSC_TRUE;
1223:     abt->Bt_den         = Bt_dense;

1225:     MatCreate(PETSC_COMM_SELF,&C_dense);
1226:     MatSetSizes(C_dense,A->rmap->n,matcoloring->ncolors,A->rmap->n,matcoloring->ncolors);
1227:     MatSetType(C_dense,MATSEQDENSE);
1228:     MatSeqDenseSetPreallocation(C_dense,NULL);

1230:     Bt_dense->assembled = PETSC_TRUE;
1231:     abt->ABt_den  = C_dense;

1233: #if defined(PETSC_USE_INFO)
1234:     {
1235:       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
1236:       PetscInfo7(C,"Use coloring of C=A*B^T; B^T: %D %D, Bt_dense: %D,%D; Cnz %D / (cm*ncolors %D) = %g\n",B->cmap->n,B->rmap->n,Bt_dense->rmap->n,Bt_dense->cmap->n,c->nz,A->rmap->n*matcoloring->ncolors,(PetscReal)(c->nz)/(A->rmap->n*matcoloring->ncolors));
1237:     }
1238: #endif
1239:   }
1240:   /* clean up */
1241:   MatDestroy(&Bt);
1242:   MatRestoreSymbolicTranspose_SeqAIJ(B,&bti,&btj);
1243:   return(0);
1244: }

1246: PetscErrorCode MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
1247: {
1248:   PetscErrorCode      ierr;
1249:   Mat_SeqAIJ          *a   =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data;
1250:   PetscInt            *ai  =a->i,*aj=a->j,*bi=b->i,*bj=b->j,anzi,bnzj,nexta,nextb,*acol,*bcol,brow;
1251:   PetscInt            cm   =C->rmap->n,*ci=c->i,*cj=c->j,i,j,cnzi,*ccol;
1252:   PetscLogDouble      flops=0.0;
1253:   MatScalar           *aa  =a->a,*aval,*ba=b->a,*bval,*ca,*cval;
1254:   Mat_MatMatTransMult *abt = c->abt;

1257:   /* clear old values in C */
1258:   if (!c->a) {
1259:     PetscCalloc1(ci[cm]+1,&ca);
1260:     c->a      = ca;
1261:     c->free_a = PETSC_TRUE;
1262:   } else {
1263:     ca =  c->a;
1264:     PetscArrayzero(ca,ci[cm]+1);
1265:   }

1267:   if (abt->usecoloring) {
1268:     MatTransposeColoring matcoloring = abt->matcoloring;
1269:     Mat                  Bt_dense,C_dense = abt->ABt_den;

1271:     /* Get Bt_dense by Apply MatTransposeColoring to B */
1272:     Bt_dense = abt->Bt_den;
1273:     MatTransColoringApplySpToDen(matcoloring,B,Bt_dense);

1275:     /* C_dense = A*Bt_dense */
1276:     MatMatMultNumeric_SeqAIJ_SeqDense(A,Bt_dense,C_dense);

1278:     /* Recover C from C_dense */
1279:     MatTransColoringApplyDenToSp(matcoloring,C_dense,C);
1280:     return(0);
1281:   }

1283:   for (i=0; i<cm; i++) {
1284:     anzi = ai[i+1] - ai[i];
1285:     acol = aj + ai[i];
1286:     aval = aa + ai[i];
1287:     cnzi = ci[i+1] - ci[i];
1288:     ccol = cj + ci[i];
1289:     cval = ca + ci[i];
1290:     for (j=0; j<cnzi; j++) {
1291:       brow = ccol[j];
1292:       bnzj = bi[brow+1] - bi[brow];
1293:       bcol = bj + bi[brow];
1294:       bval = ba + bi[brow];

1296:       /* perform sparse inner-product c(i,j)=A[i,:]*B[j,:]^T */
1297:       nexta = 0; nextb = 0;
1298:       while (nexta<anzi && nextb<bnzj) {
1299:         while (nexta < anzi && acol[nexta] < bcol[nextb]) nexta++;
1300:         if (nexta == anzi) break;
1301:         while (nextb < bnzj && acol[nexta] > bcol[nextb]) nextb++;
1302:         if (nextb == bnzj) break;
1303:         if (acol[nexta] == bcol[nextb]) {
1304:           cval[j] += aval[nexta]*bval[nextb];
1305:           nexta++; nextb++;
1306:           flops += 2;
1307:         }
1308:       }
1309:     }
1310:   }
1311:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1312:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1313:   PetscLogFlops(flops);
1314:   return(0);
1315: }

1317: PetscErrorCode MatDestroy_SeqAIJ_MatTransMatMult(Mat A)
1318: {
1319:   PetscErrorCode      ierr;
1320:   Mat_SeqAIJ          *a = (Mat_SeqAIJ*)A->data;
1321:   Mat_MatTransMatMult *atb = a->atb;

1324:   if (atb) {
1325:     MatDestroy(&atb->At);
1326:     (*atb->destroy)(A);
1327:   }
1328:   PetscFree(atb);
1329:   return(0);
1330: }

1332: PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat C)
1333: {
1334:   PetscErrorCode      ierr;
1335:   Mat                 At;
1336:   PetscInt            *ati,*atj;
1337:   Mat_Product         *product = C->product;
1338:   MatProductAlgorithm alg;
1339:   PetscBool           flg;

1342:   if (product) {
1343:     alg = product->alg;
1344:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"!product, not supported yet");

1346:   /* outerproduct */
1347:   PetscStrcmp(alg,"outerproduct",&flg);
1348:   if (flg) {
1349:     /* create symbolic At */
1350:     MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj);
1351:     MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,A->cmap->n,A->rmap->n,ati,atj,NULL,&At);
1352:     MatSetBlockSizes(At,PetscAbs(A->cmap->bs),PetscAbs(B->cmap->bs));
1353:     MatSetType(At,((PetscObject)A)->type_name);

1355:     /* get symbolic C=At*B */
1356:     product->alg = "sorted";
1357:     MatMatMultSymbolic_SeqAIJ_SeqAIJ(At,B,fill,C);

1359:     /* clean up */
1360:     MatDestroy(&At);
1361:     MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj);

1363:     C->ops->mattransposemultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ; /* outerproduct */
1364:     return(0);
1365:   }

1367:   /* matmatmult */
1368:   PetscStrcmp(alg,"at*b",&flg);
1369:   if (flg) {
1370:     Mat_MatTransMatMult *atb;
1371:     Mat_SeqAIJ          *c;

1373:     PetscNew(&atb);
1374:     MatTranspose_SeqAIJ(A,MAT_INITIAL_MATRIX,&At);
1375:     product->alg = "sorted";
1376:     MatMatMultSymbolic_SeqAIJ_SeqAIJ(At,B,fill,C);

1378:     c               = (Mat_SeqAIJ*)C->data;
1379:     c->atb          = atb;
1380:     atb->At         = At;
1381:     atb->destroy    = C->ops->destroy;
1382:     atb->updateAt   = PETSC_FALSE; /* because At is computed here */
1383:     C->ops->destroy = MatDestroy_SeqAIJ_MatTransMatMult;

1385:     C->ops->mattransposemultnumeric = NULL; /* see MatProductNumeric_AtB_SeqAIJ_SeqAIJ */
1386:     return(0);
1387:   }

1389:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product Algorithm is not supported");
1390:   return(0);
1391: }

1393: PetscErrorCode MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
1394: {
1396:   Mat_SeqAIJ     *a   =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data;
1397:   PetscInt       am   =A->rmap->n,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb;
1398:   PetscInt       cm   =C->rmap->n,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k;
1399:   PetscLogDouble flops=0.0;
1400:   MatScalar      *aa  =a->a,*ba,*ca,*caj;

1403:   if (!c->a) {
1404:     PetscCalloc1(ci[cm]+1,&ca);

1406:     c->a      = ca;
1407:     c->free_a = PETSC_TRUE;
1408:   } else {
1409:     ca   = c->a;
1410:     PetscArrayzero(ca,ci[cm]);
1411:   }

1413:   /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */
1414:   for (i=0; i<am; i++) {
1415:     bj   = b->j + bi[i];
1416:     ba   = b->a + bi[i];
1417:     bnzi = bi[i+1] - bi[i];
1418:     anzi = ai[i+1] - ai[i];
1419:     for (j=0; j<anzi; j++) {
1420:       nextb = 0;
1421:       crow  = *aj++;
1422:       cjj   = cj + ci[crow];
1423:       caj   = ca + ci[crow];
1424:       /* perform sparse axpy operation.  Note cjj includes bj. */
1425:       for (k=0; nextb<bnzi; k++) {
1426:         if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */
1427:           caj[k] += (*aa)*(*(ba+nextb));
1428:           nextb++;
1429:         }
1430:       }
1431:       flops += 2*bnzi;
1432:       aa++;
1433:     }
1434:   }

1436:   /* Assemble the final matrix and clean up */
1437:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1438:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1439:   PetscLogFlops(flops);
1440:   return(0);
1441: }

1443: PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqDense(Mat A,Mat B,PetscReal fill,Mat C)
1444: {

1448:   MatMatMultSymbolic_SeqDense_SeqDense(A,B,0.0,C);

1450:   C->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqDense;
1451:   return(0);
1452: }

1454: PetscErrorCode MatMatMultNumericAdd_SeqAIJ_SeqDense(Mat A,Mat B,Mat C)
1455: {
1456:   Mat_SeqAIJ        *a=(Mat_SeqAIJ*)A->data;
1457:   Mat_SeqDense      *bd = (Mat_SeqDense*)B->data;
1458:   PetscErrorCode    ierr;
1459:   PetscScalar       *c,r1,r2,r3,r4,*c1,*c2,*c3,*c4,aatmp;
1460:   const PetscScalar *aa,*b,*b1,*b2,*b3,*b4,*av;
1461:   const PetscInt    *aj;
1462:   PetscInt          cm=C->rmap->n,cn=B->cmap->n,bm=bd->lda,am=A->rmap->n;
1463:   PetscInt          am4=4*am,bm4=4*bm,col,i,j,n,ajtmp;

1466:   if (!cm || !cn) return(0);
1467:   MatSeqAIJGetArrayRead(A,&av);
1468:   MatDenseGetArray(C,&c);
1469:   MatDenseGetArrayRead(B,&b);
1470:   b1 = b; b2 = b1 + bm; b3 = b2 + bm; b4 = b3 + bm;
1471:   c1 = c; c2 = c1 + am; c3 = c2 + am; c4 = c3 + am;
1472:   for (col=0; col<cn-4; col += 4) {  /* over columns of C */
1473:     for (i=0; i<am; i++) {        /* over rows of C in those columns */
1474:       r1 = r2 = r3 = r4 = 0.0;
1475:       n  = a->i[i+1] - a->i[i];
1476:       aj = a->j + a->i[i];
1477:       aa = av + a->i[i];
1478:       for (j=0; j<n; j++) {
1479:         aatmp = aa[j]; ajtmp = aj[j];
1480:         r1 += aatmp*b1[ajtmp];
1481:         r2 += aatmp*b2[ajtmp];
1482:         r3 += aatmp*b3[ajtmp];
1483:         r4 += aatmp*b4[ajtmp];
1484:       }
1485:       c1[i] += r1;
1486:       c2[i] += r2;
1487:       c3[i] += r3;
1488:       c4[i] += r4;
1489:     }
1490:     b1 += bm4; b2 += bm4; b3 += bm4; b4 += bm4;
1491:     c1 += am4; c2 += am4; c3 += am4; c4 += am4;
1492:   }
1493:   for (; col<cn; col++) {   /* over extra columns of C */
1494:     for (i=0; i<am; i++) {  /* over rows of C in those columns */
1495:       r1 = 0.0;
1496:       n  = a->i[i+1] - a->i[i];
1497:       aj = a->j + a->i[i];
1498:       aa = av + a->i[i];
1499:       for (j=0; j<n; j++) {
1500:         r1 += aa[j]*b1[aj[j]];
1501:       }
1502:       c1[i] += r1;
1503:     }
1504:     b1 += bm;
1505:     c1 += am;
1506:   }
1507:   PetscLogFlops(cn*(2.0*a->nz));
1508:   MatDenseRestoreArray(C,&c);
1509:   MatDenseRestoreArrayRead(B,&b);
1510:   MatSeqAIJRestoreArrayRead(A,&av);
1511:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1512:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1513:   return(0);
1514: }

1516: PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqDense(Mat A,Mat B,Mat C)
1517: {

1521:   if (B->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in A %D not equal rows in B %D\n",A->cmap->n,B->rmap->n);
1522:   if (A->rmap->n != C->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows in C %D not equal rows in A %D\n",C->rmap->n,A->rmap->n);
1523:   if (B->cmap->n != C->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number columns in B %D not equal columns in C %D\n",B->cmap->n,C->cmap->n);

1525:   MatZeroEntries(C);
1526:   MatMatMultNumericAdd_SeqAIJ_SeqDense(A,B,C);
1527:   return(0);
1528: }

1530: /* ------------------------------------------------------- */
1531: static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense_AB(Mat C)
1532: {
1534:   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqDense;
1535:   C->ops->productsymbolic = MatProductSymbolic_AB;
1536:   /* dense mat may not call MatProductSymbolic(), thus set C->ops->productnumeric here */
1537:   C->ops->productnumeric  = MatProductNumeric_AB;
1538:   return(0);
1539: }

1541: static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense_AtB(Mat C)
1542: {
1544:   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_SeqAIJ_SeqDense;
1545:   C->ops->productsymbolic          = MatProductSymbolic_AtB;
1546:   C->ops->productnumeric           = MatProductNumeric_AtB;
1547:   return(0);
1548: }

1550: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense(Mat C)
1551: {
1553:   Mat_Product    *product = C->product;

1556:   switch (product->type) {
1557:   case MATPRODUCT_AB:
1558:     MatProductSetFromOptions_SeqAIJ_SeqDense_AB(C);
1559:     break;
1560:   case MATPRODUCT_AtB:
1561:     MatProductSetFromOptions_SeqAIJ_SeqDense_AtB(C);
1562:     break;
1563:   case MATPRODUCT_PtAP:
1564:     MatProductSetFromOptions_SeqDense(C);
1565:     break;
1566:   default:
1567:     /* Use MatProduct_Basic() if there is no specific implementation */
1568:     C->ops->productsymbolic = MatProductSymbolic_Basic;
1569:   }
1570:   return(0);
1571: }
1572: /* ------------------------------------------------------- */
1573: static PetscErrorCode MatProductSetFromOptions_SeqXBAIJ_SeqDense_AB(Mat C)
1574: {
1576:   Mat_Product    *product = C->product;
1577:   Mat            A = product->A;
1578:   PetscBool      baij;

1581:   PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&baij);
1582:   if (!baij) { /* A is seqsbaij */
1583:     PetscBool sbaij;
1584:     PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&sbaij);
1585:     if (!sbaij) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_ARG_WRONGSTATE,"Mat must be either seqbaij or seqsbaij format");

1587:     C->ops->matmultsymbolic = MatMatMultSymbolic_SeqSBAIJ_SeqDense;
1588:   } else { /* A is seqbaij */
1589:     C->ops->matmultsymbolic = MatMatMultSymbolic_SeqBAIJ_SeqDense;
1590:   }

1592:   C->ops->productsymbolic = MatProductSymbolic_AB;
1593:   C->ops->productnumeric  = MatProductNumeric_AB;
1594:   return(0);
1595: }

1597: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqXBAIJ_SeqDense(Mat C)
1598: {
1600:   Mat_Product    *product = C->product;

1603:   if (product->type == MATPRODUCT_AB) {
1604:     MatProductSetFromOptions_SeqXBAIJ_SeqDense_AB(C);
1605:   } else SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for SeqXBAIJ and SeqDense matrices",MatProductTypes[product->type]);
1606:   return(0);
1607: }
1608: /* ------------------------------------------------------- */
1609: static PetscErrorCode MatProductSetFromOptions_SeqDense_SeqAIJ_AB(Mat C)
1610: {
1612:   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqDense_SeqAIJ;
1613:   C->ops->productsymbolic = MatProductSymbolic_AB;
1614:   C->ops->productnumeric  = MatProductNumeric_AB;
1615:   return(0);
1616: }

1618: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqDense_SeqAIJ(Mat C)
1619: {
1621:   Mat_Product    *product = C->product;

1624:   if (product->type == MATPRODUCT_AB) {
1625:     MatProductSetFromOptions_SeqDense_SeqAIJ_AB(C);
1626:   } else SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for SeqDense and SeqAIJ matrices",MatProductTypes[product->type]);
1627:   return(0);
1628: }
1629: /* ------------------------------------------------------- */

1631: PetscErrorCode  MatTransColoringApplySpToDen_SeqAIJ(MatTransposeColoring coloring,Mat B,Mat Btdense)
1632: {
1634:   Mat_SeqAIJ     *b       = (Mat_SeqAIJ*)B->data;
1635:   Mat_SeqDense   *btdense = (Mat_SeqDense*)Btdense->data;
1636:   PetscInt       *bi      = b->i,*bj=b->j;
1637:   PetscInt       m        = Btdense->rmap->n,n=Btdense->cmap->n,j,k,l,col,anz,*btcol,brow,ncolumns;
1638:   MatScalar      *btval,*btval_den,*ba=b->a;
1639:   PetscInt       *columns=coloring->columns,*colorforcol=coloring->colorforcol,ncolors=coloring->ncolors;

1642:   btval_den=btdense->v;
1643:   PetscArrayzero(btval_den,m*n);
1644:   for (k=0; k<ncolors; k++) {
1645:     ncolumns = coloring->ncolumns[k];
1646:     for (l=0; l<ncolumns; l++) { /* insert a row of B to a column of Btdense */
1647:       col   = *(columns + colorforcol[k] + l);
1648:       btcol = bj + bi[col];
1649:       btval = ba + bi[col];
1650:       anz   = bi[col+1] - bi[col];
1651:       for (j=0; j<anz; j++) {
1652:         brow            = btcol[j];
1653:         btval_den[brow] = btval[j];
1654:       }
1655:     }
1656:     btval_den += m;
1657:   }
1658:   return(0);
1659: }

1661: PetscErrorCode MatTransColoringApplyDenToSp_SeqAIJ(MatTransposeColoring matcoloring,Mat Cden,Mat Csp)
1662: {
1663:   PetscErrorCode    ierr;
1664:   Mat_SeqAIJ        *csp = (Mat_SeqAIJ*)Csp->data;
1665:   const PetscScalar *ca_den,*ca_den_ptr;
1666:   PetscScalar       *ca=csp->a;
1667:   PetscInt          k,l,m=Cden->rmap->n,ncolors=matcoloring->ncolors;
1668:   PetscInt          brows=matcoloring->brows,*den2sp=matcoloring->den2sp;
1669:   PetscInt          nrows,*row,*idx;
1670:   PetscInt          *rows=matcoloring->rows,*colorforrow=matcoloring->colorforrow;

1673:   MatDenseGetArrayRead(Cden,&ca_den);

1675:   if (brows > 0) {
1676:     PetscInt *lstart,row_end,row_start;
1677:     lstart = matcoloring->lstart;
1678:     PetscArrayzero(lstart,ncolors);

1680:     row_end = brows;
1681:     if (row_end > m) row_end = m;
1682:     for (row_start=0; row_start<m; row_start+=brows) { /* loop over row blocks of Csp */
1683:       ca_den_ptr = ca_den;
1684:       for (k=0; k<ncolors; k++) { /* loop over colors (columns of Cden) */
1685:         nrows = matcoloring->nrows[k];
1686:         row   = rows  + colorforrow[k];
1687:         idx   = den2sp + colorforrow[k];
1688:         for (l=lstart[k]; l<nrows; l++) {
1689:           if (row[l] >= row_end) {
1690:             lstart[k] = l;
1691:             break;
1692:           } else {
1693:             ca[idx[l]] = ca_den_ptr[row[l]];
1694:           }
1695:         }
1696:         ca_den_ptr += m;
1697:       }
1698:       row_end += brows;
1699:       if (row_end > m) row_end = m;
1700:     }
1701:   } else { /* non-blocked impl: loop over columns of Csp - slow if Csp is large */
1702:     ca_den_ptr = ca_den;
1703:     for (k=0; k<ncolors; k++) {
1704:       nrows = matcoloring->nrows[k];
1705:       row   = rows  + colorforrow[k];
1706:       idx   = den2sp + colorforrow[k];
1707:       for (l=0; l<nrows; l++) {
1708:         ca[idx[l]] = ca_den_ptr[row[l]];
1709:       }
1710:       ca_den_ptr += m;
1711:     }
1712:   }

1714:   MatDenseRestoreArrayRead(Cden,&ca_den);
1715: #if defined(PETSC_USE_INFO)
1716:   if (matcoloring->brows > 0) {
1717:     PetscInfo1(Csp,"Loop over %D row blocks for den2sp\n",brows);
1718:   } else {
1719:     PetscInfo(Csp,"Loop over colors/columns of Cden, inefficient for large sparse matrix product \n");
1720:   }
1721: #endif
1722:   return(0);
1723: }

1725: PetscErrorCode MatTransposeColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatTransposeColoring c)
1726: {
1728:   PetscInt       i,n,nrows,Nbs,j,k,m,ncols,col,cm;
1729:   const PetscInt *is,*ci,*cj,*row_idx;
1730:   PetscInt       nis = iscoloring->n,*rowhit,bs = 1;
1731:   IS             *isa;
1732:   Mat_SeqAIJ     *csp = (Mat_SeqAIJ*)mat->data;
1733:   PetscInt       *colorforrow,*rows,*rows_i,*idxhit,*spidx,*den2sp,*den2sp_i;
1734:   PetscInt       *colorforcol,*columns,*columns_i,brows;
1735:   PetscBool      flg;

1738:   ISColoringGetIS(iscoloring,PETSC_USE_POINTER,PETSC_IGNORE,&isa);

1740:   /* bs >1 is not being tested yet! */
1741:   Nbs       = mat->cmap->N/bs;
1742:   c->M      = mat->rmap->N/bs;  /* set total rows, columns and local rows */
1743:   c->N      = Nbs;
1744:   c->m      = c->M;
1745:   c->rstart = 0;
1746:   c->brows  = 100;

1748:   c->ncolors = nis;
1749:   PetscMalloc3(nis,&c->ncolumns,nis,&c->nrows,nis+1,&colorforrow);
1750:   PetscMalloc1(csp->nz+1,&rows);
1751:   PetscMalloc1(csp->nz+1,&den2sp);

1753:   brows = c->brows;
1754:   PetscOptionsGetInt(NULL,NULL,"-matden2sp_brows",&brows,&flg);
1755:   if (flg) c->brows = brows;
1756:   if (brows > 0) {
1757:     PetscMalloc1(nis+1,&c->lstart);
1758:   }

1760:   colorforrow[0] = 0;
1761:   rows_i         = rows;
1762:   den2sp_i       = den2sp;

1764:   PetscMalloc1(nis+1,&colorforcol);
1765:   PetscMalloc1(Nbs+1,&columns);

1767:   colorforcol[0] = 0;
1768:   columns_i      = columns;

1770:   /* get column-wise storage of mat */
1771:   MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);

1773:   cm   = c->m;
1774:   PetscMalloc1(cm+1,&rowhit);
1775:   PetscMalloc1(cm+1,&idxhit);
1776:   for (i=0; i<nis; i++) { /* loop over color */
1777:     ISGetLocalSize(isa[i],&n);
1778:     ISGetIndices(isa[i],&is);

1780:     c->ncolumns[i] = n;
1781:     if (n) {
1782:       PetscArraycpy(columns_i,is,n);
1783:     }
1784:     colorforcol[i+1] = colorforcol[i] + n;
1785:     columns_i       += n;

1787:     /* fast, crude version requires O(N*N) work */
1788:     PetscArrayzero(rowhit,cm);

1790:     for (j=0; j<n; j++) { /* loop over columns*/
1791:       col     = is[j];
1792:       row_idx = cj + ci[col];
1793:       m       = ci[col+1] - ci[col];
1794:       for (k=0; k<m; k++) { /* loop over columns marking them in rowhit */
1795:         idxhit[*row_idx]   = spidx[ci[col] + k];
1796:         rowhit[*row_idx++] = col + 1;
1797:       }
1798:     }
1799:     /* count the number of hits */
1800:     nrows = 0;
1801:     for (j=0; j<cm; j++) {
1802:       if (rowhit[j]) nrows++;
1803:     }
1804:     c->nrows[i]      = nrows;
1805:     colorforrow[i+1] = colorforrow[i] + nrows;

1807:     nrows = 0;
1808:     for (j=0; j<cm; j++) { /* loop over rows */
1809:       if (rowhit[j]) {
1810:         rows_i[nrows]   = j;
1811:         den2sp_i[nrows] = idxhit[j];
1812:         nrows++;
1813:       }
1814:     }
1815:     den2sp_i += nrows;

1817:     ISRestoreIndices(isa[i],&is);
1818:     rows_i += nrows;
1819:   }
1820:   MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);
1821:   PetscFree(rowhit);
1822:   ISColoringRestoreIS(iscoloring,PETSC_USE_POINTER,&isa);
1823:   if (csp->nz != colorforrow[nis]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"csp->nz %d != colorforrow[nis] %d",csp->nz,colorforrow[nis]);

1825:   c->colorforrow = colorforrow;
1826:   c->rows        = rows;
1827:   c->den2sp      = den2sp;
1828:   c->colorforcol = colorforcol;
1829:   c->columns     = columns;

1831:   PetscFree(idxhit);
1832:   return(0);
1833: }

1835: /* --------------------------------------------------------------- */
1836: static PetscErrorCode MatProductNumeric_AtB_SeqAIJ_SeqAIJ(Mat C)
1837: {
1839:   Mat_Product    *product = C->product;
1840:   Mat            A=product->A,B=product->B;

1843:   if (C->ops->mattransposemultnumeric) {
1844:     /* Alg: "outerproduct" */
1845:     (C->ops->mattransposemultnumeric)(A,B,C);
1846:   } else {
1847:     /* Alg: "matmatmult" -- C = At*B */
1848:     Mat_SeqAIJ          *c = (Mat_SeqAIJ*)C->data;
1849:     Mat_MatTransMatMult *atb = c->atb;
1850:     Mat                 At = atb->At;

1852:     if (atb->updateAt) { /* At is computed in MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ() */
1853:       MatTranspose_SeqAIJ(A,MAT_REUSE_MATRIX,&At);
1854:     }
1855:     MatMatMultNumeric_SeqAIJ_SeqAIJ(At,B,C);
1856:     atb->updateAt = PETSC_TRUE;
1857:   }
1858:   return(0);
1859: }

1861: static PetscErrorCode MatProductSymbolic_AtB_SeqAIJ_SeqAIJ(Mat C)
1862: {
1864:   Mat_Product    *product = C->product;
1865:   Mat            A=product->A,B=product->B;
1866:   PetscReal      fill=product->fill;

1869:   MatTransposeMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);

1871:   C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJ_SeqAIJ;
1872:   return(0);
1873: }

1875: /* --------------------------------------------------------------- */
1876: static PetscErrorCode MatProductSetFromOptions_SeqAIJ_AB(Mat C)
1877: {
1879:   Mat_Product    *product = C->product;
1880:   PetscInt       alg = 0; /* default algorithm */
1881:   PetscBool      flg = PETSC_FALSE;
1882: #if !defined(PETSC_HAVE_HYPRE)
1883:   const char     *algTypes[7] = {"sorted","scalable","scalable_fast","heap","btheap","llcondensed","rowmerge"};
1884:   PetscInt       nalg = 7;
1885: #else
1886:   const char     *algTypes[8] = {"sorted","scalable","scalable_fast","heap","btheap","llcondensed","rowmerge","hypre"};
1887:   PetscInt       nalg = 8;
1888: #endif

1891:   /* Set default algorithm */
1892:   PetscStrcmp(C->product->alg,"default",&flg);
1893:   if (flg) {
1894:     MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
1895:   }

1897:   /* Get runtime option */
1898:   if (product->api_user) {
1899:     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");
1900:     PetscOptionsEList("-matmatmult_via","Algorithmic approach","MatMatMult",algTypes,nalg,algTypes[0],&alg,&flg);
1901:     PetscOptionsEnd();
1902:   } else {
1903:     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");
1904:     PetscOptionsEList("-matproduct_ab_via","Algorithmic approach","MatProduct_AB",algTypes,nalg,algTypes[0],&alg,&flg);
1905:     PetscOptionsEnd();
1906:   }
1907:   if (flg) {
1908:     MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
1909:   }

1911:   C->ops->productsymbolic = MatProductSymbolic_AB;
1912:   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
1913:   return(0);
1914: }

1916: static PetscErrorCode MatProductSetFromOptions_SeqAIJ_AtB(Mat C)
1917: {
1919:   Mat_Product    *product = C->product;
1920:   PetscInt       alg = 0; /* default algorithm */
1921:   PetscBool      flg = PETSC_FALSE;
1922:   const char     *algTypes[2] = {"at*b","outerproduct"};
1923:   PetscInt       nalg = 2;

1926:   /* Set default algorithm */
1927:   PetscStrcmp(product->alg,"default",&flg);
1928:   if (flg) {
1929:     MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
1930:   }

1932:   /* Get runtime option */
1933:   if (product->api_user) {
1934:     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatTransposeMatMult","Mat");
1935:     PetscOptionsEList("-mattransposematmult_via","Algorithmic approach","MatTransposeMatMult",algTypes,nalg,algTypes[alg],&alg,&flg);
1936:     PetscOptionsEnd();
1937:   } else {
1938:     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AtB","Mat");
1939:     PetscOptionsEList("-matproduct_atb_via","Algorithmic approach","MatProduct_AtB",algTypes,nalg,algTypes[alg],&alg,&flg);
1940:     PetscOptionsEnd();
1941:   }
1942:   if (flg) {
1943:     MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
1944:   }

1946:   C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJ_SeqAIJ;
1947:   return(0);
1948: }

1950: static PetscErrorCode MatProductSetFromOptions_SeqAIJ_ABt(Mat C)
1951: {
1953:   Mat_Product    *product = C->product;
1954:   PetscInt       alg = 0; /* default algorithm */
1955:   PetscBool      flg = PETSC_FALSE;
1956:   const char     *algTypes[2] = {"default","color"};
1957:   PetscInt       nalg = 2;

1960:   /* Set default algorithm */
1961:   PetscStrcmp(C->product->alg,"default",&flg);
1962:   if (!flg) {
1963:     alg = 1;
1964:     MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
1965:   }

1967:   /* Get runtime option */
1968:   if (product->api_user) {
1969:     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");
1970:     PetscOptionsEList("-matmattransmult_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);
1971:     PetscOptionsEnd();
1972:   } else {
1973:     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");
1974:     PetscOptionsEList("-matproduct_abt_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);
1975:     PetscOptionsEnd();
1976:   }
1977:   if (flg) {
1978:     MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
1979:   }

1981:   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
1982:   C->ops->productsymbolic          = MatProductSymbolic_ABt;
1983:   return(0);
1984: }

1986: static PetscErrorCode MatProductSetFromOptions_SeqAIJ_PtAP(Mat C)
1987: {
1989:   Mat_Product    *product = C->product;
1990:   PetscBool      flg = PETSC_FALSE;
1991:   PetscInt       alg = 0; /* default algorithm -- alg=1 should be default!!! */
1992: #if !defined(PETSC_HAVE_HYPRE)
1993:   const char      *algTypes[2] = {"scalable","rap"};
1994:   PetscInt        nalg = 2;
1995: #else
1996:   const char      *algTypes[3] = {"scalable","rap","hypre"};
1997:   PetscInt        nalg = 3;
1998: #endif

2001:   /* Set default algorithm */
2002:   PetscStrcmp(product->alg,"default",&flg);
2003:   if (flg) {
2004:     MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
2005:   }

2007:   /* Get runtime option */
2008:   if (product->api_user) {
2009:     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");
2010:     PetscOptionsEList("-matptap_via","Algorithmic approach","MatPtAP",algTypes,nalg,algTypes[0],&alg,&flg);
2011:     PetscOptionsEnd();
2012:   } else {
2013:     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");
2014:     PetscOptionsEList("-matproduct_ptap_via","Algorithmic approach","MatProduct_PtAP",algTypes,nalg,algTypes[0],&alg,&flg);
2015:     PetscOptionsEnd();
2016:   }
2017:   if (flg) {
2018:     MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
2019:   }

2021:   C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqAIJ;
2022:   return(0);
2023: }

2025: static PetscErrorCode MatProductSetFromOptions_SeqAIJ_RARt(Mat C)
2026: {
2028:   Mat_Product    *product = C->product;
2029:   PetscBool      flg = PETSC_FALSE;
2030:   PetscInt       alg = 0; /* default algorithm */
2031:   const char     *algTypes[3] = {"r*a*rt","r*art","coloring_rart"};
2032:   PetscInt        nalg = 3;

2035:   /* Set default algorithm */
2036:   PetscStrcmp(product->alg,"default",&flg);
2037:   if (flg) {
2038:     MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
2039:   }

2041:   /* Get runtime option */
2042:   if (product->api_user) {
2043:     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatRARt","Mat");
2044:     PetscOptionsEList("-matrart_via","Algorithmic approach","MatRARt",algTypes,nalg,algTypes[0],&alg,&flg);
2045:     PetscOptionsEnd();
2046:   } else {
2047:     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_RARt","Mat");
2048:     PetscOptionsEList("-matproduct_rart_via","Algorithmic approach","MatProduct_RARt",algTypes,nalg,algTypes[0],&alg,&flg);
2049:     PetscOptionsEnd();
2050:   }
2051:   if (flg) {
2052:     MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
2053:   }

2055:   C->ops->productsymbolic = MatProductSymbolic_RARt_SeqAIJ_SeqAIJ;
2056:   return(0);
2057: }

2059: /* ABC = A*B*C = A*(B*C); ABC's algorithm must be chosen from AB's algorithm */
2060: static PetscErrorCode MatProductSetFromOptions_SeqAIJ_ABC(Mat C)
2061: {
2063:   Mat_Product    *product = C->product;
2064:   PetscInt       alg = 0; /* default algorithm */
2065:   PetscBool      flg = PETSC_FALSE;
2066:   const char     *algTypes[7] = {"sorted","scalable","scalable_fast","heap","btheap","llcondensed","rowmerge"};
2067:   PetscInt       nalg = 7;

2070:   /* Set default algorithm */
2071:   PetscStrcmp(product->alg,"default",&flg);
2072:   if (flg) {
2073:     MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
2074:   }

2076:   /* Get runtime option */
2077:   if (product->api_user) {
2078:     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMatMult","Mat");
2079:     PetscOptionsEList("-matmatmatmult_via","Algorithmic approach","MatMatMatMult",algTypes,nalg,algTypes[alg],&alg,&flg);
2080:     PetscOptionsEnd();
2081:   } else {
2082:     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABC","Mat");
2083:     PetscOptionsEList("-matproduct_abc_via","Algorithmic approach","MatProduct_ABC",algTypes,nalg,algTypes[alg],&alg,&flg);
2084:     PetscOptionsEnd();
2085:   }
2086:   if (flg) {
2087:     MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
2088:   }

2090:   C->ops->matmatmultsymbolic = MatMatMatMultSymbolic_SeqAIJ_SeqAIJ_SeqAIJ;
2091:   C->ops->productsymbolic    = MatProductSymbolic_ABC;
2092:   return(0);
2093: }

2095: PetscErrorCode MatProductSetFromOptions_SeqAIJ(Mat C)
2096: {
2098:   Mat_Product    *product = C->product;

2101:   switch (product->type) {
2102:   case MATPRODUCT_AB:
2103:     MatProductSetFromOptions_SeqAIJ_AB(C);
2104:     break;
2105:   case MATPRODUCT_AtB:
2106:     MatProductSetFromOptions_SeqAIJ_AtB(C);
2107:     break;
2108:   case MATPRODUCT_ABt:
2109:     MatProductSetFromOptions_SeqAIJ_ABt(C);
2110:     break;
2111:   case MATPRODUCT_PtAP:
2112:     MatProductSetFromOptions_SeqAIJ_PtAP(C);
2113:     break;
2114:   case MATPRODUCT_RARt:
2115:     MatProductSetFromOptions_SeqAIJ_RARt(C);
2116:     break;
2117:   case MATPRODUCT_ABC:
2118:     MatProductSetFromOptions_SeqAIJ_ABC(C);
2119:     break;
2120:   default: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatProduct type %s is not supported for SeqAIJ and SeqAIJ matrices",MatProductTypes[product->type]);
2121:   }
2122:   return(0);
2123: }