Actual source code: maij.c


  2: /*
  3:     Defines the basic matrix operations for the MAIJ  matrix storage format.
  4:   This format is used for restriction and interpolation operations for
  5:   multicomponent problems. It interpolates each component the same way
  6:   independently.

  8:      We provide:
  9:          MatMult()
 10:          MatMultTranspose()
 11:          MatMultTransposeAdd()
 12:          MatMultAdd()
 13:           and
 14:          MatCreateMAIJ(Mat,dof,Mat*)

 16:      This single directory handles both the sequential and parallel codes
 17: */

 19: #include <../src/mat/impls/maij/maij.h>
 20: #include <../src/mat/utils/freespace.h>

 22: /*@
 23:    MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix

 25:    Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel

 27:    Input Parameter:
 28: .  A - the MAIJ matrix

 30:    Output Parameter:
 31: .  B - the AIJ matrix

 33:    Level: advanced

 35:    Notes:
 36:     The reference count on the AIJ matrix is not increased so you should not destroy it.

 38: .seealso: MatCreateMAIJ()
 39: @*/
 40: PetscErrorCode  MatMAIJGetAIJ(Mat A,Mat *B)
 41: {
 43:   PetscBool      ismpimaij,isseqmaij;

 46:   PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
 47:   PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
 48:   if (ismpimaij) {
 49:     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;

 51:     *B = b->A;
 52:   } else if (isseqmaij) {
 53:     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;

 55:     *B = b->AIJ;
 56:   } else {
 57:     *B = A;
 58:   }
 59:   return(0);
 60: }

 62: /*@
 63:    MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size

 65:    Logically Collective

 67:    Input Parameters:
 68: +  A - the MAIJ matrix
 69: -  dof - the block size for the new matrix

 71:    Output Parameter:
 72: .  B - the new MAIJ matrix

 74:    Level: advanced

 76: .seealso: MatCreateMAIJ()
 77: @*/
 78: PetscErrorCode  MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
 79: {
 81:   Mat            Aij = NULL;

 85:   MatMAIJGetAIJ(A,&Aij);
 86:   MatCreateMAIJ(Aij,dof,B);
 87:   return(0);
 88: }

 90: PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
 91: {
 93:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;

 96:   MatDestroy(&b->AIJ);
 97:   PetscFree(A->data);
 98:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL);
 99:   PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaij_seqmaij_C",NULL);
100:   return(0);
101: }

103: PetscErrorCode MatSetUp_MAIJ(Mat A)
104: {
106:   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices");
107: }

109: PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
110: {
112:   Mat            B;

115:   MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
116:   MatView(B,viewer);
117:   MatDestroy(&B);
118:   return(0);
119: }

121: PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
122: {
124:   Mat            B;

127:   MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
128:   MatView(B,viewer);
129:   MatDestroy(&B);
130:   return(0);
131: }

133: PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
134: {
136:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

139:   MatDestroy(&b->AIJ);
140:   MatDestroy(&b->OAIJ);
141:   MatDestroy(&b->A);
142:   VecScatterDestroy(&b->ctx);
143:   VecDestroy(&b->w);
144:   PetscFree(A->data);
145:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL);
146:   PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_mpiaij_mpimaij_C",NULL);
147:   PetscObjectChangeTypeName((PetscObject)A,NULL);
148:   return(0);
149: }

151: /*MC
152:   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
153:   multicomponent problems, interpolating or restricting each component the same way independently.
154:   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.

156:   Operations provided:
157: + MatMult
158: . MatMultTranspose
159: . MatMultAdd
160: - MatMultTransposeAdd

162:   Level: advanced

164: .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ()
165: M*/

167: PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
168: {
170:   Mat_MPIMAIJ    *b;
171:   PetscMPIInt    size;

174:   PetscNewLog(A,&b);
175:   A->data  = (void*)b;

177:   PetscMemzero(A->ops,sizeof(struct _MatOps));

179:   A->ops->setup = MatSetUp_MAIJ;

181:   b->AIJ  = NULL;
182:   b->dof  = 0;
183:   b->OAIJ = NULL;
184:   b->ctx  = NULL;
185:   b->w    = NULL;
186:   MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
187:   if (size == 1) {
188:     PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);
189:   } else {
190:     PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);
191:   }
192:   A->preallocated  = PETSC_TRUE;
193:   A->assembled     = PETSC_TRUE;
194:   return(0);
195: }

197: /* --------------------------------------------------------------------------------------*/
198: PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
199: {
200:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
201:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
202:   const PetscScalar *x,*v;
203:   PetscScalar       *y, sum1, sum2;
204:   PetscErrorCode    ierr;
205:   PetscInt          nonzerorow=0,n,i,jrow,j;
206:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;

209:   VecGetArrayRead(xx,&x);
210:   VecGetArray(yy,&y);
211:   idx  = a->j;
212:   v    = a->a;
213:   ii   = a->i;

215:   for (i=0; i<m; i++) {
216:     jrow  = ii[i];
217:     n     = ii[i+1] - jrow;
218:     sum1  = 0.0;
219:     sum2  = 0.0;

221:     nonzerorow += (n>0);
222:     for (j=0; j<n; j++) {
223:       sum1 += v[jrow]*x[2*idx[jrow]];
224:       sum2 += v[jrow]*x[2*idx[jrow]+1];
225:       jrow++;
226:     }
227:     y[2*i]   = sum1;
228:     y[2*i+1] = sum2;
229:   }

231:   PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);
232:   VecRestoreArrayRead(xx,&x);
233:   VecRestoreArray(yy,&y);
234:   return(0);
235: }

237: PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
238: {
239:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
240:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
241:   const PetscScalar *x,*v;
242:   PetscScalar       *y,alpha1,alpha2;
243:   PetscErrorCode    ierr;
244:   PetscInt          n,i;
245:   const PetscInt    m = b->AIJ->rmap->n,*idx;

248:   VecSet(yy,0.0);
249:   VecGetArrayRead(xx,&x);
250:   VecGetArray(yy,&y);

252:   for (i=0; i<m; i++) {
253:     idx    = a->j + a->i[i];
254:     v      = a->a + a->i[i];
255:     n      = a->i[i+1] - a->i[i];
256:     alpha1 = x[2*i];
257:     alpha2 = x[2*i+1];
258:     while (n-->0) {
259:       y[2*(*idx)]   += alpha1*(*v);
260:       y[2*(*idx)+1] += alpha2*(*v);
261:       idx++; v++;
262:     }
263:   }
264:   PetscLogFlops(4.0*a->nz);
265:   VecRestoreArrayRead(xx,&x);
266:   VecRestoreArray(yy,&y);
267:   return(0);
268: }

270: PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
271: {
272:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
273:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
274:   const PetscScalar *x,*v;
275:   PetscScalar       *y,sum1, sum2;
276:   PetscErrorCode    ierr;
277:   PetscInt          n,i,jrow,j;
278:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

281:   if (yy != zz) {VecCopy(yy,zz);}
282:   VecGetArrayRead(xx,&x);
283:   VecGetArray(zz,&y);
284:   idx  = a->j;
285:   v    = a->a;
286:   ii   = a->i;

288:   for (i=0; i<m; i++) {
289:     jrow = ii[i];
290:     n    = ii[i+1] - jrow;
291:     sum1 = 0.0;
292:     sum2 = 0.0;
293:     for (j=0; j<n; j++) {
294:       sum1 += v[jrow]*x[2*idx[jrow]];
295:       sum2 += v[jrow]*x[2*idx[jrow]+1];
296:       jrow++;
297:     }
298:     y[2*i]   += sum1;
299:     y[2*i+1] += sum2;
300:   }

302:   PetscLogFlops(4.0*a->nz);
303:   VecRestoreArrayRead(xx,&x);
304:   VecRestoreArray(zz,&y);
305:   return(0);
306: }
307: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
308: {
309:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
310:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
311:   const PetscScalar *x,*v;
312:   PetscScalar       *y,alpha1,alpha2;
313:   PetscErrorCode    ierr;
314:   PetscInt          n,i;
315:   const PetscInt    m = b->AIJ->rmap->n,*idx;

318:   if (yy != zz) {VecCopy(yy,zz);}
319:   VecGetArrayRead(xx,&x);
320:   VecGetArray(zz,&y);

322:   for (i=0; i<m; i++) {
323:     idx    = a->j + a->i[i];
324:     v      = a->a + a->i[i];
325:     n      = a->i[i+1] - a->i[i];
326:     alpha1 = x[2*i];
327:     alpha2 = x[2*i+1];
328:     while (n-->0) {
329:       y[2*(*idx)]   += alpha1*(*v);
330:       y[2*(*idx)+1] += alpha2*(*v);
331:       idx++; v++;
332:     }
333:   }
334:   PetscLogFlops(4.0*a->nz);
335:   VecRestoreArrayRead(xx,&x);
336:   VecRestoreArray(zz,&y);
337:   return(0);
338: }
339: /* --------------------------------------------------------------------------------------*/
340: PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
341: {
342:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
343:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
344:   const PetscScalar *x,*v;
345:   PetscScalar       *y,sum1, sum2, sum3;
346:   PetscErrorCode    ierr;
347:   PetscInt          nonzerorow=0,n,i,jrow,j;
348:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;

351:   VecGetArrayRead(xx,&x);
352:   VecGetArray(yy,&y);
353:   idx  = a->j;
354:   v    = a->a;
355:   ii   = a->i;

357:   for (i=0; i<m; i++) {
358:     jrow  = ii[i];
359:     n     = ii[i+1] - jrow;
360:     sum1  = 0.0;
361:     sum2  = 0.0;
362:     sum3  = 0.0;

364:     nonzerorow += (n>0);
365:     for (j=0; j<n; j++) {
366:       sum1 += v[jrow]*x[3*idx[jrow]];
367:       sum2 += v[jrow]*x[3*idx[jrow]+1];
368:       sum3 += v[jrow]*x[3*idx[jrow]+2];
369:       jrow++;
370:      }
371:     y[3*i]   = sum1;
372:     y[3*i+1] = sum2;
373:     y[3*i+2] = sum3;
374:   }

376:   PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);
377:   VecRestoreArrayRead(xx,&x);
378:   VecRestoreArray(yy,&y);
379:   return(0);
380: }

382: PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
383: {
384:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
385:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
386:   const PetscScalar *x,*v;
387:   PetscScalar       *y,alpha1,alpha2,alpha3;
388:   PetscErrorCode    ierr;
389:   PetscInt          n,i;
390:   const PetscInt    m = b->AIJ->rmap->n,*idx;

393:   VecSet(yy,0.0);
394:   VecGetArrayRead(xx,&x);
395:   VecGetArray(yy,&y);

397:   for (i=0; i<m; i++) {
398:     idx    = a->j + a->i[i];
399:     v      = a->a + a->i[i];
400:     n      = a->i[i+1] - a->i[i];
401:     alpha1 = x[3*i];
402:     alpha2 = x[3*i+1];
403:     alpha3 = x[3*i+2];
404:     while (n-->0) {
405:       y[3*(*idx)]   += alpha1*(*v);
406:       y[3*(*idx)+1] += alpha2*(*v);
407:       y[3*(*idx)+2] += alpha3*(*v);
408:       idx++; v++;
409:     }
410:   }
411:   PetscLogFlops(6.0*a->nz);
412:   VecRestoreArrayRead(xx,&x);
413:   VecRestoreArray(yy,&y);
414:   return(0);
415: }

417: PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
418: {
419:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
420:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
421:   const PetscScalar *x,*v;
422:   PetscScalar       *y,sum1, sum2, sum3;
423:   PetscErrorCode    ierr;
424:   PetscInt          n,i,jrow,j;
425:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

428:   if (yy != zz) {VecCopy(yy,zz);}
429:   VecGetArrayRead(xx,&x);
430:   VecGetArray(zz,&y);
431:   idx  = a->j;
432:   v    = a->a;
433:   ii   = a->i;

435:   for (i=0; i<m; i++) {
436:     jrow = ii[i];
437:     n    = ii[i+1] - jrow;
438:     sum1 = 0.0;
439:     sum2 = 0.0;
440:     sum3 = 0.0;
441:     for (j=0; j<n; j++) {
442:       sum1 += v[jrow]*x[3*idx[jrow]];
443:       sum2 += v[jrow]*x[3*idx[jrow]+1];
444:       sum3 += v[jrow]*x[3*idx[jrow]+2];
445:       jrow++;
446:     }
447:     y[3*i]   += sum1;
448:     y[3*i+1] += sum2;
449:     y[3*i+2] += sum3;
450:   }

452:   PetscLogFlops(6.0*a->nz);
453:   VecRestoreArrayRead(xx,&x);
454:   VecRestoreArray(zz,&y);
455:   return(0);
456: }
457: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
458: {
459:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
460:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
461:   const PetscScalar *x,*v;
462:   PetscScalar       *y,alpha1,alpha2,alpha3;
463:   PetscErrorCode    ierr;
464:   PetscInt          n,i;
465:   const PetscInt    m = b->AIJ->rmap->n,*idx;

468:   if (yy != zz) {VecCopy(yy,zz);}
469:   VecGetArrayRead(xx,&x);
470:   VecGetArray(zz,&y);
471:   for (i=0; i<m; i++) {
472:     idx    = a->j + a->i[i];
473:     v      = a->a + a->i[i];
474:     n      = a->i[i+1] - a->i[i];
475:     alpha1 = x[3*i];
476:     alpha2 = x[3*i+1];
477:     alpha3 = x[3*i+2];
478:     while (n-->0) {
479:       y[3*(*idx)]   += alpha1*(*v);
480:       y[3*(*idx)+1] += alpha2*(*v);
481:       y[3*(*idx)+2] += alpha3*(*v);
482:       idx++; v++;
483:     }
484:   }
485:   PetscLogFlops(6.0*a->nz);
486:   VecRestoreArrayRead(xx,&x);
487:   VecRestoreArray(zz,&y);
488:   return(0);
489: }

491: /* ------------------------------------------------------------------------------*/
492: PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
493: {
494:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
495:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
496:   const PetscScalar *x,*v;
497:   PetscScalar       *y,sum1, sum2, sum3, sum4;
498:   PetscErrorCode    ierr;
499:   PetscInt          nonzerorow=0,n,i,jrow,j;
500:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;

503:   VecGetArrayRead(xx,&x);
504:   VecGetArray(yy,&y);
505:   idx  = a->j;
506:   v    = a->a;
507:   ii   = a->i;

509:   for (i=0; i<m; i++) {
510:     jrow        = ii[i];
511:     n           = ii[i+1] - jrow;
512:     sum1        = 0.0;
513:     sum2        = 0.0;
514:     sum3        = 0.0;
515:     sum4        = 0.0;
516:     nonzerorow += (n>0);
517:     for (j=0; j<n; j++) {
518:       sum1 += v[jrow]*x[4*idx[jrow]];
519:       sum2 += v[jrow]*x[4*idx[jrow]+1];
520:       sum3 += v[jrow]*x[4*idx[jrow]+2];
521:       sum4 += v[jrow]*x[4*idx[jrow]+3];
522:       jrow++;
523:     }
524:     y[4*i]   = sum1;
525:     y[4*i+1] = sum2;
526:     y[4*i+2] = sum3;
527:     y[4*i+3] = sum4;
528:   }

530:   PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);
531:   VecRestoreArrayRead(xx,&x);
532:   VecRestoreArray(yy,&y);
533:   return(0);
534: }

536: PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
537: {
538:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
539:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
540:   const PetscScalar *x,*v;
541:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
542:   PetscErrorCode    ierr;
543:   PetscInt          n,i;
544:   const PetscInt    m = b->AIJ->rmap->n,*idx;

547:   VecSet(yy,0.0);
548:   VecGetArrayRead(xx,&x);
549:   VecGetArray(yy,&y);
550:   for (i=0; i<m; i++) {
551:     idx    = a->j + a->i[i];
552:     v      = a->a + a->i[i];
553:     n      = a->i[i+1] - a->i[i];
554:     alpha1 = x[4*i];
555:     alpha2 = x[4*i+1];
556:     alpha3 = x[4*i+2];
557:     alpha4 = x[4*i+3];
558:     while (n-->0) {
559:       y[4*(*idx)]   += alpha1*(*v);
560:       y[4*(*idx)+1] += alpha2*(*v);
561:       y[4*(*idx)+2] += alpha3*(*v);
562:       y[4*(*idx)+3] += alpha4*(*v);
563:       idx++; v++;
564:     }
565:   }
566:   PetscLogFlops(8.0*a->nz);
567:   VecRestoreArrayRead(xx,&x);
568:   VecRestoreArray(yy,&y);
569:   return(0);
570: }

572: PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
573: {
574:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
575:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
576:   const PetscScalar *x,*v;
577:   PetscScalar       *y,sum1, sum2, sum3, sum4;
578:   PetscErrorCode    ierr;
579:   PetscInt          n,i,jrow,j;
580:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

583:   if (yy != zz) {VecCopy(yy,zz);}
584:   VecGetArrayRead(xx,&x);
585:   VecGetArray(zz,&y);
586:   idx  = a->j;
587:   v    = a->a;
588:   ii   = a->i;

590:   for (i=0; i<m; i++) {
591:     jrow = ii[i];
592:     n    = ii[i+1] - jrow;
593:     sum1 = 0.0;
594:     sum2 = 0.0;
595:     sum3 = 0.0;
596:     sum4 = 0.0;
597:     for (j=0; j<n; j++) {
598:       sum1 += v[jrow]*x[4*idx[jrow]];
599:       sum2 += v[jrow]*x[4*idx[jrow]+1];
600:       sum3 += v[jrow]*x[4*idx[jrow]+2];
601:       sum4 += v[jrow]*x[4*idx[jrow]+3];
602:       jrow++;
603:     }
604:     y[4*i]   += sum1;
605:     y[4*i+1] += sum2;
606:     y[4*i+2] += sum3;
607:     y[4*i+3] += sum4;
608:   }

610:   PetscLogFlops(8.0*a->nz);
611:   VecRestoreArrayRead(xx,&x);
612:   VecRestoreArray(zz,&y);
613:   return(0);
614: }
615: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
616: {
617:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
618:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
619:   const PetscScalar *x,*v;
620:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
621:   PetscErrorCode    ierr;
622:   PetscInt          n,i;
623:   const PetscInt    m = b->AIJ->rmap->n,*idx;

626:   if (yy != zz) {VecCopy(yy,zz);}
627:   VecGetArrayRead(xx,&x);
628:   VecGetArray(zz,&y);

630:   for (i=0; i<m; i++) {
631:     idx    = a->j + a->i[i];
632:     v      = a->a + a->i[i];
633:     n      = a->i[i+1] - a->i[i];
634:     alpha1 = x[4*i];
635:     alpha2 = x[4*i+1];
636:     alpha3 = x[4*i+2];
637:     alpha4 = x[4*i+3];
638:     while (n-->0) {
639:       y[4*(*idx)]   += alpha1*(*v);
640:       y[4*(*idx)+1] += alpha2*(*v);
641:       y[4*(*idx)+2] += alpha3*(*v);
642:       y[4*(*idx)+3] += alpha4*(*v);
643:       idx++; v++;
644:     }
645:   }
646:   PetscLogFlops(8.0*a->nz);
647:   VecRestoreArrayRead(xx,&x);
648:   VecRestoreArray(zz,&y);
649:   return(0);
650: }
651: /* ------------------------------------------------------------------------------*/

653: PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
654: {
655:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
656:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
657:   const PetscScalar *x,*v;
658:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
659:   PetscErrorCode    ierr;
660:   PetscInt          nonzerorow=0,n,i,jrow,j;
661:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;

664:   VecGetArrayRead(xx,&x);
665:   VecGetArray(yy,&y);
666:   idx  = a->j;
667:   v    = a->a;
668:   ii   = a->i;

670:   for (i=0; i<m; i++) {
671:     jrow  = ii[i];
672:     n     = ii[i+1] - jrow;
673:     sum1  = 0.0;
674:     sum2  = 0.0;
675:     sum3  = 0.0;
676:     sum4  = 0.0;
677:     sum5  = 0.0;

679:     nonzerorow += (n>0);
680:     for (j=0; j<n; j++) {
681:       sum1 += v[jrow]*x[5*idx[jrow]];
682:       sum2 += v[jrow]*x[5*idx[jrow]+1];
683:       sum3 += v[jrow]*x[5*idx[jrow]+2];
684:       sum4 += v[jrow]*x[5*idx[jrow]+3];
685:       sum5 += v[jrow]*x[5*idx[jrow]+4];
686:       jrow++;
687:     }
688:     y[5*i]   = sum1;
689:     y[5*i+1] = sum2;
690:     y[5*i+2] = sum3;
691:     y[5*i+3] = sum4;
692:     y[5*i+4] = sum5;
693:   }

695:   PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);
696:   VecRestoreArrayRead(xx,&x);
697:   VecRestoreArray(yy,&y);
698:   return(0);
699: }

701: PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
702: {
703:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
704:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
705:   const PetscScalar *x,*v;
706:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
707:   PetscErrorCode    ierr;
708:   PetscInt          n,i;
709:   const PetscInt    m = b->AIJ->rmap->n,*idx;

712:   VecSet(yy,0.0);
713:   VecGetArrayRead(xx,&x);
714:   VecGetArray(yy,&y);

716:   for (i=0; i<m; i++) {
717:     idx    = a->j + a->i[i];
718:     v      = a->a + a->i[i];
719:     n      = a->i[i+1] - a->i[i];
720:     alpha1 = x[5*i];
721:     alpha2 = x[5*i+1];
722:     alpha3 = x[5*i+2];
723:     alpha4 = x[5*i+3];
724:     alpha5 = x[5*i+4];
725:     while (n-->0) {
726:       y[5*(*idx)]   += alpha1*(*v);
727:       y[5*(*idx)+1] += alpha2*(*v);
728:       y[5*(*idx)+2] += alpha3*(*v);
729:       y[5*(*idx)+3] += alpha4*(*v);
730:       y[5*(*idx)+4] += alpha5*(*v);
731:       idx++; v++;
732:     }
733:   }
734:   PetscLogFlops(10.0*a->nz);
735:   VecRestoreArrayRead(xx,&x);
736:   VecRestoreArray(yy,&y);
737:   return(0);
738: }

740: PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
741: {
742:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
743:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
744:   const PetscScalar *x,*v;
745:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
746:   PetscErrorCode    ierr;
747:   PetscInt          n,i,jrow,j;
748:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

751:   if (yy != zz) {VecCopy(yy,zz);}
752:   VecGetArrayRead(xx,&x);
753:   VecGetArray(zz,&y);
754:   idx  = a->j;
755:   v    = a->a;
756:   ii   = a->i;

758:   for (i=0; i<m; i++) {
759:     jrow = ii[i];
760:     n    = ii[i+1] - jrow;
761:     sum1 = 0.0;
762:     sum2 = 0.0;
763:     sum3 = 0.0;
764:     sum4 = 0.0;
765:     sum5 = 0.0;
766:     for (j=0; j<n; j++) {
767:       sum1 += v[jrow]*x[5*idx[jrow]];
768:       sum2 += v[jrow]*x[5*idx[jrow]+1];
769:       sum3 += v[jrow]*x[5*idx[jrow]+2];
770:       sum4 += v[jrow]*x[5*idx[jrow]+3];
771:       sum5 += v[jrow]*x[5*idx[jrow]+4];
772:       jrow++;
773:     }
774:     y[5*i]   += sum1;
775:     y[5*i+1] += sum2;
776:     y[5*i+2] += sum3;
777:     y[5*i+3] += sum4;
778:     y[5*i+4] += sum5;
779:   }

781:   PetscLogFlops(10.0*a->nz);
782:   VecRestoreArrayRead(xx,&x);
783:   VecRestoreArray(zz,&y);
784:   return(0);
785: }

787: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
788: {
789:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
790:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
791:   const PetscScalar *x,*v;
792:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
793:   PetscErrorCode    ierr;
794:   PetscInt          n,i;
795:   const PetscInt    m = b->AIJ->rmap->n,*idx;

798:   if (yy != zz) {VecCopy(yy,zz);}
799:   VecGetArrayRead(xx,&x);
800:   VecGetArray(zz,&y);

802:   for (i=0; i<m; i++) {
803:     idx    = a->j + a->i[i];
804:     v      = a->a + a->i[i];
805:     n      = a->i[i+1] - a->i[i];
806:     alpha1 = x[5*i];
807:     alpha2 = x[5*i+1];
808:     alpha3 = x[5*i+2];
809:     alpha4 = x[5*i+3];
810:     alpha5 = x[5*i+4];
811:     while (n-->0) {
812:       y[5*(*idx)]   += alpha1*(*v);
813:       y[5*(*idx)+1] += alpha2*(*v);
814:       y[5*(*idx)+2] += alpha3*(*v);
815:       y[5*(*idx)+3] += alpha4*(*v);
816:       y[5*(*idx)+4] += alpha5*(*v);
817:       idx++; v++;
818:     }
819:   }
820:   PetscLogFlops(10.0*a->nz);
821:   VecRestoreArrayRead(xx,&x);
822:   VecRestoreArray(zz,&y);
823:   return(0);
824: }

826: /* ------------------------------------------------------------------------------*/
827: PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
828: {
829:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
830:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
831:   const PetscScalar *x,*v;
832:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
833:   PetscErrorCode    ierr;
834:   PetscInt          nonzerorow=0,n,i,jrow,j;
835:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;

838:   VecGetArrayRead(xx,&x);
839:   VecGetArray(yy,&y);
840:   idx  = a->j;
841:   v    = a->a;
842:   ii   = a->i;

844:   for (i=0; i<m; i++) {
845:     jrow  = ii[i];
846:     n     = ii[i+1] - jrow;
847:     sum1  = 0.0;
848:     sum2  = 0.0;
849:     sum3  = 0.0;
850:     sum4  = 0.0;
851:     sum5  = 0.0;
852:     sum6  = 0.0;

854:     nonzerorow += (n>0);
855:     for (j=0; j<n; j++) {
856:       sum1 += v[jrow]*x[6*idx[jrow]];
857:       sum2 += v[jrow]*x[6*idx[jrow]+1];
858:       sum3 += v[jrow]*x[6*idx[jrow]+2];
859:       sum4 += v[jrow]*x[6*idx[jrow]+3];
860:       sum5 += v[jrow]*x[6*idx[jrow]+4];
861:       sum6 += v[jrow]*x[6*idx[jrow]+5];
862:       jrow++;
863:     }
864:     y[6*i]   = sum1;
865:     y[6*i+1] = sum2;
866:     y[6*i+2] = sum3;
867:     y[6*i+3] = sum4;
868:     y[6*i+4] = sum5;
869:     y[6*i+5] = sum6;
870:   }

872:   PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);
873:   VecRestoreArrayRead(xx,&x);
874:   VecRestoreArray(yy,&y);
875:   return(0);
876: }

878: PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
879: {
880:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
881:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
882:   const PetscScalar *x,*v;
883:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
884:   PetscErrorCode    ierr;
885:   PetscInt          n,i;
886:   const PetscInt    m = b->AIJ->rmap->n,*idx;

889:   VecSet(yy,0.0);
890:   VecGetArrayRead(xx,&x);
891:   VecGetArray(yy,&y);

893:   for (i=0; i<m; i++) {
894:     idx    = a->j + a->i[i];
895:     v      = a->a + a->i[i];
896:     n      = a->i[i+1] - a->i[i];
897:     alpha1 = x[6*i];
898:     alpha2 = x[6*i+1];
899:     alpha3 = x[6*i+2];
900:     alpha4 = x[6*i+3];
901:     alpha5 = x[6*i+4];
902:     alpha6 = x[6*i+5];
903:     while (n-->0) {
904:       y[6*(*idx)]   += alpha1*(*v);
905:       y[6*(*idx)+1] += alpha2*(*v);
906:       y[6*(*idx)+2] += alpha3*(*v);
907:       y[6*(*idx)+3] += alpha4*(*v);
908:       y[6*(*idx)+4] += alpha5*(*v);
909:       y[6*(*idx)+5] += alpha6*(*v);
910:       idx++; v++;
911:     }
912:   }
913:   PetscLogFlops(12.0*a->nz);
914:   VecRestoreArrayRead(xx,&x);
915:   VecRestoreArray(yy,&y);
916:   return(0);
917: }

919: PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
920: {
921:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
922:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
923:   const PetscScalar *x,*v;
924:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
925:   PetscErrorCode    ierr;
926:   PetscInt          n,i,jrow,j;
927:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

930:   if (yy != zz) {VecCopy(yy,zz);}
931:   VecGetArrayRead(xx,&x);
932:   VecGetArray(zz,&y);
933:   idx  = a->j;
934:   v    = a->a;
935:   ii   = a->i;

937:   for (i=0; i<m; i++) {
938:     jrow = ii[i];
939:     n    = ii[i+1] - jrow;
940:     sum1 = 0.0;
941:     sum2 = 0.0;
942:     sum3 = 0.0;
943:     sum4 = 0.0;
944:     sum5 = 0.0;
945:     sum6 = 0.0;
946:     for (j=0; j<n; j++) {
947:       sum1 += v[jrow]*x[6*idx[jrow]];
948:       sum2 += v[jrow]*x[6*idx[jrow]+1];
949:       sum3 += v[jrow]*x[6*idx[jrow]+2];
950:       sum4 += v[jrow]*x[6*idx[jrow]+3];
951:       sum5 += v[jrow]*x[6*idx[jrow]+4];
952:       sum6 += v[jrow]*x[6*idx[jrow]+5];
953:       jrow++;
954:     }
955:     y[6*i]   += sum1;
956:     y[6*i+1] += sum2;
957:     y[6*i+2] += sum3;
958:     y[6*i+3] += sum4;
959:     y[6*i+4] += sum5;
960:     y[6*i+5] += sum6;
961:   }

963:   PetscLogFlops(12.0*a->nz);
964:   VecRestoreArrayRead(xx,&x);
965:   VecRestoreArray(zz,&y);
966:   return(0);
967: }

969: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
970: {
971:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
972:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
973:   const PetscScalar *x,*v;
974:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
975:   PetscErrorCode    ierr;
976:   PetscInt          n,i;
977:   const PetscInt    m = b->AIJ->rmap->n,*idx;

980:   if (yy != zz) {VecCopy(yy,zz);}
981:   VecGetArrayRead(xx,&x);
982:   VecGetArray(zz,&y);

984:   for (i=0; i<m; i++) {
985:     idx    = a->j + a->i[i];
986:     v      = a->a + a->i[i];
987:     n      = a->i[i+1] - a->i[i];
988:     alpha1 = x[6*i];
989:     alpha2 = x[6*i+1];
990:     alpha3 = x[6*i+2];
991:     alpha4 = x[6*i+3];
992:     alpha5 = x[6*i+4];
993:     alpha6 = x[6*i+5];
994:     while (n-->0) {
995:       y[6*(*idx)]   += alpha1*(*v);
996:       y[6*(*idx)+1] += alpha2*(*v);
997:       y[6*(*idx)+2] += alpha3*(*v);
998:       y[6*(*idx)+3] += alpha4*(*v);
999:       y[6*(*idx)+4] += alpha5*(*v);
1000:       y[6*(*idx)+5] += alpha6*(*v);
1001:       idx++; v++;
1002:     }
1003:   }
1004:   PetscLogFlops(12.0*a->nz);
1005:   VecRestoreArrayRead(xx,&x);
1006:   VecRestoreArray(zz,&y);
1007:   return(0);
1008: }

1010: /* ------------------------------------------------------------------------------*/
1011: PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1012: {
1013:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1014:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1015:   const PetscScalar *x,*v;
1016:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1017:   PetscErrorCode    ierr;
1018:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1019:   PetscInt          nonzerorow=0,n,i,jrow,j;

1022:   VecGetArrayRead(xx,&x);
1023:   VecGetArray(yy,&y);
1024:   idx  = a->j;
1025:   v    = a->a;
1026:   ii   = a->i;

1028:   for (i=0; i<m; i++) {
1029:     jrow  = ii[i];
1030:     n     = ii[i+1] - jrow;
1031:     sum1  = 0.0;
1032:     sum2  = 0.0;
1033:     sum3  = 0.0;
1034:     sum4  = 0.0;
1035:     sum5  = 0.0;
1036:     sum6  = 0.0;
1037:     sum7  = 0.0;

1039:     nonzerorow += (n>0);
1040:     for (j=0; j<n; j++) {
1041:       sum1 += v[jrow]*x[7*idx[jrow]];
1042:       sum2 += v[jrow]*x[7*idx[jrow]+1];
1043:       sum3 += v[jrow]*x[7*idx[jrow]+2];
1044:       sum4 += v[jrow]*x[7*idx[jrow]+3];
1045:       sum5 += v[jrow]*x[7*idx[jrow]+4];
1046:       sum6 += v[jrow]*x[7*idx[jrow]+5];
1047:       sum7 += v[jrow]*x[7*idx[jrow]+6];
1048:       jrow++;
1049:     }
1050:     y[7*i]   = sum1;
1051:     y[7*i+1] = sum2;
1052:     y[7*i+2] = sum3;
1053:     y[7*i+3] = sum4;
1054:     y[7*i+4] = sum5;
1055:     y[7*i+5] = sum6;
1056:     y[7*i+6] = sum7;
1057:   }

1059:   PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);
1060:   VecRestoreArrayRead(xx,&x);
1061:   VecRestoreArray(yy,&y);
1062:   return(0);
1063: }

1065: PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1066: {
1067:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1068:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1069:   const PetscScalar *x,*v;
1070:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1071:   PetscErrorCode    ierr;
1072:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1073:   PetscInt          n,i;

1076:   VecSet(yy,0.0);
1077:   VecGetArrayRead(xx,&x);
1078:   VecGetArray(yy,&y);

1080:   for (i=0; i<m; i++) {
1081:     idx    = a->j + a->i[i];
1082:     v      = a->a + a->i[i];
1083:     n      = a->i[i+1] - a->i[i];
1084:     alpha1 = x[7*i];
1085:     alpha2 = x[7*i+1];
1086:     alpha3 = x[7*i+2];
1087:     alpha4 = x[7*i+3];
1088:     alpha5 = x[7*i+4];
1089:     alpha6 = x[7*i+5];
1090:     alpha7 = x[7*i+6];
1091:     while (n-->0) {
1092:       y[7*(*idx)]   += alpha1*(*v);
1093:       y[7*(*idx)+1] += alpha2*(*v);
1094:       y[7*(*idx)+2] += alpha3*(*v);
1095:       y[7*(*idx)+3] += alpha4*(*v);
1096:       y[7*(*idx)+4] += alpha5*(*v);
1097:       y[7*(*idx)+5] += alpha6*(*v);
1098:       y[7*(*idx)+6] += alpha7*(*v);
1099:       idx++; v++;
1100:     }
1101:   }
1102:   PetscLogFlops(14.0*a->nz);
1103:   VecRestoreArrayRead(xx,&x);
1104:   VecRestoreArray(yy,&y);
1105:   return(0);
1106: }

1108: PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1109: {
1110:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1111:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1112:   const PetscScalar *x,*v;
1113:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1114:   PetscErrorCode    ierr;
1115:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1116:   PetscInt          n,i,jrow,j;

1119:   if (yy != zz) {VecCopy(yy,zz);}
1120:   VecGetArrayRead(xx,&x);
1121:   VecGetArray(zz,&y);
1122:   idx  = a->j;
1123:   v    = a->a;
1124:   ii   = a->i;

1126:   for (i=0; i<m; i++) {
1127:     jrow = ii[i];
1128:     n    = ii[i+1] - jrow;
1129:     sum1 = 0.0;
1130:     sum2 = 0.0;
1131:     sum3 = 0.0;
1132:     sum4 = 0.0;
1133:     sum5 = 0.0;
1134:     sum6 = 0.0;
1135:     sum7 = 0.0;
1136:     for (j=0; j<n; j++) {
1137:       sum1 += v[jrow]*x[7*idx[jrow]];
1138:       sum2 += v[jrow]*x[7*idx[jrow]+1];
1139:       sum3 += v[jrow]*x[7*idx[jrow]+2];
1140:       sum4 += v[jrow]*x[7*idx[jrow]+3];
1141:       sum5 += v[jrow]*x[7*idx[jrow]+4];
1142:       sum6 += v[jrow]*x[7*idx[jrow]+5];
1143:       sum7 += v[jrow]*x[7*idx[jrow]+6];
1144:       jrow++;
1145:     }
1146:     y[7*i]   += sum1;
1147:     y[7*i+1] += sum2;
1148:     y[7*i+2] += sum3;
1149:     y[7*i+3] += sum4;
1150:     y[7*i+4] += sum5;
1151:     y[7*i+5] += sum6;
1152:     y[7*i+6] += sum7;
1153:   }

1155:   PetscLogFlops(14.0*a->nz);
1156:   VecRestoreArrayRead(xx,&x);
1157:   VecRestoreArray(zz,&y);
1158:   return(0);
1159: }

1161: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1162: {
1163:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1164:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1165:   const PetscScalar *x,*v;
1166:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1167:   PetscErrorCode    ierr;
1168:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1169:   PetscInt          n,i;

1172:   if (yy != zz) {VecCopy(yy,zz);}
1173:   VecGetArrayRead(xx,&x);
1174:   VecGetArray(zz,&y);
1175:   for (i=0; i<m; i++) {
1176:     idx    = a->j + a->i[i];
1177:     v      = a->a + a->i[i];
1178:     n      = a->i[i+1] - a->i[i];
1179:     alpha1 = x[7*i];
1180:     alpha2 = x[7*i+1];
1181:     alpha3 = x[7*i+2];
1182:     alpha4 = x[7*i+3];
1183:     alpha5 = x[7*i+4];
1184:     alpha6 = x[7*i+5];
1185:     alpha7 = x[7*i+6];
1186:     while (n-->0) {
1187:       y[7*(*idx)]   += alpha1*(*v);
1188:       y[7*(*idx)+1] += alpha2*(*v);
1189:       y[7*(*idx)+2] += alpha3*(*v);
1190:       y[7*(*idx)+3] += alpha4*(*v);
1191:       y[7*(*idx)+4] += alpha5*(*v);
1192:       y[7*(*idx)+5] += alpha6*(*v);
1193:       y[7*(*idx)+6] += alpha7*(*v);
1194:       idx++; v++;
1195:     }
1196:   }
1197:   PetscLogFlops(14.0*a->nz);
1198:   VecRestoreArrayRead(xx,&x);
1199:   VecRestoreArray(zz,&y);
1200:   return(0);
1201: }

1203: PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1204: {
1205:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1206:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1207:   const PetscScalar *x,*v;
1208:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1209:   PetscErrorCode    ierr;
1210:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1211:   PetscInt          nonzerorow=0,n,i,jrow,j;

1214:   VecGetArrayRead(xx,&x);
1215:   VecGetArray(yy,&y);
1216:   idx  = a->j;
1217:   v    = a->a;
1218:   ii   = a->i;

1220:   for (i=0; i<m; i++) {
1221:     jrow  = ii[i];
1222:     n     = ii[i+1] - jrow;
1223:     sum1  = 0.0;
1224:     sum2  = 0.0;
1225:     sum3  = 0.0;
1226:     sum4  = 0.0;
1227:     sum5  = 0.0;
1228:     sum6  = 0.0;
1229:     sum7  = 0.0;
1230:     sum8  = 0.0;

1232:     nonzerorow += (n>0);
1233:     for (j=0; j<n; j++) {
1234:       sum1 += v[jrow]*x[8*idx[jrow]];
1235:       sum2 += v[jrow]*x[8*idx[jrow]+1];
1236:       sum3 += v[jrow]*x[8*idx[jrow]+2];
1237:       sum4 += v[jrow]*x[8*idx[jrow]+3];
1238:       sum5 += v[jrow]*x[8*idx[jrow]+4];
1239:       sum6 += v[jrow]*x[8*idx[jrow]+5];
1240:       sum7 += v[jrow]*x[8*idx[jrow]+6];
1241:       sum8 += v[jrow]*x[8*idx[jrow]+7];
1242:       jrow++;
1243:     }
1244:     y[8*i]   = sum1;
1245:     y[8*i+1] = sum2;
1246:     y[8*i+2] = sum3;
1247:     y[8*i+3] = sum4;
1248:     y[8*i+4] = sum5;
1249:     y[8*i+5] = sum6;
1250:     y[8*i+6] = sum7;
1251:     y[8*i+7] = sum8;
1252:   }

1254:   PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);
1255:   VecRestoreArrayRead(xx,&x);
1256:   VecRestoreArray(yy,&y);
1257:   return(0);
1258: }

1260: PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1261: {
1262:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1263:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1264:   const PetscScalar *x,*v;
1265:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1266:   PetscErrorCode    ierr;
1267:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1268:   PetscInt          n,i;

1271:   VecSet(yy,0.0);
1272:   VecGetArrayRead(xx,&x);
1273:   VecGetArray(yy,&y);

1275:   for (i=0; i<m; i++) {
1276:     idx    = a->j + a->i[i];
1277:     v      = a->a + a->i[i];
1278:     n      = a->i[i+1] - a->i[i];
1279:     alpha1 = x[8*i];
1280:     alpha2 = x[8*i+1];
1281:     alpha3 = x[8*i+2];
1282:     alpha4 = x[8*i+3];
1283:     alpha5 = x[8*i+4];
1284:     alpha6 = x[8*i+5];
1285:     alpha7 = x[8*i+6];
1286:     alpha8 = x[8*i+7];
1287:     while (n-->0) {
1288:       y[8*(*idx)]   += alpha1*(*v);
1289:       y[8*(*idx)+1] += alpha2*(*v);
1290:       y[8*(*idx)+2] += alpha3*(*v);
1291:       y[8*(*idx)+3] += alpha4*(*v);
1292:       y[8*(*idx)+4] += alpha5*(*v);
1293:       y[8*(*idx)+5] += alpha6*(*v);
1294:       y[8*(*idx)+6] += alpha7*(*v);
1295:       y[8*(*idx)+7] += alpha8*(*v);
1296:       idx++; v++;
1297:     }
1298:   }
1299:   PetscLogFlops(16.0*a->nz);
1300:   VecRestoreArrayRead(xx,&x);
1301:   VecRestoreArray(yy,&y);
1302:   return(0);
1303: }

1305: PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1306: {
1307:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1308:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1309:   const PetscScalar *x,*v;
1310:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1311:   PetscErrorCode    ierr;
1312:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1313:   PetscInt          n,i,jrow,j;

1316:   if (yy != zz) {VecCopy(yy,zz);}
1317:   VecGetArrayRead(xx,&x);
1318:   VecGetArray(zz,&y);
1319:   idx  = a->j;
1320:   v    = a->a;
1321:   ii   = a->i;

1323:   for (i=0; i<m; i++) {
1324:     jrow = ii[i];
1325:     n    = ii[i+1] - jrow;
1326:     sum1 = 0.0;
1327:     sum2 = 0.0;
1328:     sum3 = 0.0;
1329:     sum4 = 0.0;
1330:     sum5 = 0.0;
1331:     sum6 = 0.0;
1332:     sum7 = 0.0;
1333:     sum8 = 0.0;
1334:     for (j=0; j<n; j++) {
1335:       sum1 += v[jrow]*x[8*idx[jrow]];
1336:       sum2 += v[jrow]*x[8*idx[jrow]+1];
1337:       sum3 += v[jrow]*x[8*idx[jrow]+2];
1338:       sum4 += v[jrow]*x[8*idx[jrow]+3];
1339:       sum5 += v[jrow]*x[8*idx[jrow]+4];
1340:       sum6 += v[jrow]*x[8*idx[jrow]+5];
1341:       sum7 += v[jrow]*x[8*idx[jrow]+6];
1342:       sum8 += v[jrow]*x[8*idx[jrow]+7];
1343:       jrow++;
1344:     }
1345:     y[8*i]   += sum1;
1346:     y[8*i+1] += sum2;
1347:     y[8*i+2] += sum3;
1348:     y[8*i+3] += sum4;
1349:     y[8*i+4] += sum5;
1350:     y[8*i+5] += sum6;
1351:     y[8*i+6] += sum7;
1352:     y[8*i+7] += sum8;
1353:   }

1355:   PetscLogFlops(16.0*a->nz);
1356:   VecRestoreArrayRead(xx,&x);
1357:   VecRestoreArray(zz,&y);
1358:   return(0);
1359: }

1361: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1362: {
1363:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1364:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1365:   const PetscScalar *x,*v;
1366:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1367:   PetscErrorCode    ierr;
1368:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1369:   PetscInt          n,i;

1372:   if (yy != zz) {VecCopy(yy,zz);}
1373:   VecGetArrayRead(xx,&x);
1374:   VecGetArray(zz,&y);
1375:   for (i=0; i<m; i++) {
1376:     idx    = a->j + a->i[i];
1377:     v      = a->a + a->i[i];
1378:     n      = a->i[i+1] - a->i[i];
1379:     alpha1 = x[8*i];
1380:     alpha2 = x[8*i+1];
1381:     alpha3 = x[8*i+2];
1382:     alpha4 = x[8*i+3];
1383:     alpha5 = x[8*i+4];
1384:     alpha6 = x[8*i+5];
1385:     alpha7 = x[8*i+6];
1386:     alpha8 = x[8*i+7];
1387:     while (n-->0) {
1388:       y[8*(*idx)]   += alpha1*(*v);
1389:       y[8*(*idx)+1] += alpha2*(*v);
1390:       y[8*(*idx)+2] += alpha3*(*v);
1391:       y[8*(*idx)+3] += alpha4*(*v);
1392:       y[8*(*idx)+4] += alpha5*(*v);
1393:       y[8*(*idx)+5] += alpha6*(*v);
1394:       y[8*(*idx)+6] += alpha7*(*v);
1395:       y[8*(*idx)+7] += alpha8*(*v);
1396:       idx++; v++;
1397:     }
1398:   }
1399:   PetscLogFlops(16.0*a->nz);
1400:   VecRestoreArrayRead(xx,&x);
1401:   VecRestoreArray(zz,&y);
1402:   return(0);
1403: }

1405: /* ------------------------------------------------------------------------------*/
1406: PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1407: {
1408:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1409:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1410:   const PetscScalar *x,*v;
1411:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1412:   PetscErrorCode    ierr;
1413:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1414:   PetscInt          nonzerorow=0,n,i,jrow,j;

1417:   VecGetArrayRead(xx,&x);
1418:   VecGetArray(yy,&y);
1419:   idx  = a->j;
1420:   v    = a->a;
1421:   ii   = a->i;

1423:   for (i=0; i<m; i++) {
1424:     jrow  = ii[i];
1425:     n     = ii[i+1] - jrow;
1426:     sum1  = 0.0;
1427:     sum2  = 0.0;
1428:     sum3  = 0.0;
1429:     sum4  = 0.0;
1430:     sum5  = 0.0;
1431:     sum6  = 0.0;
1432:     sum7  = 0.0;
1433:     sum8  = 0.0;
1434:     sum9  = 0.0;

1436:     nonzerorow += (n>0);
1437:     for (j=0; j<n; j++) {
1438:       sum1 += v[jrow]*x[9*idx[jrow]];
1439:       sum2 += v[jrow]*x[9*idx[jrow]+1];
1440:       sum3 += v[jrow]*x[9*idx[jrow]+2];
1441:       sum4 += v[jrow]*x[9*idx[jrow]+3];
1442:       sum5 += v[jrow]*x[9*idx[jrow]+4];
1443:       sum6 += v[jrow]*x[9*idx[jrow]+5];
1444:       sum7 += v[jrow]*x[9*idx[jrow]+6];
1445:       sum8 += v[jrow]*x[9*idx[jrow]+7];
1446:       sum9 += v[jrow]*x[9*idx[jrow]+8];
1447:       jrow++;
1448:     }
1449:     y[9*i]   = sum1;
1450:     y[9*i+1] = sum2;
1451:     y[9*i+2] = sum3;
1452:     y[9*i+3] = sum4;
1453:     y[9*i+4] = sum5;
1454:     y[9*i+5] = sum6;
1455:     y[9*i+6] = sum7;
1456:     y[9*i+7] = sum8;
1457:     y[9*i+8] = sum9;
1458:   }

1460:   PetscLogFlops(18.0*a->nz - 9*nonzerorow);
1461:   VecRestoreArrayRead(xx,&x);
1462:   VecRestoreArray(yy,&y);
1463:   return(0);
1464: }

1466: /* ------------------------------------------------------------------------------*/

1468: PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1469: {
1470:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1471:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1472:   const PetscScalar *x,*v;
1473:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1474:   PetscErrorCode    ierr;
1475:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1476:   PetscInt          n,i;

1479:   VecSet(yy,0.0);
1480:   VecGetArrayRead(xx,&x);
1481:   VecGetArray(yy,&y);

1483:   for (i=0; i<m; i++) {
1484:     idx    = a->j + a->i[i];
1485:     v      = a->a + a->i[i];
1486:     n      = a->i[i+1] - a->i[i];
1487:     alpha1 = x[9*i];
1488:     alpha2 = x[9*i+1];
1489:     alpha3 = x[9*i+2];
1490:     alpha4 = x[9*i+3];
1491:     alpha5 = x[9*i+4];
1492:     alpha6 = x[9*i+5];
1493:     alpha7 = x[9*i+6];
1494:     alpha8 = x[9*i+7];
1495:     alpha9 = x[9*i+8];
1496:     while (n-->0) {
1497:       y[9*(*idx)]   += alpha1*(*v);
1498:       y[9*(*idx)+1] += alpha2*(*v);
1499:       y[9*(*idx)+2] += alpha3*(*v);
1500:       y[9*(*idx)+3] += alpha4*(*v);
1501:       y[9*(*idx)+4] += alpha5*(*v);
1502:       y[9*(*idx)+5] += alpha6*(*v);
1503:       y[9*(*idx)+6] += alpha7*(*v);
1504:       y[9*(*idx)+7] += alpha8*(*v);
1505:       y[9*(*idx)+8] += alpha9*(*v);
1506:       idx++; v++;
1507:     }
1508:   }
1509:   PetscLogFlops(18.0*a->nz);
1510:   VecRestoreArrayRead(xx,&x);
1511:   VecRestoreArray(yy,&y);
1512:   return(0);
1513: }

1515: PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1516: {
1517:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1518:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1519:   const PetscScalar *x,*v;
1520:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1521:   PetscErrorCode    ierr;
1522:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1523:   PetscInt          n,i,jrow,j;

1526:   if (yy != zz) {VecCopy(yy,zz);}
1527:   VecGetArrayRead(xx,&x);
1528:   VecGetArray(zz,&y);
1529:   idx  = a->j;
1530:   v    = a->a;
1531:   ii   = a->i;

1533:   for (i=0; i<m; i++) {
1534:     jrow = ii[i];
1535:     n    = ii[i+1] - jrow;
1536:     sum1 = 0.0;
1537:     sum2 = 0.0;
1538:     sum3 = 0.0;
1539:     sum4 = 0.0;
1540:     sum5 = 0.0;
1541:     sum6 = 0.0;
1542:     sum7 = 0.0;
1543:     sum8 = 0.0;
1544:     sum9 = 0.0;
1545:     for (j=0; j<n; j++) {
1546:       sum1 += v[jrow]*x[9*idx[jrow]];
1547:       sum2 += v[jrow]*x[9*idx[jrow]+1];
1548:       sum3 += v[jrow]*x[9*idx[jrow]+2];
1549:       sum4 += v[jrow]*x[9*idx[jrow]+3];
1550:       sum5 += v[jrow]*x[9*idx[jrow]+4];
1551:       sum6 += v[jrow]*x[9*idx[jrow]+5];
1552:       sum7 += v[jrow]*x[9*idx[jrow]+6];
1553:       sum8 += v[jrow]*x[9*idx[jrow]+7];
1554:       sum9 += v[jrow]*x[9*idx[jrow]+8];
1555:       jrow++;
1556:     }
1557:     y[9*i]   += sum1;
1558:     y[9*i+1] += sum2;
1559:     y[9*i+2] += sum3;
1560:     y[9*i+3] += sum4;
1561:     y[9*i+4] += sum5;
1562:     y[9*i+5] += sum6;
1563:     y[9*i+6] += sum7;
1564:     y[9*i+7] += sum8;
1565:     y[9*i+8] += sum9;
1566:   }

1568:   PetscLogFlops(18.0*a->nz);
1569:   VecRestoreArrayRead(xx,&x);
1570:   VecRestoreArray(zz,&y);
1571:   return(0);
1572: }

1574: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1575: {
1576:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1577:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1578:   const PetscScalar *x,*v;
1579:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1580:   PetscErrorCode    ierr;
1581:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1582:   PetscInt          n,i;

1585:   if (yy != zz) {VecCopy(yy,zz);}
1586:   VecGetArrayRead(xx,&x);
1587:   VecGetArray(zz,&y);
1588:   for (i=0; i<m; i++) {
1589:     idx    = a->j + a->i[i];
1590:     v      = a->a + a->i[i];
1591:     n      = a->i[i+1] - a->i[i];
1592:     alpha1 = x[9*i];
1593:     alpha2 = x[9*i+1];
1594:     alpha3 = x[9*i+2];
1595:     alpha4 = x[9*i+3];
1596:     alpha5 = x[9*i+4];
1597:     alpha6 = x[9*i+5];
1598:     alpha7 = x[9*i+6];
1599:     alpha8 = x[9*i+7];
1600:     alpha9 = x[9*i+8];
1601:     while (n-->0) {
1602:       y[9*(*idx)]   += alpha1*(*v);
1603:       y[9*(*idx)+1] += alpha2*(*v);
1604:       y[9*(*idx)+2] += alpha3*(*v);
1605:       y[9*(*idx)+3] += alpha4*(*v);
1606:       y[9*(*idx)+4] += alpha5*(*v);
1607:       y[9*(*idx)+5] += alpha6*(*v);
1608:       y[9*(*idx)+6] += alpha7*(*v);
1609:       y[9*(*idx)+7] += alpha8*(*v);
1610:       y[9*(*idx)+8] += alpha9*(*v);
1611:       idx++; v++;
1612:     }
1613:   }
1614:   PetscLogFlops(18.0*a->nz);
1615:   VecRestoreArrayRead(xx,&x);
1616:   VecRestoreArray(zz,&y);
1617:   return(0);
1618: }
1619: PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1620: {
1621:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1622:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1623:   const PetscScalar *x,*v;
1624:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1625:   PetscErrorCode    ierr;
1626:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1627:   PetscInt          nonzerorow=0,n,i,jrow,j;

1630:   VecGetArrayRead(xx,&x);
1631:   VecGetArray(yy,&y);
1632:   idx  = a->j;
1633:   v    = a->a;
1634:   ii   = a->i;

1636:   for (i=0; i<m; i++) {
1637:     jrow  = ii[i];
1638:     n     = ii[i+1] - jrow;
1639:     sum1  = 0.0;
1640:     sum2  = 0.0;
1641:     sum3  = 0.0;
1642:     sum4  = 0.0;
1643:     sum5  = 0.0;
1644:     sum6  = 0.0;
1645:     sum7  = 0.0;
1646:     sum8  = 0.0;
1647:     sum9  = 0.0;
1648:     sum10 = 0.0;

1650:     nonzerorow += (n>0);
1651:     for (j=0; j<n; j++) {
1652:       sum1  += v[jrow]*x[10*idx[jrow]];
1653:       sum2  += v[jrow]*x[10*idx[jrow]+1];
1654:       sum3  += v[jrow]*x[10*idx[jrow]+2];
1655:       sum4  += v[jrow]*x[10*idx[jrow]+3];
1656:       sum5  += v[jrow]*x[10*idx[jrow]+4];
1657:       sum6  += v[jrow]*x[10*idx[jrow]+5];
1658:       sum7  += v[jrow]*x[10*idx[jrow]+6];
1659:       sum8  += v[jrow]*x[10*idx[jrow]+7];
1660:       sum9  += v[jrow]*x[10*idx[jrow]+8];
1661:       sum10 += v[jrow]*x[10*idx[jrow]+9];
1662:       jrow++;
1663:     }
1664:     y[10*i]   = sum1;
1665:     y[10*i+1] = sum2;
1666:     y[10*i+2] = sum3;
1667:     y[10*i+3] = sum4;
1668:     y[10*i+4] = sum5;
1669:     y[10*i+5] = sum6;
1670:     y[10*i+6] = sum7;
1671:     y[10*i+7] = sum8;
1672:     y[10*i+8] = sum9;
1673:     y[10*i+9] = sum10;
1674:   }

1676:   PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);
1677:   VecRestoreArrayRead(xx,&x);
1678:   VecRestoreArray(yy,&y);
1679:   return(0);
1680: }

1682: PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1683: {
1684:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1685:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1686:   const PetscScalar *x,*v;
1687:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1688:   PetscErrorCode    ierr;
1689:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1690:   PetscInt          n,i,jrow,j;

1693:   if (yy != zz) {VecCopy(yy,zz);}
1694:   VecGetArrayRead(xx,&x);
1695:   VecGetArray(zz,&y);
1696:   idx  = a->j;
1697:   v    = a->a;
1698:   ii   = a->i;

1700:   for (i=0; i<m; i++) {
1701:     jrow  = ii[i];
1702:     n     = ii[i+1] - jrow;
1703:     sum1  = 0.0;
1704:     sum2  = 0.0;
1705:     sum3  = 0.0;
1706:     sum4  = 0.0;
1707:     sum5  = 0.0;
1708:     sum6  = 0.0;
1709:     sum7  = 0.0;
1710:     sum8  = 0.0;
1711:     sum9  = 0.0;
1712:     sum10 = 0.0;
1713:     for (j=0; j<n; j++) {
1714:       sum1  += v[jrow]*x[10*idx[jrow]];
1715:       sum2  += v[jrow]*x[10*idx[jrow]+1];
1716:       sum3  += v[jrow]*x[10*idx[jrow]+2];
1717:       sum4  += v[jrow]*x[10*idx[jrow]+3];
1718:       sum5  += v[jrow]*x[10*idx[jrow]+4];
1719:       sum6  += v[jrow]*x[10*idx[jrow]+5];
1720:       sum7  += v[jrow]*x[10*idx[jrow]+6];
1721:       sum8  += v[jrow]*x[10*idx[jrow]+7];
1722:       sum9  += v[jrow]*x[10*idx[jrow]+8];
1723:       sum10 += v[jrow]*x[10*idx[jrow]+9];
1724:       jrow++;
1725:     }
1726:     y[10*i]   += sum1;
1727:     y[10*i+1] += sum2;
1728:     y[10*i+2] += sum3;
1729:     y[10*i+3] += sum4;
1730:     y[10*i+4] += sum5;
1731:     y[10*i+5] += sum6;
1732:     y[10*i+6] += sum7;
1733:     y[10*i+7] += sum8;
1734:     y[10*i+8] += sum9;
1735:     y[10*i+9] += sum10;
1736:   }

1738:   PetscLogFlops(20.0*a->nz);
1739:   VecRestoreArrayRead(xx,&x);
1740:   VecRestoreArray(yy,&y);
1741:   return(0);
1742: }

1744: PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1745: {
1746:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1747:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1748:   const PetscScalar *x,*v;
1749:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1750:   PetscErrorCode    ierr;
1751:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1752:   PetscInt          n,i;

1755:   VecSet(yy,0.0);
1756:   VecGetArrayRead(xx,&x);
1757:   VecGetArray(yy,&y);

1759:   for (i=0; i<m; i++) {
1760:     idx     = a->j + a->i[i];
1761:     v       = a->a + a->i[i];
1762:     n       = a->i[i+1] - a->i[i];
1763:     alpha1  = x[10*i];
1764:     alpha2  = x[10*i+1];
1765:     alpha3  = x[10*i+2];
1766:     alpha4  = x[10*i+3];
1767:     alpha5  = x[10*i+4];
1768:     alpha6  = x[10*i+5];
1769:     alpha7  = x[10*i+6];
1770:     alpha8  = x[10*i+7];
1771:     alpha9  = x[10*i+8];
1772:     alpha10 = x[10*i+9];
1773:     while (n-->0) {
1774:       y[10*(*idx)]   += alpha1*(*v);
1775:       y[10*(*idx)+1] += alpha2*(*v);
1776:       y[10*(*idx)+2] += alpha3*(*v);
1777:       y[10*(*idx)+3] += alpha4*(*v);
1778:       y[10*(*idx)+4] += alpha5*(*v);
1779:       y[10*(*idx)+5] += alpha6*(*v);
1780:       y[10*(*idx)+6] += alpha7*(*v);
1781:       y[10*(*idx)+7] += alpha8*(*v);
1782:       y[10*(*idx)+8] += alpha9*(*v);
1783:       y[10*(*idx)+9] += alpha10*(*v);
1784:       idx++; v++;
1785:     }
1786:   }
1787:   PetscLogFlops(20.0*a->nz);
1788:   VecRestoreArrayRead(xx,&x);
1789:   VecRestoreArray(yy,&y);
1790:   return(0);
1791: }

1793: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1794: {
1795:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1796:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1797:   const PetscScalar *x,*v;
1798:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1799:   PetscErrorCode    ierr;
1800:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1801:   PetscInt          n,i;

1804:   if (yy != zz) {VecCopy(yy,zz);}
1805:   VecGetArrayRead(xx,&x);
1806:   VecGetArray(zz,&y);
1807:   for (i=0; i<m; i++) {
1808:     idx     = a->j + a->i[i];
1809:     v       = a->a + a->i[i];
1810:     n       = a->i[i+1] - a->i[i];
1811:     alpha1  = x[10*i];
1812:     alpha2  = x[10*i+1];
1813:     alpha3  = x[10*i+2];
1814:     alpha4  = x[10*i+3];
1815:     alpha5  = x[10*i+4];
1816:     alpha6  = x[10*i+5];
1817:     alpha7  = x[10*i+6];
1818:     alpha8  = x[10*i+7];
1819:     alpha9  = x[10*i+8];
1820:     alpha10 = x[10*i+9];
1821:     while (n-->0) {
1822:       y[10*(*idx)]   += alpha1*(*v);
1823:       y[10*(*idx)+1] += alpha2*(*v);
1824:       y[10*(*idx)+2] += alpha3*(*v);
1825:       y[10*(*idx)+3] += alpha4*(*v);
1826:       y[10*(*idx)+4] += alpha5*(*v);
1827:       y[10*(*idx)+5] += alpha6*(*v);
1828:       y[10*(*idx)+6] += alpha7*(*v);
1829:       y[10*(*idx)+7] += alpha8*(*v);
1830:       y[10*(*idx)+8] += alpha9*(*v);
1831:       y[10*(*idx)+9] += alpha10*(*v);
1832:       idx++; v++;
1833:     }
1834:   }
1835:   PetscLogFlops(20.0*a->nz);
1836:   VecRestoreArrayRead(xx,&x);
1837:   VecRestoreArray(zz,&y);
1838:   return(0);
1839: }

1841: /*--------------------------------------------------------------------------------------------*/
1842: PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1843: {
1844:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1845:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1846:   const PetscScalar *x,*v;
1847:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1848:   PetscErrorCode    ierr;
1849:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1850:   PetscInt          nonzerorow=0,n,i,jrow,j;

1853:   VecGetArrayRead(xx,&x);
1854:   VecGetArray(yy,&y);
1855:   idx  = a->j;
1856:   v    = a->a;
1857:   ii   = a->i;

1859:   for (i=0; i<m; i++) {
1860:     jrow  = ii[i];
1861:     n     = ii[i+1] - jrow;
1862:     sum1  = 0.0;
1863:     sum2  = 0.0;
1864:     sum3  = 0.0;
1865:     sum4  = 0.0;
1866:     sum5  = 0.0;
1867:     sum6  = 0.0;
1868:     sum7  = 0.0;
1869:     sum8  = 0.0;
1870:     sum9  = 0.0;
1871:     sum10 = 0.0;
1872:     sum11 = 0.0;

1874:     nonzerorow += (n>0);
1875:     for (j=0; j<n; j++) {
1876:       sum1  += v[jrow]*x[11*idx[jrow]];
1877:       sum2  += v[jrow]*x[11*idx[jrow]+1];
1878:       sum3  += v[jrow]*x[11*idx[jrow]+2];
1879:       sum4  += v[jrow]*x[11*idx[jrow]+3];
1880:       sum5  += v[jrow]*x[11*idx[jrow]+4];
1881:       sum6  += v[jrow]*x[11*idx[jrow]+5];
1882:       sum7  += v[jrow]*x[11*idx[jrow]+6];
1883:       sum8  += v[jrow]*x[11*idx[jrow]+7];
1884:       sum9  += v[jrow]*x[11*idx[jrow]+8];
1885:       sum10 += v[jrow]*x[11*idx[jrow]+9];
1886:       sum11 += v[jrow]*x[11*idx[jrow]+10];
1887:       jrow++;
1888:     }
1889:     y[11*i]    = sum1;
1890:     y[11*i+1]  = sum2;
1891:     y[11*i+2]  = sum3;
1892:     y[11*i+3]  = sum4;
1893:     y[11*i+4]  = sum5;
1894:     y[11*i+5]  = sum6;
1895:     y[11*i+6]  = sum7;
1896:     y[11*i+7]  = sum8;
1897:     y[11*i+8]  = sum9;
1898:     y[11*i+9]  = sum10;
1899:     y[11*i+10] = sum11;
1900:   }

1902:   PetscLogFlops(22.0*a->nz - 11*nonzerorow);
1903:   VecRestoreArrayRead(xx,&x);
1904:   VecRestoreArray(yy,&y);
1905:   return(0);
1906: }

1908: PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1909: {
1910:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1911:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1912:   const PetscScalar *x,*v;
1913:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1914:   PetscErrorCode    ierr;
1915:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1916:   PetscInt          n,i,jrow,j;

1919:   if (yy != zz) {VecCopy(yy,zz);}
1920:   VecGetArrayRead(xx,&x);
1921:   VecGetArray(zz,&y);
1922:   idx  = a->j;
1923:   v    = a->a;
1924:   ii   = a->i;

1926:   for (i=0; i<m; i++) {
1927:     jrow  = ii[i];
1928:     n     = ii[i+1] - jrow;
1929:     sum1  = 0.0;
1930:     sum2  = 0.0;
1931:     sum3  = 0.0;
1932:     sum4  = 0.0;
1933:     sum5  = 0.0;
1934:     sum6  = 0.0;
1935:     sum7  = 0.0;
1936:     sum8  = 0.0;
1937:     sum9  = 0.0;
1938:     sum10 = 0.0;
1939:     sum11 = 0.0;
1940:     for (j=0; j<n; j++) {
1941:       sum1  += v[jrow]*x[11*idx[jrow]];
1942:       sum2  += v[jrow]*x[11*idx[jrow]+1];
1943:       sum3  += v[jrow]*x[11*idx[jrow]+2];
1944:       sum4  += v[jrow]*x[11*idx[jrow]+3];
1945:       sum5  += v[jrow]*x[11*idx[jrow]+4];
1946:       sum6  += v[jrow]*x[11*idx[jrow]+5];
1947:       sum7  += v[jrow]*x[11*idx[jrow]+6];
1948:       sum8  += v[jrow]*x[11*idx[jrow]+7];
1949:       sum9  += v[jrow]*x[11*idx[jrow]+8];
1950:       sum10 += v[jrow]*x[11*idx[jrow]+9];
1951:       sum11 += v[jrow]*x[11*idx[jrow]+10];
1952:       jrow++;
1953:     }
1954:     y[11*i]    += sum1;
1955:     y[11*i+1]  += sum2;
1956:     y[11*i+2]  += sum3;
1957:     y[11*i+3]  += sum4;
1958:     y[11*i+4]  += sum5;
1959:     y[11*i+5]  += sum6;
1960:     y[11*i+6]  += sum7;
1961:     y[11*i+7]  += sum8;
1962:     y[11*i+8]  += sum9;
1963:     y[11*i+9]  += sum10;
1964:     y[11*i+10] += sum11;
1965:   }

1967:   PetscLogFlops(22.0*a->nz);
1968:   VecRestoreArrayRead(xx,&x);
1969:   VecRestoreArray(yy,&y);
1970:   return(0);
1971: }

1973: PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1974: {
1975:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1976:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1977:   const PetscScalar *x,*v;
1978:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
1979:   PetscErrorCode    ierr;
1980:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1981:   PetscInt          n,i;

1984:   VecSet(yy,0.0);
1985:   VecGetArrayRead(xx,&x);
1986:   VecGetArray(yy,&y);

1988:   for (i=0; i<m; i++) {
1989:     idx     = a->j + a->i[i];
1990:     v       = a->a + a->i[i];
1991:     n       = a->i[i+1] - a->i[i];
1992:     alpha1  = x[11*i];
1993:     alpha2  = x[11*i+1];
1994:     alpha3  = x[11*i+2];
1995:     alpha4  = x[11*i+3];
1996:     alpha5  = x[11*i+4];
1997:     alpha6  = x[11*i+5];
1998:     alpha7  = x[11*i+6];
1999:     alpha8  = x[11*i+7];
2000:     alpha9  = x[11*i+8];
2001:     alpha10 = x[11*i+9];
2002:     alpha11 = x[11*i+10];
2003:     while (n-->0) {
2004:       y[11*(*idx)]    += alpha1*(*v);
2005:       y[11*(*idx)+1]  += alpha2*(*v);
2006:       y[11*(*idx)+2]  += alpha3*(*v);
2007:       y[11*(*idx)+3]  += alpha4*(*v);
2008:       y[11*(*idx)+4]  += alpha5*(*v);
2009:       y[11*(*idx)+5]  += alpha6*(*v);
2010:       y[11*(*idx)+6]  += alpha7*(*v);
2011:       y[11*(*idx)+7]  += alpha8*(*v);
2012:       y[11*(*idx)+8]  += alpha9*(*v);
2013:       y[11*(*idx)+9]  += alpha10*(*v);
2014:       y[11*(*idx)+10] += alpha11*(*v);
2015:       idx++; v++;
2016:     }
2017:   }
2018:   PetscLogFlops(22.0*a->nz);
2019:   VecRestoreArrayRead(xx,&x);
2020:   VecRestoreArray(yy,&y);
2021:   return(0);
2022: }

2024: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2025: {
2026:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2027:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2028:   const PetscScalar *x,*v;
2029:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2030:   PetscErrorCode    ierr;
2031:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2032:   PetscInt          n,i;

2035:   if (yy != zz) {VecCopy(yy,zz);}
2036:   VecGetArrayRead(xx,&x);
2037:   VecGetArray(zz,&y);
2038:   for (i=0; i<m; i++) {
2039:     idx     = a->j + a->i[i];
2040:     v       = a->a + a->i[i];
2041:     n       = a->i[i+1] - a->i[i];
2042:     alpha1  = x[11*i];
2043:     alpha2  = x[11*i+1];
2044:     alpha3  = x[11*i+2];
2045:     alpha4  = x[11*i+3];
2046:     alpha5  = x[11*i+4];
2047:     alpha6  = x[11*i+5];
2048:     alpha7  = x[11*i+6];
2049:     alpha8  = x[11*i+7];
2050:     alpha9  = x[11*i+8];
2051:     alpha10 = x[11*i+9];
2052:     alpha11 = x[11*i+10];
2053:     while (n-->0) {
2054:       y[11*(*idx)]    += alpha1*(*v);
2055:       y[11*(*idx)+1]  += alpha2*(*v);
2056:       y[11*(*idx)+2]  += alpha3*(*v);
2057:       y[11*(*idx)+3]  += alpha4*(*v);
2058:       y[11*(*idx)+4]  += alpha5*(*v);
2059:       y[11*(*idx)+5]  += alpha6*(*v);
2060:       y[11*(*idx)+6]  += alpha7*(*v);
2061:       y[11*(*idx)+7]  += alpha8*(*v);
2062:       y[11*(*idx)+8]  += alpha9*(*v);
2063:       y[11*(*idx)+9]  += alpha10*(*v);
2064:       y[11*(*idx)+10] += alpha11*(*v);
2065:       idx++; v++;
2066:     }
2067:   }
2068:   PetscLogFlops(22.0*a->nz);
2069:   VecRestoreArrayRead(xx,&x);
2070:   VecRestoreArray(zz,&y);
2071:   return(0);
2072: }

2074: /*--------------------------------------------------------------------------------------------*/
2075: PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2076: {
2077:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2078:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2079:   const PetscScalar *x,*v;
2080:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2081:   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2082:   PetscErrorCode    ierr;
2083:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2084:   PetscInt          nonzerorow=0,n,i,jrow,j;

2087:   VecGetArrayRead(xx,&x);
2088:   VecGetArray(yy,&y);
2089:   idx  = a->j;
2090:   v    = a->a;
2091:   ii   = a->i;

2093:   for (i=0; i<m; i++) {
2094:     jrow  = ii[i];
2095:     n     = ii[i+1] - jrow;
2096:     sum1  = 0.0;
2097:     sum2  = 0.0;
2098:     sum3  = 0.0;
2099:     sum4  = 0.0;
2100:     sum5  = 0.0;
2101:     sum6  = 0.0;
2102:     sum7  = 0.0;
2103:     sum8  = 0.0;
2104:     sum9  = 0.0;
2105:     sum10 = 0.0;
2106:     sum11 = 0.0;
2107:     sum12 = 0.0;
2108:     sum13 = 0.0;
2109:     sum14 = 0.0;
2110:     sum15 = 0.0;
2111:     sum16 = 0.0;

2113:     nonzerorow += (n>0);
2114:     for (j=0; j<n; j++) {
2115:       sum1  += v[jrow]*x[16*idx[jrow]];
2116:       sum2  += v[jrow]*x[16*idx[jrow]+1];
2117:       sum3  += v[jrow]*x[16*idx[jrow]+2];
2118:       sum4  += v[jrow]*x[16*idx[jrow]+3];
2119:       sum5  += v[jrow]*x[16*idx[jrow]+4];
2120:       sum6  += v[jrow]*x[16*idx[jrow]+5];
2121:       sum7  += v[jrow]*x[16*idx[jrow]+6];
2122:       sum8  += v[jrow]*x[16*idx[jrow]+7];
2123:       sum9  += v[jrow]*x[16*idx[jrow]+8];
2124:       sum10 += v[jrow]*x[16*idx[jrow]+9];
2125:       sum11 += v[jrow]*x[16*idx[jrow]+10];
2126:       sum12 += v[jrow]*x[16*idx[jrow]+11];
2127:       sum13 += v[jrow]*x[16*idx[jrow]+12];
2128:       sum14 += v[jrow]*x[16*idx[jrow]+13];
2129:       sum15 += v[jrow]*x[16*idx[jrow]+14];
2130:       sum16 += v[jrow]*x[16*idx[jrow]+15];
2131:       jrow++;
2132:     }
2133:     y[16*i]    = sum1;
2134:     y[16*i+1]  = sum2;
2135:     y[16*i+2]  = sum3;
2136:     y[16*i+3]  = sum4;
2137:     y[16*i+4]  = sum5;
2138:     y[16*i+5]  = sum6;
2139:     y[16*i+6]  = sum7;
2140:     y[16*i+7]  = sum8;
2141:     y[16*i+8]  = sum9;
2142:     y[16*i+9]  = sum10;
2143:     y[16*i+10] = sum11;
2144:     y[16*i+11] = sum12;
2145:     y[16*i+12] = sum13;
2146:     y[16*i+13] = sum14;
2147:     y[16*i+14] = sum15;
2148:     y[16*i+15] = sum16;
2149:   }

2151:   PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);
2152:   VecRestoreArrayRead(xx,&x);
2153:   VecRestoreArray(yy,&y);
2154:   return(0);
2155: }

2157: PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2158: {
2159:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2160:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2161:   const PetscScalar *x,*v;
2162:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2163:   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2164:   PetscErrorCode    ierr;
2165:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2166:   PetscInt          n,i;

2169:   VecSet(yy,0.0);
2170:   VecGetArrayRead(xx,&x);
2171:   VecGetArray(yy,&y);

2173:   for (i=0; i<m; i++) {
2174:     idx     = a->j + a->i[i];
2175:     v       = a->a + a->i[i];
2176:     n       = a->i[i+1] - a->i[i];
2177:     alpha1  = x[16*i];
2178:     alpha2  = x[16*i+1];
2179:     alpha3  = x[16*i+2];
2180:     alpha4  = x[16*i+3];
2181:     alpha5  = x[16*i+4];
2182:     alpha6  = x[16*i+5];
2183:     alpha7  = x[16*i+6];
2184:     alpha8  = x[16*i+7];
2185:     alpha9  = x[16*i+8];
2186:     alpha10 = x[16*i+9];
2187:     alpha11 = x[16*i+10];
2188:     alpha12 = x[16*i+11];
2189:     alpha13 = x[16*i+12];
2190:     alpha14 = x[16*i+13];
2191:     alpha15 = x[16*i+14];
2192:     alpha16 = x[16*i+15];
2193:     while (n-->0) {
2194:       y[16*(*idx)]    += alpha1*(*v);
2195:       y[16*(*idx)+1]  += alpha2*(*v);
2196:       y[16*(*idx)+2]  += alpha3*(*v);
2197:       y[16*(*idx)+3]  += alpha4*(*v);
2198:       y[16*(*idx)+4]  += alpha5*(*v);
2199:       y[16*(*idx)+5]  += alpha6*(*v);
2200:       y[16*(*idx)+6]  += alpha7*(*v);
2201:       y[16*(*idx)+7]  += alpha8*(*v);
2202:       y[16*(*idx)+8]  += alpha9*(*v);
2203:       y[16*(*idx)+9]  += alpha10*(*v);
2204:       y[16*(*idx)+10] += alpha11*(*v);
2205:       y[16*(*idx)+11] += alpha12*(*v);
2206:       y[16*(*idx)+12] += alpha13*(*v);
2207:       y[16*(*idx)+13] += alpha14*(*v);
2208:       y[16*(*idx)+14] += alpha15*(*v);
2209:       y[16*(*idx)+15] += alpha16*(*v);
2210:       idx++; v++;
2211:     }
2212:   }
2213:   PetscLogFlops(32.0*a->nz);
2214:   VecRestoreArrayRead(xx,&x);
2215:   VecRestoreArray(yy,&y);
2216:   return(0);
2217: }

2219: PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2220: {
2221:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2222:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2223:   const PetscScalar *x,*v;
2224:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2225:   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2226:   PetscErrorCode    ierr;
2227:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2228:   PetscInt          n,i,jrow,j;

2231:   if (yy != zz) {VecCopy(yy,zz);}
2232:   VecGetArrayRead(xx,&x);
2233:   VecGetArray(zz,&y);
2234:   idx  = a->j;
2235:   v    = a->a;
2236:   ii   = a->i;

2238:   for (i=0; i<m; i++) {
2239:     jrow  = ii[i];
2240:     n     = ii[i+1] - jrow;
2241:     sum1  = 0.0;
2242:     sum2  = 0.0;
2243:     sum3  = 0.0;
2244:     sum4  = 0.0;
2245:     sum5  = 0.0;
2246:     sum6  = 0.0;
2247:     sum7  = 0.0;
2248:     sum8  = 0.0;
2249:     sum9  = 0.0;
2250:     sum10 = 0.0;
2251:     sum11 = 0.0;
2252:     sum12 = 0.0;
2253:     sum13 = 0.0;
2254:     sum14 = 0.0;
2255:     sum15 = 0.0;
2256:     sum16 = 0.0;
2257:     for (j=0; j<n; j++) {
2258:       sum1  += v[jrow]*x[16*idx[jrow]];
2259:       sum2  += v[jrow]*x[16*idx[jrow]+1];
2260:       sum3  += v[jrow]*x[16*idx[jrow]+2];
2261:       sum4  += v[jrow]*x[16*idx[jrow]+3];
2262:       sum5  += v[jrow]*x[16*idx[jrow]+4];
2263:       sum6  += v[jrow]*x[16*idx[jrow]+5];
2264:       sum7  += v[jrow]*x[16*idx[jrow]+6];
2265:       sum8  += v[jrow]*x[16*idx[jrow]+7];
2266:       sum9  += v[jrow]*x[16*idx[jrow]+8];
2267:       sum10 += v[jrow]*x[16*idx[jrow]+9];
2268:       sum11 += v[jrow]*x[16*idx[jrow]+10];
2269:       sum12 += v[jrow]*x[16*idx[jrow]+11];
2270:       sum13 += v[jrow]*x[16*idx[jrow]+12];
2271:       sum14 += v[jrow]*x[16*idx[jrow]+13];
2272:       sum15 += v[jrow]*x[16*idx[jrow]+14];
2273:       sum16 += v[jrow]*x[16*idx[jrow]+15];
2274:       jrow++;
2275:     }
2276:     y[16*i]    += sum1;
2277:     y[16*i+1]  += sum2;
2278:     y[16*i+2]  += sum3;
2279:     y[16*i+3]  += sum4;
2280:     y[16*i+4]  += sum5;
2281:     y[16*i+5]  += sum6;
2282:     y[16*i+6]  += sum7;
2283:     y[16*i+7]  += sum8;
2284:     y[16*i+8]  += sum9;
2285:     y[16*i+9]  += sum10;
2286:     y[16*i+10] += sum11;
2287:     y[16*i+11] += sum12;
2288:     y[16*i+12] += sum13;
2289:     y[16*i+13] += sum14;
2290:     y[16*i+14] += sum15;
2291:     y[16*i+15] += sum16;
2292:   }

2294:   PetscLogFlops(32.0*a->nz);
2295:   VecRestoreArrayRead(xx,&x);
2296:   VecRestoreArray(zz,&y);
2297:   return(0);
2298: }

2300: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2301: {
2302:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2303:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2304:   const PetscScalar *x,*v;
2305:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2306:   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2307:   PetscErrorCode    ierr;
2308:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2309:   PetscInt          n,i;

2312:   if (yy != zz) {VecCopy(yy,zz);}
2313:   VecGetArrayRead(xx,&x);
2314:   VecGetArray(zz,&y);
2315:   for (i=0; i<m; i++) {
2316:     idx     = a->j + a->i[i];
2317:     v       = a->a + a->i[i];
2318:     n       = a->i[i+1] - a->i[i];
2319:     alpha1  = x[16*i];
2320:     alpha2  = x[16*i+1];
2321:     alpha3  = x[16*i+2];
2322:     alpha4  = x[16*i+3];
2323:     alpha5  = x[16*i+4];
2324:     alpha6  = x[16*i+5];
2325:     alpha7  = x[16*i+6];
2326:     alpha8  = x[16*i+7];
2327:     alpha9  = x[16*i+8];
2328:     alpha10 = x[16*i+9];
2329:     alpha11 = x[16*i+10];
2330:     alpha12 = x[16*i+11];
2331:     alpha13 = x[16*i+12];
2332:     alpha14 = x[16*i+13];
2333:     alpha15 = x[16*i+14];
2334:     alpha16 = x[16*i+15];
2335:     while (n-->0) {
2336:       y[16*(*idx)]    += alpha1*(*v);
2337:       y[16*(*idx)+1]  += alpha2*(*v);
2338:       y[16*(*idx)+2]  += alpha3*(*v);
2339:       y[16*(*idx)+3]  += alpha4*(*v);
2340:       y[16*(*idx)+4]  += alpha5*(*v);
2341:       y[16*(*idx)+5]  += alpha6*(*v);
2342:       y[16*(*idx)+6]  += alpha7*(*v);
2343:       y[16*(*idx)+7]  += alpha8*(*v);
2344:       y[16*(*idx)+8]  += alpha9*(*v);
2345:       y[16*(*idx)+9]  += alpha10*(*v);
2346:       y[16*(*idx)+10] += alpha11*(*v);
2347:       y[16*(*idx)+11] += alpha12*(*v);
2348:       y[16*(*idx)+12] += alpha13*(*v);
2349:       y[16*(*idx)+13] += alpha14*(*v);
2350:       y[16*(*idx)+14] += alpha15*(*v);
2351:       y[16*(*idx)+15] += alpha16*(*v);
2352:       idx++; v++;
2353:     }
2354:   }
2355:   PetscLogFlops(32.0*a->nz);
2356:   VecRestoreArrayRead(xx,&x);
2357:   VecRestoreArray(zz,&y);
2358:   return(0);
2359: }

2361: /*--------------------------------------------------------------------------------------------*/
2362: PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2363: {
2364:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2365:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2366:   const PetscScalar *x,*v;
2367:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2368:   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2369:   PetscErrorCode    ierr;
2370:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2371:   PetscInt          nonzerorow=0,n,i,jrow,j;

2374:   VecGetArrayRead(xx,&x);
2375:   VecGetArray(yy,&y);
2376:   idx  = a->j;
2377:   v    = a->a;
2378:   ii   = a->i;

2380:   for (i=0; i<m; i++) {
2381:     jrow  = ii[i];
2382:     n     = ii[i+1] - jrow;
2383:     sum1  = 0.0;
2384:     sum2  = 0.0;
2385:     sum3  = 0.0;
2386:     sum4  = 0.0;
2387:     sum5  = 0.0;
2388:     sum6  = 0.0;
2389:     sum7  = 0.0;
2390:     sum8  = 0.0;
2391:     sum9  = 0.0;
2392:     sum10 = 0.0;
2393:     sum11 = 0.0;
2394:     sum12 = 0.0;
2395:     sum13 = 0.0;
2396:     sum14 = 0.0;
2397:     sum15 = 0.0;
2398:     sum16 = 0.0;
2399:     sum17 = 0.0;
2400:     sum18 = 0.0;

2402:     nonzerorow += (n>0);
2403:     for (j=0; j<n; j++) {
2404:       sum1  += v[jrow]*x[18*idx[jrow]];
2405:       sum2  += v[jrow]*x[18*idx[jrow]+1];
2406:       sum3  += v[jrow]*x[18*idx[jrow]+2];
2407:       sum4  += v[jrow]*x[18*idx[jrow]+3];
2408:       sum5  += v[jrow]*x[18*idx[jrow]+4];
2409:       sum6  += v[jrow]*x[18*idx[jrow]+5];
2410:       sum7  += v[jrow]*x[18*idx[jrow]+6];
2411:       sum8  += v[jrow]*x[18*idx[jrow]+7];
2412:       sum9  += v[jrow]*x[18*idx[jrow]+8];
2413:       sum10 += v[jrow]*x[18*idx[jrow]+9];
2414:       sum11 += v[jrow]*x[18*idx[jrow]+10];
2415:       sum12 += v[jrow]*x[18*idx[jrow]+11];
2416:       sum13 += v[jrow]*x[18*idx[jrow]+12];
2417:       sum14 += v[jrow]*x[18*idx[jrow]+13];
2418:       sum15 += v[jrow]*x[18*idx[jrow]+14];
2419:       sum16 += v[jrow]*x[18*idx[jrow]+15];
2420:       sum17 += v[jrow]*x[18*idx[jrow]+16];
2421:       sum18 += v[jrow]*x[18*idx[jrow]+17];
2422:       jrow++;
2423:     }
2424:     y[18*i]    = sum1;
2425:     y[18*i+1]  = sum2;
2426:     y[18*i+2]  = sum3;
2427:     y[18*i+3]  = sum4;
2428:     y[18*i+4]  = sum5;
2429:     y[18*i+5]  = sum6;
2430:     y[18*i+6]  = sum7;
2431:     y[18*i+7]  = sum8;
2432:     y[18*i+8]  = sum9;
2433:     y[18*i+9]  = sum10;
2434:     y[18*i+10] = sum11;
2435:     y[18*i+11] = sum12;
2436:     y[18*i+12] = sum13;
2437:     y[18*i+13] = sum14;
2438:     y[18*i+14] = sum15;
2439:     y[18*i+15] = sum16;
2440:     y[18*i+16] = sum17;
2441:     y[18*i+17] = sum18;
2442:   }

2444:   PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);
2445:   VecRestoreArrayRead(xx,&x);
2446:   VecRestoreArray(yy,&y);
2447:   return(0);
2448: }

2450: PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2451: {
2452:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2453:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2454:   const PetscScalar *x,*v;
2455:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2456:   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2457:   PetscErrorCode    ierr;
2458:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2459:   PetscInt          n,i;

2462:   VecSet(yy,0.0);
2463:   VecGetArrayRead(xx,&x);
2464:   VecGetArray(yy,&y);

2466:   for (i=0; i<m; i++) {
2467:     idx     = a->j + a->i[i];
2468:     v       = a->a + a->i[i];
2469:     n       = a->i[i+1] - a->i[i];
2470:     alpha1  = x[18*i];
2471:     alpha2  = x[18*i+1];
2472:     alpha3  = x[18*i+2];
2473:     alpha4  = x[18*i+3];
2474:     alpha5  = x[18*i+4];
2475:     alpha6  = x[18*i+5];
2476:     alpha7  = x[18*i+6];
2477:     alpha8  = x[18*i+7];
2478:     alpha9  = x[18*i+8];
2479:     alpha10 = x[18*i+9];
2480:     alpha11 = x[18*i+10];
2481:     alpha12 = x[18*i+11];
2482:     alpha13 = x[18*i+12];
2483:     alpha14 = x[18*i+13];
2484:     alpha15 = x[18*i+14];
2485:     alpha16 = x[18*i+15];
2486:     alpha17 = x[18*i+16];
2487:     alpha18 = x[18*i+17];
2488:     while (n-->0) {
2489:       y[18*(*idx)]    += alpha1*(*v);
2490:       y[18*(*idx)+1]  += alpha2*(*v);
2491:       y[18*(*idx)+2]  += alpha3*(*v);
2492:       y[18*(*idx)+3]  += alpha4*(*v);
2493:       y[18*(*idx)+4]  += alpha5*(*v);
2494:       y[18*(*idx)+5]  += alpha6*(*v);
2495:       y[18*(*idx)+6]  += alpha7*(*v);
2496:       y[18*(*idx)+7]  += alpha8*(*v);
2497:       y[18*(*idx)+8]  += alpha9*(*v);
2498:       y[18*(*idx)+9]  += alpha10*(*v);
2499:       y[18*(*idx)+10] += alpha11*(*v);
2500:       y[18*(*idx)+11] += alpha12*(*v);
2501:       y[18*(*idx)+12] += alpha13*(*v);
2502:       y[18*(*idx)+13] += alpha14*(*v);
2503:       y[18*(*idx)+14] += alpha15*(*v);
2504:       y[18*(*idx)+15] += alpha16*(*v);
2505:       y[18*(*idx)+16] += alpha17*(*v);
2506:       y[18*(*idx)+17] += alpha18*(*v);
2507:       idx++; v++;
2508:     }
2509:   }
2510:   PetscLogFlops(36.0*a->nz);
2511:   VecRestoreArrayRead(xx,&x);
2512:   VecRestoreArray(yy,&y);
2513:   return(0);
2514: }

2516: PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2517: {
2518:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2519:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2520:   const PetscScalar *x,*v;
2521:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2522:   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2523:   PetscErrorCode    ierr;
2524:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2525:   PetscInt          n,i,jrow,j;

2528:   if (yy != zz) {VecCopy(yy,zz);}
2529:   VecGetArrayRead(xx,&x);
2530:   VecGetArray(zz,&y);
2531:   idx  = a->j;
2532:   v    = a->a;
2533:   ii   = a->i;

2535:   for (i=0; i<m; i++) {
2536:     jrow  = ii[i];
2537:     n     = ii[i+1] - jrow;
2538:     sum1  = 0.0;
2539:     sum2  = 0.0;
2540:     sum3  = 0.0;
2541:     sum4  = 0.0;
2542:     sum5  = 0.0;
2543:     sum6  = 0.0;
2544:     sum7  = 0.0;
2545:     sum8  = 0.0;
2546:     sum9  = 0.0;
2547:     sum10 = 0.0;
2548:     sum11 = 0.0;
2549:     sum12 = 0.0;
2550:     sum13 = 0.0;
2551:     sum14 = 0.0;
2552:     sum15 = 0.0;
2553:     sum16 = 0.0;
2554:     sum17 = 0.0;
2555:     sum18 = 0.0;
2556:     for (j=0; j<n; j++) {
2557:       sum1  += v[jrow]*x[18*idx[jrow]];
2558:       sum2  += v[jrow]*x[18*idx[jrow]+1];
2559:       sum3  += v[jrow]*x[18*idx[jrow]+2];
2560:       sum4  += v[jrow]*x[18*idx[jrow]+3];
2561:       sum5  += v[jrow]*x[18*idx[jrow]+4];
2562:       sum6  += v[jrow]*x[18*idx[jrow]+5];
2563:       sum7  += v[jrow]*x[18*idx[jrow]+6];
2564:       sum8  += v[jrow]*x[18*idx[jrow]+7];
2565:       sum9  += v[jrow]*x[18*idx[jrow]+8];
2566:       sum10 += v[jrow]*x[18*idx[jrow]+9];
2567:       sum11 += v[jrow]*x[18*idx[jrow]+10];
2568:       sum12 += v[jrow]*x[18*idx[jrow]+11];
2569:       sum13 += v[jrow]*x[18*idx[jrow]+12];
2570:       sum14 += v[jrow]*x[18*idx[jrow]+13];
2571:       sum15 += v[jrow]*x[18*idx[jrow]+14];
2572:       sum16 += v[jrow]*x[18*idx[jrow]+15];
2573:       sum17 += v[jrow]*x[18*idx[jrow]+16];
2574:       sum18 += v[jrow]*x[18*idx[jrow]+17];
2575:       jrow++;
2576:     }
2577:     y[18*i]    += sum1;
2578:     y[18*i+1]  += sum2;
2579:     y[18*i+2]  += sum3;
2580:     y[18*i+3]  += sum4;
2581:     y[18*i+4]  += sum5;
2582:     y[18*i+5]  += sum6;
2583:     y[18*i+6]  += sum7;
2584:     y[18*i+7]  += sum8;
2585:     y[18*i+8]  += sum9;
2586:     y[18*i+9]  += sum10;
2587:     y[18*i+10] += sum11;
2588:     y[18*i+11] += sum12;
2589:     y[18*i+12] += sum13;
2590:     y[18*i+13] += sum14;
2591:     y[18*i+14] += sum15;
2592:     y[18*i+15] += sum16;
2593:     y[18*i+16] += sum17;
2594:     y[18*i+17] += sum18;
2595:   }

2597:   PetscLogFlops(36.0*a->nz);
2598:   VecRestoreArrayRead(xx,&x);
2599:   VecRestoreArray(zz,&y);
2600:   return(0);
2601: }

2603: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2604: {
2605:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2606:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2607:   const PetscScalar *x,*v;
2608:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2609:   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2610:   PetscErrorCode    ierr;
2611:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2612:   PetscInt          n,i;

2615:   if (yy != zz) {VecCopy(yy,zz);}
2616:   VecGetArrayRead(xx,&x);
2617:   VecGetArray(zz,&y);
2618:   for (i=0; i<m; i++) {
2619:     idx     = a->j + a->i[i];
2620:     v       = a->a + a->i[i];
2621:     n       = a->i[i+1] - a->i[i];
2622:     alpha1  = x[18*i];
2623:     alpha2  = x[18*i+1];
2624:     alpha3  = x[18*i+2];
2625:     alpha4  = x[18*i+3];
2626:     alpha5  = x[18*i+4];
2627:     alpha6  = x[18*i+5];
2628:     alpha7  = x[18*i+6];
2629:     alpha8  = x[18*i+7];
2630:     alpha9  = x[18*i+8];
2631:     alpha10 = x[18*i+9];
2632:     alpha11 = x[18*i+10];
2633:     alpha12 = x[18*i+11];
2634:     alpha13 = x[18*i+12];
2635:     alpha14 = x[18*i+13];
2636:     alpha15 = x[18*i+14];
2637:     alpha16 = x[18*i+15];
2638:     alpha17 = x[18*i+16];
2639:     alpha18 = x[18*i+17];
2640:     while (n-->0) {
2641:       y[18*(*idx)]    += alpha1*(*v);
2642:       y[18*(*idx)+1]  += alpha2*(*v);
2643:       y[18*(*idx)+2]  += alpha3*(*v);
2644:       y[18*(*idx)+3]  += alpha4*(*v);
2645:       y[18*(*idx)+4]  += alpha5*(*v);
2646:       y[18*(*idx)+5]  += alpha6*(*v);
2647:       y[18*(*idx)+6]  += alpha7*(*v);
2648:       y[18*(*idx)+7]  += alpha8*(*v);
2649:       y[18*(*idx)+8]  += alpha9*(*v);
2650:       y[18*(*idx)+9]  += alpha10*(*v);
2651:       y[18*(*idx)+10] += alpha11*(*v);
2652:       y[18*(*idx)+11] += alpha12*(*v);
2653:       y[18*(*idx)+12] += alpha13*(*v);
2654:       y[18*(*idx)+13] += alpha14*(*v);
2655:       y[18*(*idx)+14] += alpha15*(*v);
2656:       y[18*(*idx)+15] += alpha16*(*v);
2657:       y[18*(*idx)+16] += alpha17*(*v);
2658:       y[18*(*idx)+17] += alpha18*(*v);
2659:       idx++; v++;
2660:     }
2661:   }
2662:   PetscLogFlops(36.0*a->nz);
2663:   VecRestoreArrayRead(xx,&x);
2664:   VecRestoreArray(zz,&y);
2665:   return(0);
2666: }

2668: PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2669: {
2670:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2671:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2672:   const PetscScalar *x,*v;
2673:   PetscScalar       *y,*sums;
2674:   PetscErrorCode    ierr;
2675:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2676:   PetscInt          n,i,jrow,j,dof = b->dof,k;

2679:   VecGetArrayRead(xx,&x);
2680:   VecSet(yy,0.0);
2681:   VecGetArray(yy,&y);
2682:   idx  = a->j;
2683:   v    = a->a;
2684:   ii   = a->i;

2686:   for (i=0; i<m; i++) {
2687:     jrow = ii[i];
2688:     n    = ii[i+1] - jrow;
2689:     sums = y + dof*i;
2690:     for (j=0; j<n; j++) {
2691:       for (k=0; k<dof; k++) {
2692:         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2693:       }
2694:       jrow++;
2695:     }
2696:   }

2698:   PetscLogFlops(2.0*dof*a->nz);
2699:   VecRestoreArrayRead(xx,&x);
2700:   VecRestoreArray(yy,&y);
2701:   return(0);
2702: }

2704: PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2705: {
2706:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2707:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2708:   const PetscScalar *x,*v;
2709:   PetscScalar       *y,*sums;
2710:   PetscErrorCode    ierr;
2711:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2712:   PetscInt          n,i,jrow,j,dof = b->dof,k;

2715:   if (yy != zz) {VecCopy(yy,zz);}
2716:   VecGetArrayRead(xx,&x);
2717:   VecGetArray(zz,&y);
2718:   idx  = a->j;
2719:   v    = a->a;
2720:   ii   = a->i;

2722:   for (i=0; i<m; i++) {
2723:     jrow = ii[i];
2724:     n    = ii[i+1] - jrow;
2725:     sums = y + dof*i;
2726:     for (j=0; j<n; j++) {
2727:       for (k=0; k<dof; k++) {
2728:         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2729:       }
2730:       jrow++;
2731:     }
2732:   }

2734:   PetscLogFlops(2.0*dof*a->nz);
2735:   VecRestoreArrayRead(xx,&x);
2736:   VecRestoreArray(zz,&y);
2737:   return(0);
2738: }

2740: PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2741: {
2742:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2743:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2744:   const PetscScalar *x,*v,*alpha;
2745:   PetscScalar       *y;
2746:   PetscErrorCode    ierr;
2747:   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2748:   PetscInt          n,i,k;

2751:   VecGetArrayRead(xx,&x);
2752:   VecSet(yy,0.0);
2753:   VecGetArray(yy,&y);
2754:   for (i=0; i<m; i++) {
2755:     idx   = a->j + a->i[i];
2756:     v     = a->a + a->i[i];
2757:     n     = a->i[i+1] - a->i[i];
2758:     alpha = x + dof*i;
2759:     while (n-->0) {
2760:       for (k=0; k<dof; k++) {
2761:         y[dof*(*idx)+k] += alpha[k]*(*v);
2762:       }
2763:       idx++; v++;
2764:     }
2765:   }
2766:   PetscLogFlops(2.0*dof*a->nz);
2767:   VecRestoreArrayRead(xx,&x);
2768:   VecRestoreArray(yy,&y);
2769:   return(0);
2770: }

2772: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2773: {
2774:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2775:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2776:   const PetscScalar *x,*v,*alpha;
2777:   PetscScalar       *y;
2778:   PetscErrorCode    ierr;
2779:   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2780:   PetscInt          n,i,k;

2783:   if (yy != zz) {VecCopy(yy,zz);}
2784:   VecGetArrayRead(xx,&x);
2785:   VecGetArray(zz,&y);
2786:   for (i=0; i<m; i++) {
2787:     idx   = a->j + a->i[i];
2788:     v     = a->a + a->i[i];
2789:     n     = a->i[i+1] - a->i[i];
2790:     alpha = x + dof*i;
2791:     while (n-->0) {
2792:       for (k=0; k<dof; k++) {
2793:         y[dof*(*idx)+k] += alpha[k]*(*v);
2794:       }
2795:       idx++; v++;
2796:     }
2797:   }
2798:   PetscLogFlops(2.0*dof*a->nz);
2799:   VecRestoreArrayRead(xx,&x);
2800:   VecRestoreArray(zz,&y);
2801:   return(0);
2802: }

2804: /*===================================================================================*/
2805: PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2806: {
2807:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2811:   /* start the scatter */
2812:   VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2813:   (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
2814:   VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2815:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
2816:   return(0);
2817: }

2819: PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2820: {
2821:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2825:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2826:   (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
2827:   VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2828:   VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2829:   return(0);
2830: }

2832: PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2833: {
2834:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2838:   /* start the scatter */
2839:   VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2840:   (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
2841:   VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2842:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);
2843:   return(0);
2844: }

2846: PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2847: {
2848:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2852:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2853:   (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
2854:   VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2855:   VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2856:   return(0);
2857: }

2859: /* ----------------------------------------------------------------*/
2860: PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C)
2861: {
2862:   Mat_Product    *product = C->product;

2865:   if (product->type == MATPRODUCT_PtAP) {
2866:     C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ;
2867:   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices",MatProductTypes[product->type]);
2868:   return(0);
2869: }

2871: PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C)
2872: {
2874:   Mat_Product    *product = C->product;
2875:   PetscBool      flg = PETSC_FALSE;
2876:   Mat            A=product->A,P=product->B;
2877:   PetscInt       alg=1; /* set default algorithm */
2878: #if !defined(PETSC_HAVE_HYPRE)
2879:   const char     *algTypes[4] = {"scalable","nonscalable","allatonce","allatonce_merged"};
2880:   PetscInt       nalg=4;
2881: #else
2882:   const char     *algTypes[5] = {"scalable","nonscalable","allatonce","allatonce_merged","hypre"};
2883:   PetscInt       nalg=5;
2884: #endif

2887:   if (product->type != MATPRODUCT_PtAP) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices",MatProductTypes[product->type]);

2889:   /* PtAP */
2890:   /* Check matrix local sizes */
2891:   if (A->rmap->rstart != P->rmap->rstart || A->rmap->rend != P->rmap->rend) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Arow (%D, %D) != Prow (%D,%D)",A->rmap->rstart,A->rmap->rend,P->rmap->rstart,P->rmap->rend);
2892:   if (A->cmap->rstart != P->rmap->rstart || A->cmap->rend != P->rmap->rend) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Acol (%D, %D) != Prow (%D,%D)",A->cmap->rstart,A->cmap->rend,P->rmap->rstart,P->rmap->rend);

2894:   /* Set the default algorithm */
2895:   PetscStrcmp(C->product->alg,"default",&flg);
2896:   if (flg) {
2897:     MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
2898:   }

2900:   /* Get runtime option */
2901:   PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");
2902:   PetscOptionsEList("-matproduct_ptap_via","Algorithmic approach","MatPtAP",algTypes,nalg,algTypes[alg],&alg,&flg);
2903:   if (flg) {
2904:     MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
2905:   }
2906:   PetscOptionsEnd();

2908:   PetscStrcmp(C->product->alg,"allatonce",&flg);
2909:   if (flg) {
2910:     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
2911:     return(0);
2912:   }

2914:   PetscStrcmp(C->product->alg,"allatonce_merged",&flg);
2915:   if (flg) {
2916:     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
2917:     return(0);
2918:   }

2920:   /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */
2921:   PetscInfo((PetscObject)A,"Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n");
2922:   MatConvert(P,MATMPIAIJ,MAT_INPLACE_MATRIX,&P);
2923:   MatProductSetFromOptions(C);
2924:   return(0);
2925: }

2927: /* ----------------------------------------------------------------*/
2928: PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat C)
2929: {
2930:   PetscErrorCode     ierr;
2931:   PetscFreeSpaceList free_space=NULL,current_space=NULL;
2932:   Mat_SeqMAIJ        *pp       =(Mat_SeqMAIJ*)PP->data;
2933:   Mat                P         =pp->AIJ;
2934:   Mat_SeqAIJ         *a        =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2935:   PetscInt           *pti,*ptj,*ptJ;
2936:   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2937:   const PetscInt     an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
2938:   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
2939:   MatScalar          *ca;
2940:   const PetscInt     *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;

2943:   /* Get ij structure of P^T */
2944:   MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);

2946:   cn = pn*ppdof;
2947:   /* Allocate ci array, arrays for fill computation and */
2948:   /* free space for accumulating nonzero column info */
2949:   PetscMalloc1(cn+1,&ci);
2950:   ci[0] = 0;

2952:   /* Work arrays for rows of P^T*A */
2953:   PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow);
2954:   PetscArrayzero(ptadenserow,an);
2955:   PetscArrayzero(denserow,cn);

2957:   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2958:   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2959:   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2960:   PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space);
2961:   current_space = free_space;

2963:   /* Determine symbolic info for each row of C: */
2964:   for (i=0; i<pn; i++) {
2965:     ptnzi = pti[i+1] - pti[i];
2966:     ptJ   = ptj + pti[i];
2967:     for (dof=0; dof<ppdof; dof++) {
2968:       ptanzi = 0;
2969:       /* Determine symbolic row of PtA: */
2970:       for (j=0; j<ptnzi; j++) {
2971:         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2972:         arow = ptJ[j]*ppdof + dof;
2973:         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2974:         anzj = ai[arow+1] - ai[arow];
2975:         ajj  = aj + ai[arow];
2976:         for (k=0; k<anzj; k++) {
2977:           if (!ptadenserow[ajj[k]]) {
2978:             ptadenserow[ajj[k]]    = -1;
2979:             ptasparserow[ptanzi++] = ajj[k];
2980:           }
2981:         }
2982:       }
2983:       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2984:       ptaj = ptasparserow;
2985:       cnzi = 0;
2986:       for (j=0; j<ptanzi; j++) {
2987:         /* Get offset within block of P */
2988:         pshift = *ptaj%ppdof;
2989:         /* Get block row of P */
2990:         prow = (*ptaj++)/ppdof; /* integer division */
2991:         /* P has same number of nonzeros per row as the compressed form */
2992:         pnzj = pi[prow+1] - pi[prow];
2993:         pjj  = pj + pi[prow];
2994:         for (k=0;k<pnzj;k++) {
2995:           /* Locations in C are shifted by the offset within the block */
2996:           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
2997:           if (!denserow[pjj[k]*ppdof+pshift]) {
2998:             denserow[pjj[k]*ppdof+pshift] = -1;
2999:             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
3000:           }
3001:         }
3002:       }

3004:       /* sort sparserow */
3005:       PetscSortInt(cnzi,sparserow);

3007:       /* If free space is not available, make more free space */
3008:       /* Double the amount of total space in the list */
3009:       if (current_space->local_remaining<cnzi) {
3010:         PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),&current_space);
3011:       }

3013:       /* Copy data into free space, and zero out denserows */
3014:       PetscArraycpy(current_space->array,sparserow,cnzi);

3016:       current_space->array           += cnzi;
3017:       current_space->local_used      += cnzi;
3018:       current_space->local_remaining -= cnzi;

3020:       for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
3021:       for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0;

3023:       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
3024:       /*        For now, we will recompute what is needed. */
3025:       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
3026:     }
3027:   }
3028:   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
3029:   /* Allocate space for cj, initialize cj, and */
3030:   /* destroy list of free space and other temporary array(s) */
3031:   PetscMalloc1(ci[cn]+1,&cj);
3032:   PetscFreeSpaceContiguous(&free_space,cj);
3033:   PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);

3035:   /* Allocate space for ca */
3036:   PetscCalloc1(ci[cn]+1,&ca);

3038:   /* put together the new matrix */
3039:   MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,NULL,C);
3040:   MatSetBlockSize(C,pp->dof);

3042:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3043:   /* Since these are PETSc arrays, change flags to free them as necessary. */
3044:   c          = (Mat_SeqAIJ*)(C->data);
3045:   c->free_a  = PETSC_TRUE;
3046:   c->free_ij = PETSC_TRUE;
3047:   c->nonew   = 0;

3049:   C->ops->ptapnumeric    = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
3050:   C->ops->productnumeric = MatProductNumeric_PtAP;

3052:   /* Clean up. */
3053:   MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
3054:   return(0);
3055: }

3057: PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
3058: {
3059:   /* This routine requires testing -- first draft only */
3060:   PetscErrorCode  ierr;
3061:   Mat_SeqMAIJ     *pp=(Mat_SeqMAIJ*)PP->data;
3062:   Mat             P  =pp->AIJ;
3063:   Mat_SeqAIJ      *a = (Mat_SeqAIJ*) A->data;
3064:   Mat_SeqAIJ      *p = (Mat_SeqAIJ*) P->data;
3065:   Mat_SeqAIJ      *c = (Mat_SeqAIJ*) C->data;
3066:   const PetscInt  *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj;
3067:   const PetscInt  *ci=c->i,*cj=c->j,*cjj;
3068:   const PetscInt  am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3069:   PetscInt        i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3070:   const MatScalar *aa=a->a,*pa=p->a,*pA,*paj;
3071:   MatScalar       *ca=c->a,*caj,*apa;

3074:   /* Allocate temporary array for storage of one row of A*P */
3075:   PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense);

3077:   /* Clear old values in C */
3078:   PetscArrayzero(ca,ci[cm]);

3080:   for (i=0; i<am; i++) {
3081:     /* Form sparse row of A*P */
3082:     anzi  = ai[i+1] - ai[i];
3083:     apnzj = 0;
3084:     for (j=0; j<anzi; j++) {
3085:       /* Get offset within block of P */
3086:       pshift = *aj%ppdof;
3087:       /* Get block row of P */
3088:       prow = *aj++/ppdof;   /* integer division */
3089:       pnzj = pi[prow+1] - pi[prow];
3090:       pjj  = pj + pi[prow];
3091:       paj  = pa + pi[prow];
3092:       for (k=0; k<pnzj; k++) {
3093:         poffset = pjj[k]*ppdof+pshift;
3094:         if (!apjdense[poffset]) {
3095:           apjdense[poffset] = -1;
3096:           apj[apnzj++]      = poffset;
3097:         }
3098:         apa[poffset] += (*aa)*paj[k];
3099:       }
3100:       PetscLogFlops(2.0*pnzj);
3101:       aa++;
3102:     }

3104:     /* Sort the j index array for quick sparse axpy. */
3105:     /* Note: a array does not need sorting as it is in dense storage locations. */
3106:     PetscSortInt(apnzj,apj);

3108:     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
3109:     prow    = i/ppdof; /* integer division */
3110:     pshift  = i%ppdof;
3111:     poffset = pi[prow];
3112:     pnzi    = pi[prow+1] - poffset;
3113:     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
3114:     pJ = pj+poffset;
3115:     pA = pa+poffset;
3116:     for (j=0; j<pnzi; j++) {
3117:       crow = (*pJ)*ppdof+pshift;
3118:       cjj  = cj + ci[crow];
3119:       caj  = ca + ci[crow];
3120:       pJ++;
3121:       /* Perform sparse axpy operation.  Note cjj includes apj. */
3122:       for (k=0,nextap=0; nextap<apnzj; k++) {
3123:         if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]];
3124:       }
3125:       PetscLogFlops(2.0*apnzj);
3126:       pA++;
3127:     }

3129:     /* Zero the current row info for A*P */
3130:     for (j=0; j<apnzj; j++) {
3131:       apa[apj[j]]      = 0.;
3132:       apjdense[apj[j]] = 0;
3133:     }
3134:   }

3136:   /* Assemble the final matrix and clean up */
3137:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
3138:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
3139:   PetscFree3(apa,apj,apjdense);
3140:   return(0);
3141: }

3143: PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C)
3144: {
3145:   PetscErrorCode      ierr;
3146:   Mat_Product         *product = C->product;
3147:   Mat                 A=product->A,P=product->B;

3150:   MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,product->fill,C);
3151:   return(0);
3152: }

3154: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
3155: {
3157:   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPSymbolic is not implemented for MPIMAIJ matrix yet");
3158: }

3160: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
3161: {
3163:   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
3164: }

3166: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,Mat);

3168: PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,Mat C)
3169: {
3170:   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3171:   PetscErrorCode  ierr;


3175:   MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,C);
3176:   return(0);
3177: }

3179: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,PetscReal,Mat);

3181: PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,PetscReal fill,Mat C)
3182: {
3183:   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3184:   PetscErrorCode  ierr;

3187:   MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,fill,C);
3188:   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce;
3189:   return(0);
3190: }

3192: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,Mat);

3194: PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,Mat C)
3195: {
3196:   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3197:   PetscErrorCode  ierr;


3201:   MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,C);
3202:   return(0);
3203: }

3205: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,PetscReal,Mat);

3207: PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,PetscReal fill,Mat C)
3208: {
3209:   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3210:   PetscErrorCode  ierr;


3214:   MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,fill,C);
3215:   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged;
3216:   return(0);
3217: }

3219: PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C)
3220: {
3221:   PetscErrorCode      ierr;
3222:   Mat_Product         *product = C->product;
3223:   Mat                 A=product->A,P=product->B;
3224:   PetscBool           flg;

3227:   PetscStrcmp(product->alg,"allatonce",&flg);
3228:   if (flg) {
3229:     MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A,P,product->fill,C);
3230:     C->ops->productnumeric = MatProductNumeric_PtAP;
3231:     return(0);
3232:   }

3234:   PetscStrcmp(product->alg,"allatonce_merged",&flg);
3235:   if (flg) {
3236:     MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A,P,product->fill,C);
3237:     C->ops->productnumeric = MatProductNumeric_PtAP;
3238:     return(0);
3239:   }

3241:   SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"Mat Product Algorithm is not supported");
3242: }

3244: PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3245: {
3246:   Mat_SeqMAIJ    *b   = (Mat_SeqMAIJ*)A->data;
3247:   Mat            a    = b->AIJ,B;
3248:   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)a->data;
3250:   PetscInt       m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3251:   PetscInt       *cols;
3252:   PetscScalar    *vals;

3255:   MatGetSize(a,&m,&n);
3256:   PetscMalloc1(dof*m,&ilen);
3257:   for (i=0; i<m; i++) {
3258:     nmax = PetscMax(nmax,aij->ilen[i]);
3259:     for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i];
3260:   }
3261:   MatCreate(PETSC_COMM_SELF,&B);
3262:   MatSetSizes(B,dof*m,dof*n,dof*m,dof*n);
3263:   MatSetType(B,newtype);
3264:   MatSeqAIJSetPreallocation(B,0,ilen);
3265:   PetscFree(ilen);
3266:   PetscMalloc1(nmax,&icols);
3267:   ii   = 0;
3268:   for (i=0; i<m; i++) {
3269:     MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3270:     for (j=0; j<dof; j++) {
3271:       for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j;
3272:       MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3273:       ii++;
3274:     }
3275:     MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3276:   }
3277:   PetscFree(icols);
3278:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3279:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

3281:   if (reuse == MAT_INPLACE_MATRIX) {
3282:     MatHeaderReplace(A,&B);
3283:   } else {
3284:     *newmat = B;
3285:   }
3286:   return(0);
3287: }

3289: #include <../src/mat/impls/aij/mpi/mpiaij.h>

3291: PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3292: {
3293:   Mat_MPIMAIJ    *maij   = (Mat_MPIMAIJ*)A->data;
3294:   Mat            MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
3295:   Mat            MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
3296:   Mat_SeqAIJ     *AIJ    = (Mat_SeqAIJ*) MatAIJ->data;
3297:   Mat_SeqAIJ     *OAIJ   =(Mat_SeqAIJ*) MatOAIJ->data;
3298:   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*) maij->A->data;
3299:   PetscInt       dof     = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0;
3300:   PetscInt       *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL;
3301:   PetscInt       rstart,cstart,*garray,ii,k;
3303:   PetscScalar    *vals,*ovals;

3306:   PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz);
3307:   for (i=0; i<A->rmap->n/dof; i++) {
3308:     nmax  = PetscMax(nmax,AIJ->ilen[i]);
3309:     onmax = PetscMax(onmax,OAIJ->ilen[i]);
3310:     for (j=0; j<dof; j++) {
3311:       dnz[dof*i+j] = AIJ->ilen[i];
3312:       onz[dof*i+j] = OAIJ->ilen[i];
3313:     }
3314:   }
3315:   MatCreate(PetscObjectComm((PetscObject)A),&B);
3316:   MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
3317:   MatSetType(B,newtype);
3318:   MatMPIAIJSetPreallocation(B,0,dnz,0,onz);
3319:   MatSetBlockSize(B,dof);
3320:   PetscFree2(dnz,onz);

3322:   PetscMalloc2(nmax,&icols,onmax,&oicols);
3323:   rstart = dof*maij->A->rmap->rstart;
3324:   cstart = dof*maij->A->cmap->rstart;
3325:   garray = mpiaij->garray;

3327:   ii = rstart;
3328:   for (i=0; i<A->rmap->n/dof; i++) {
3329:     MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3330:     MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3331:     for (j=0; j<dof; j++) {
3332:       for (k=0; k<ncols; k++) {
3333:         icols[k] = cstart + dof*cols[k]+j;
3334:       }
3335:       for (k=0; k<oncols; k++) {
3336:         oicols[k] = dof*garray[ocols[k]]+j;
3337:       }
3338:       MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3339:       MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);
3340:       ii++;
3341:     }
3342:     MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3343:     MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3344:   }
3345:   PetscFree2(icols,oicols);

3347:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3348:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

3350:   if (reuse == MAT_INPLACE_MATRIX) {
3351:     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
3352:     ((PetscObject)A)->refct = 1;

3354:     MatHeaderReplace(A,&B);

3356:     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3357:   } else {
3358:     *newmat = B;
3359:   }
3360:   return(0);
3361: }

3363: PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
3364: {
3366:   Mat            A;

3369:   MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);
3370:   MatCreateSubMatrix(A,isrow,iscol,cll,newmat);
3371:   MatDestroy(&A);
3372:   return(0);
3373: }

3375: PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *submat[])
3376: {
3378:   Mat            A;

3381:   MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);
3382:   MatCreateSubMatrices(A,n,irow,icol,scall,submat);
3383:   MatDestroy(&A);
3384:   return(0);
3385: }

3387: /* ---------------------------------------------------------------------------------- */
3388: /*@
3389:   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
3390:   operations for multicomponent problems.  It interpolates each component the same
3391:   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
3392:   and MATMPIAIJ for distributed matrices.

3394:   Collective

3396:   Input Parameters:
3397: + A - the AIJ matrix describing the action on blocks
3398: - dof - the block size (number of components per node)

3400:   Output Parameter:
3401: . maij - the new MAIJ matrix

3403:   Operations provided:
3404: + MatMult
3405: . MatMultTranspose
3406: . MatMultAdd
3407: . MatMultTransposeAdd
3408: - MatView

3410:   Level: advanced

3412: .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ
3413: @*/
3414: PetscErrorCode  MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
3415: {
3417:   PetscMPIInt    size;
3418:   PetscInt       n;
3419:   Mat            B;
3420: #if defined(PETSC_HAVE_CUDA)
3421:   /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */
3422:   PetscBool      convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE;
3423: #endif

3426:   dof  = PetscAbs(dof);
3427:   PetscObjectReference((PetscObject)A);

3429:   if (dof == 1) *maij = A;
3430:   else {
3431:     MatCreate(PetscObjectComm((PetscObject)A),&B);
3432:     /* propagate vec type */
3433:     MatSetVecType(B,A->defaultvectype);
3434:     MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);
3435:     PetscLayoutSetBlockSize(B->rmap,dof);
3436:     PetscLayoutSetBlockSize(B->cmap,dof);
3437:     PetscLayoutSetUp(B->rmap);
3438:     PetscLayoutSetUp(B->cmap);

3440:     B->assembled = PETSC_TRUE;

3442:     MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
3443:     if (size == 1) {
3444:       Mat_SeqMAIJ *b;

3446:       MatSetType(B,MATSEQMAIJ);

3448:       B->ops->setup   = NULL;
3449:       B->ops->destroy = MatDestroy_SeqMAIJ;
3450:       B->ops->view    = MatView_SeqMAIJ;

3452:       b               = (Mat_SeqMAIJ*)B->data;
3453:       b->dof          = dof;
3454:       b->AIJ          = A;

3456:       if (dof == 2) {
3457:         B->ops->mult             = MatMult_SeqMAIJ_2;
3458:         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3459:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3460:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3461:       } else if (dof == 3) {
3462:         B->ops->mult             = MatMult_SeqMAIJ_3;
3463:         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3464:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3465:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3466:       } else if (dof == 4) {
3467:         B->ops->mult             = MatMult_SeqMAIJ_4;
3468:         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3469:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3470:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3471:       } else if (dof == 5) {
3472:         B->ops->mult             = MatMult_SeqMAIJ_5;
3473:         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3474:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3475:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
3476:       } else if (dof == 6) {
3477:         B->ops->mult             = MatMult_SeqMAIJ_6;
3478:         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
3479:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
3480:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3481:       } else if (dof == 7) {
3482:         B->ops->mult             = MatMult_SeqMAIJ_7;
3483:         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3484:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3485:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
3486:       } else if (dof == 8) {
3487:         B->ops->mult             = MatMult_SeqMAIJ_8;
3488:         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
3489:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
3490:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
3491:       } else if (dof == 9) {
3492:         B->ops->mult             = MatMult_SeqMAIJ_9;
3493:         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
3494:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
3495:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3496:       } else if (dof == 10) {
3497:         B->ops->mult             = MatMult_SeqMAIJ_10;
3498:         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3499:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3500:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3501:       } else if (dof == 11) {
3502:         B->ops->mult             = MatMult_SeqMAIJ_11;
3503:         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3504:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3505:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
3506:       } else if (dof == 16) {
3507:         B->ops->mult             = MatMult_SeqMAIJ_16;
3508:         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
3509:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
3510:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3511:       } else if (dof == 18) {
3512:         B->ops->mult             = MatMult_SeqMAIJ_18;
3513:         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3514:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3515:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
3516:       } else {
3517:         B->ops->mult             = MatMult_SeqMAIJ_N;
3518:         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
3519:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
3520:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
3521:       }
3522: #if defined(PETSC_HAVE_CUDA)
3523:       PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaijcusparse_C",MatConvert_SeqMAIJ_SeqAIJ);
3524: #endif
3525:       PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ);
3526:       PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaij_seqmaij_C",MatProductSetFromOptions_SeqAIJ_SeqMAIJ);
3527:     } else {
3528:       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ*)A->data;
3529:       Mat_MPIMAIJ *b;
3530:       IS          from,to;
3531:       Vec         gvec;

3533:       MatSetType(B,MATMPIMAIJ);

3535:       B->ops->setup            = NULL;
3536:       B->ops->destroy          = MatDestroy_MPIMAIJ;
3537:       B->ops->view             = MatView_MPIMAIJ;

3539:       b      = (Mat_MPIMAIJ*)B->data;
3540:       b->dof = dof;
3541:       b->A   = A;

3543:       MatCreateMAIJ(mpiaij->A,-dof,&b->AIJ);
3544:       MatCreateMAIJ(mpiaij->B,-dof,&b->OAIJ);

3546:       VecGetSize(mpiaij->lvec,&n);
3547:       VecCreate(PETSC_COMM_SELF,&b->w);
3548:       VecSetSizes(b->w,n*dof,n*dof);
3549:       VecSetBlockSize(b->w,dof);
3550:       VecSetType(b->w,VECSEQ);

3552:       /* create two temporary Index sets for build scatter gather */
3553:       ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);
3554:       ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);

3556:       /* create temporary global vector to generate scatter context */
3557:       VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);

3559:       /* generate the scatter context */
3560:       VecScatterCreate(gvec,from,b->w,to,&b->ctx);

3562:       ISDestroy(&from);
3563:       ISDestroy(&to);
3564:       VecDestroy(&gvec);

3566:       B->ops->mult             = MatMult_MPIMAIJ_dof;
3567:       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
3568:       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
3569:       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;

3571: #if defined(PETSC_HAVE_CUDA)
3572:       PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaijcusparse_C",MatConvert_MPIMAIJ_MPIAIJ);
3573: #endif
3574:       PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ);
3575:       PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpimaij_C",MatProductSetFromOptions_MPIAIJ_MPIMAIJ);
3576:     }
3577:     B->ops->createsubmatrix   = MatCreateSubMatrix_MAIJ;
3578:     B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ;
3579:     MatSetUp(B);
3580: #if defined(PETSC_HAVE_CUDA)
3581:     /* temporary until we have CUDA implementation of MAIJ */
3582:     {
3583:       PetscBool flg;
3584:       if (convert) {
3585:         PetscObjectTypeCompareAny((PetscObject)A,&flg,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,MATAIJCUSPARSE,"");
3586:         if (flg) {
3587:           MatConvert(B,((PetscObject)A)->type_name,MAT_INPLACE_MATRIX,&B);
3588:         }
3589:       }
3590:     }
3591: #endif
3592:     *maij = B;
3593:   }
3594:   return(0);
3595: }