Actual source code: maij.c

petsc-3.7.7 2017-09-25
Report Typos and Errors
  2: /*
  3:     Defines the basic matrix operations for the MAIJ  matrix storage format.
  4:   This format is used for restriction and interpolation operations for
  5:   multicomponent problems. It interpolates each component the same way
  6:   independently.

  8:      We provide:
  9:          MatMult()
 10:          MatMultTranspose()
 11:          MatMultTransposeAdd()
 12:          MatMultAdd()
 13:           and
 14:          MatCreateMAIJ(Mat,dof,Mat*)

 16:      This single directory handles both the sequential and parallel codes
 17: */

 19: #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
 20: #include <../src/mat/utils/freespace.h>
 21: #include <petsc/private/vecimpl.h>

 25: /*@
 26:    MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix

 28:    Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel

 30:    Input Parameter:
 31: .  A - the MAIJ matrix

 33:    Output Parameter:
 34: .  B - the AIJ matrix

 36:    Level: advanced

 38:    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.

 40: .seealso: MatCreateMAIJ()
 41: @*/
 42: PetscErrorCode  MatMAIJGetAIJ(Mat A,Mat *B)
 43: {
 45:   PetscBool      ismpimaij,isseqmaij;

 48:   PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
 49:   PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
 50:   if (ismpimaij) {
 51:     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;

 53:     *B = b->A;
 54:   } else if (isseqmaij) {
 55:     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;

 57:     *B = b->AIJ;
 58:   } else {
 59:     *B = A;
 60:   }
 61:   return(0);
 62: }

 66: /*@
 67:    MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size

 69:    Logically Collective

 71:    Input Parameter:
 72: +  A - the MAIJ matrix
 73: -  dof - the block size for the new matrix

 75:    Output Parameter:
 76: .  B - the new MAIJ matrix

 78:    Level: advanced

 80: .seealso: MatCreateMAIJ()
 81: @*/
 82: PetscErrorCode  MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
 83: {
 85:   Mat            Aij = NULL;

 89:   MatMAIJGetAIJ(A,&Aij);
 90:   MatCreateMAIJ(Aij,dof,B);
 91:   return(0);
 92: }

 96: PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
 97: {
 99:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;

102:   MatDestroy(&b->AIJ);
103:   PetscFree(A->data);
104:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL);
105:   PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaij_seqmaij_C",NULL);
106:   return(0);
107: }

111: PetscErrorCode MatSetUp_MAIJ(Mat A)
112: {
114:   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices");
115:   return(0);
116: }

120: PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
121: {
123:   Mat            B;

126:   MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
127:   MatView(B,viewer);
128:   MatDestroy(&B);
129:   return(0);
130: }

134: PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
135: {
137:   Mat            B;

140:   MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
141:   MatView(B,viewer);
142:   MatDestroy(&B);
143:   return(0);
144: }

148: PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
149: {
151:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

154:   MatDestroy(&b->AIJ);
155:   MatDestroy(&b->OAIJ);
156:   MatDestroy(&b->A);
157:   VecScatterDestroy(&b->ctx);
158:   VecDestroy(&b->w);
159:   PetscFree(A->data);
160:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL);
161:   PetscObjectComposeFunction((PetscObject)A,"MatPtAP_mpiaij_mpimaij_C",NULL);
162:   PetscObjectChangeTypeName((PetscObject)A,0);
163:   return(0);
164: }

166: /*MC
167:   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
168:   multicomponent problems, interpolating or restricting each component the same way independently.
169:   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.

171:   Operations provided:
172: . MatMult
173: . MatMultTranspose
174: . MatMultAdd
175: . MatMultTransposeAdd

177:   Level: advanced

179: .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ()
180: M*/

184: PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
185: {
187:   Mat_MPIMAIJ    *b;
188:   PetscMPIInt    size;

191:   PetscNewLog(A,&b);
192:   A->data  = (void*)b;

194:   PetscMemzero(A->ops,sizeof(struct _MatOps));

196:   A->ops->setup = MatSetUp_MAIJ;

198:   b->AIJ  = 0;
199:   b->dof  = 0;
200:   b->OAIJ = 0;
201:   b->ctx  = 0;
202:   b->w    = 0;
203:   MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
204:   if (size == 1) {
205:     PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);
206:   } else {
207:     PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);
208:   }
209:   A->preallocated  = PETSC_TRUE;
210:   A->assembled     = PETSC_TRUE;
211:   return(0);
212: }

214: /* --------------------------------------------------------------------------------------*/
217: PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
218: {
219:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
220:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
221:   const PetscScalar *x,*v;
222:   PetscScalar       *y, sum1, sum2;
223:   PetscErrorCode    ierr;
224:   PetscInt          nonzerorow=0,n,i,jrow,j;
225:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;

228:   VecGetArrayRead(xx,&x);
229:   VecGetArray(yy,&y);
230:   idx  = a->j;
231:   v    = a->a;
232:   ii   = a->i;

234:   for (i=0; i<m; i++) {
235:     jrow  = ii[i];
236:     n     = ii[i+1] - jrow;
237:     sum1  = 0.0;
238:     sum2  = 0.0;

240:     nonzerorow += (n>0);
241:     for (j=0; j<n; j++) {
242:       sum1 += v[jrow]*x[2*idx[jrow]];
243:       sum2 += v[jrow]*x[2*idx[jrow]+1];
244:       jrow++;
245:     }
246:     y[2*i]   = sum1;
247:     y[2*i+1] = sum2;
248:   }

250:   PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);
251:   VecRestoreArrayRead(xx,&x);
252:   VecRestoreArray(yy,&y);
253:   return(0);
254: }

258: PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
259: {
260:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
261:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
262:   const PetscScalar *x,*v;
263:   PetscScalar       *y,alpha1,alpha2;
264:   PetscErrorCode    ierr;
265:   PetscInt          n,i;
266:   const PetscInt    m = b->AIJ->rmap->n,*idx;

269:   VecSet(yy,0.0);
270:   VecGetArrayRead(xx,&x);
271:   VecGetArray(yy,&y);

273:   for (i=0; i<m; i++) {
274:     idx    = a->j + a->i[i];
275:     v      = a->a + a->i[i];
276:     n      = a->i[i+1] - a->i[i];
277:     alpha1 = x[2*i];
278:     alpha2 = x[2*i+1];
279:     while (n-->0) {
280:       y[2*(*idx)]   += alpha1*(*v);
281:       y[2*(*idx)+1] += alpha2*(*v);
282:       idx++; v++;
283:     }
284:   }
285:   PetscLogFlops(4.0*a->nz);
286:   VecRestoreArrayRead(xx,&x);
287:   VecRestoreArray(yy,&y);
288:   return(0);
289: }

293: PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
294: {
295:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
296:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
297:   const PetscScalar *x,*v;
298:   PetscScalar       *y,sum1, sum2;
299:   PetscErrorCode    ierr;
300:   PetscInt          n,i,jrow,j;
301:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

304:   if (yy != zz) {VecCopy(yy,zz);}
305:   VecGetArrayRead(xx,&x);
306:   VecGetArray(zz,&y);
307:   idx  = a->j;
308:   v    = a->a;
309:   ii   = a->i;

311:   for (i=0; i<m; i++) {
312:     jrow = ii[i];
313:     n    = ii[i+1] - jrow;
314:     sum1 = 0.0;
315:     sum2 = 0.0;
316:     for (j=0; j<n; j++) {
317:       sum1 += v[jrow]*x[2*idx[jrow]];
318:       sum2 += v[jrow]*x[2*idx[jrow]+1];
319:       jrow++;
320:     }
321:     y[2*i]   += sum1;
322:     y[2*i+1] += sum2;
323:   }

325:   PetscLogFlops(4.0*a->nz);
326:   VecRestoreArrayRead(xx,&x);
327:   VecRestoreArray(zz,&y);
328:   return(0);
329: }
332: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
333: {
334:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
335:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
336:   const PetscScalar *x,*v;
337:   PetscScalar       *y,alpha1,alpha2;
338:   PetscErrorCode    ierr;
339:   PetscInt          n,i;
340:   const PetscInt    m = b->AIJ->rmap->n,*idx;

343:   if (yy != zz) {VecCopy(yy,zz);}
344:   VecGetArrayRead(xx,&x);
345:   VecGetArray(zz,&y);

347:   for (i=0; i<m; i++) {
348:     idx    = a->j + a->i[i];
349:     v      = a->a + a->i[i];
350:     n      = a->i[i+1] - a->i[i];
351:     alpha1 = x[2*i];
352:     alpha2 = x[2*i+1];
353:     while (n-->0) {
354:       y[2*(*idx)]   += alpha1*(*v);
355:       y[2*(*idx)+1] += alpha2*(*v);
356:       idx++; v++;
357:     }
358:   }
359:   PetscLogFlops(4.0*a->nz);
360:   VecRestoreArrayRead(xx,&x);
361:   VecRestoreArray(zz,&y);
362:   return(0);
363: }
364: /* --------------------------------------------------------------------------------------*/
367: PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
368: {
369:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
370:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
371:   const PetscScalar *x,*v;
372:   PetscScalar       *y,sum1, sum2, sum3;
373:   PetscErrorCode    ierr;
374:   PetscInt          nonzerorow=0,n,i,jrow,j;
375:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;

378:   VecGetArrayRead(xx,&x);
379:   VecGetArray(yy,&y);
380:   idx  = a->j;
381:   v    = a->a;
382:   ii   = a->i;

384:   for (i=0; i<m; i++) {
385:     jrow  = ii[i];
386:     n     = ii[i+1] - jrow;
387:     sum1  = 0.0;
388:     sum2  = 0.0;
389:     sum3  = 0.0;

391:     nonzerorow += (n>0);
392:     for (j=0; j<n; j++) {
393:       sum1 += v[jrow]*x[3*idx[jrow]];
394:       sum2 += v[jrow]*x[3*idx[jrow]+1];
395:       sum3 += v[jrow]*x[3*idx[jrow]+2];
396:       jrow++;
397:      }
398:     y[3*i]   = sum1;
399:     y[3*i+1] = sum2;
400:     y[3*i+2] = sum3;
401:   }

403:   PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);
404:   VecRestoreArrayRead(xx,&x);
405:   VecRestoreArray(yy,&y);
406:   return(0);
407: }

411: PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
412: {
413:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
414:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
415:   const PetscScalar *x,*v;
416:   PetscScalar       *y,alpha1,alpha2,alpha3;
417:   PetscErrorCode    ierr;
418:   PetscInt          n,i;
419:   const PetscInt    m = b->AIJ->rmap->n,*idx;

422:   VecSet(yy,0.0);
423:   VecGetArrayRead(xx,&x);
424:   VecGetArray(yy,&y);

426:   for (i=0; i<m; i++) {
427:     idx    = a->j + a->i[i];
428:     v      = a->a + a->i[i];
429:     n      = a->i[i+1] - a->i[i];
430:     alpha1 = x[3*i];
431:     alpha2 = x[3*i+1];
432:     alpha3 = x[3*i+2];
433:     while (n-->0) {
434:       y[3*(*idx)]   += alpha1*(*v);
435:       y[3*(*idx)+1] += alpha2*(*v);
436:       y[3*(*idx)+2] += alpha3*(*v);
437:       idx++; v++;
438:     }
439:   }
440:   PetscLogFlops(6.0*a->nz);
441:   VecRestoreArrayRead(xx,&x);
442:   VecRestoreArray(yy,&y);
443:   return(0);
444: }

448: PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
449: {
450:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
451:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
452:   const PetscScalar *x,*v;
453:   PetscScalar       *y,sum1, sum2, sum3;
454:   PetscErrorCode    ierr;
455:   PetscInt          n,i,jrow,j;
456:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

459:   if (yy != zz) {VecCopy(yy,zz);}
460:   VecGetArrayRead(xx,&x);
461:   VecGetArray(zz,&y);
462:   idx  = a->j;
463:   v    = a->a;
464:   ii   = a->i;

466:   for (i=0; i<m; i++) {
467:     jrow = ii[i];
468:     n    = ii[i+1] - jrow;
469:     sum1 = 0.0;
470:     sum2 = 0.0;
471:     sum3 = 0.0;
472:     for (j=0; j<n; j++) {
473:       sum1 += v[jrow]*x[3*idx[jrow]];
474:       sum2 += v[jrow]*x[3*idx[jrow]+1];
475:       sum3 += v[jrow]*x[3*idx[jrow]+2];
476:       jrow++;
477:     }
478:     y[3*i]   += sum1;
479:     y[3*i+1] += sum2;
480:     y[3*i+2] += sum3;
481:   }

483:   PetscLogFlops(6.0*a->nz);
484:   VecRestoreArrayRead(xx,&x);
485:   VecRestoreArray(zz,&y);
486:   return(0);
487: }
490: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
491: {
492:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
493:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
494:   const PetscScalar *x,*v;
495:   PetscScalar       *y,alpha1,alpha2,alpha3;
496:   PetscErrorCode    ierr;
497:   PetscInt          n,i;
498:   const PetscInt    m = b->AIJ->rmap->n,*idx;

501:   if (yy != zz) {VecCopy(yy,zz);}
502:   VecGetArrayRead(xx,&x);
503:   VecGetArray(zz,&y);
504:   for (i=0; i<m; i++) {
505:     idx    = a->j + a->i[i];
506:     v      = a->a + a->i[i];
507:     n      = a->i[i+1] - a->i[i];
508:     alpha1 = x[3*i];
509:     alpha2 = x[3*i+1];
510:     alpha3 = x[3*i+2];
511:     while (n-->0) {
512:       y[3*(*idx)]   += alpha1*(*v);
513:       y[3*(*idx)+1] += alpha2*(*v);
514:       y[3*(*idx)+2] += alpha3*(*v);
515:       idx++; v++;
516:     }
517:   }
518:   PetscLogFlops(6.0*a->nz);
519:   VecRestoreArrayRead(xx,&x);
520:   VecRestoreArray(zz,&y);
521:   return(0);
522: }

524: /* ------------------------------------------------------------------------------*/
527: PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
528: {
529:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
530:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
531:   const PetscScalar *x,*v;
532:   PetscScalar       *y,sum1, sum2, sum3, sum4;
533:   PetscErrorCode    ierr;
534:   PetscInt          nonzerorow=0,n,i,jrow,j;
535:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;

538:   VecGetArrayRead(xx,&x);
539:   VecGetArray(yy,&y);
540:   idx  = a->j;
541:   v    = a->a;
542:   ii   = a->i;

544:   for (i=0; i<m; i++) {
545:     jrow        = ii[i];
546:     n           = ii[i+1] - jrow;
547:     sum1        = 0.0;
548:     sum2        = 0.0;
549:     sum3        = 0.0;
550:     sum4        = 0.0;
551:     nonzerorow += (n>0);
552:     for (j=0; j<n; j++) {
553:       sum1 += v[jrow]*x[4*idx[jrow]];
554:       sum2 += v[jrow]*x[4*idx[jrow]+1];
555:       sum3 += v[jrow]*x[4*idx[jrow]+2];
556:       sum4 += v[jrow]*x[4*idx[jrow]+3];
557:       jrow++;
558:     }
559:     y[4*i]   = sum1;
560:     y[4*i+1] = sum2;
561:     y[4*i+2] = sum3;
562:     y[4*i+3] = sum4;
563:   }

565:   PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);
566:   VecRestoreArrayRead(xx,&x);
567:   VecRestoreArray(yy,&y);
568:   return(0);
569: }

573: PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
574: {
575:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
576:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
577:   const PetscScalar *x,*v;
578:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
579:   PetscErrorCode    ierr;
580:   PetscInt          n,i;
581:   const PetscInt    m = b->AIJ->rmap->n,*idx;

584:   VecSet(yy,0.0);
585:   VecGetArrayRead(xx,&x);
586:   VecGetArray(yy,&y);
587:   for (i=0; i<m; i++) {
588:     idx    = a->j + a->i[i];
589:     v      = a->a + a->i[i];
590:     n      = a->i[i+1] - a->i[i];
591:     alpha1 = x[4*i];
592:     alpha2 = x[4*i+1];
593:     alpha3 = x[4*i+2];
594:     alpha4 = x[4*i+3];
595:     while (n-->0) {
596:       y[4*(*idx)]   += alpha1*(*v);
597:       y[4*(*idx)+1] += alpha2*(*v);
598:       y[4*(*idx)+2] += alpha3*(*v);
599:       y[4*(*idx)+3] += alpha4*(*v);
600:       idx++; v++;
601:     }
602:   }
603:   PetscLogFlops(8.0*a->nz);
604:   VecRestoreArrayRead(xx,&x);
605:   VecRestoreArray(yy,&y);
606:   return(0);
607: }

611: PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
612: {
613:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
614:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
615:   const PetscScalar *x,*v;
616:   PetscScalar       *y,sum1, sum2, sum3, sum4;
617:   PetscErrorCode    ierr;
618:   PetscInt          n,i,jrow,j;
619:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

622:   if (yy != zz) {VecCopy(yy,zz);}
623:   VecGetArrayRead(xx,&x);
624:   VecGetArray(zz,&y);
625:   idx  = a->j;
626:   v    = a->a;
627:   ii   = a->i;

629:   for (i=0; i<m; i++) {
630:     jrow = ii[i];
631:     n    = ii[i+1] - jrow;
632:     sum1 = 0.0;
633:     sum2 = 0.0;
634:     sum3 = 0.0;
635:     sum4 = 0.0;
636:     for (j=0; j<n; j++) {
637:       sum1 += v[jrow]*x[4*idx[jrow]];
638:       sum2 += v[jrow]*x[4*idx[jrow]+1];
639:       sum3 += v[jrow]*x[4*idx[jrow]+2];
640:       sum4 += v[jrow]*x[4*idx[jrow]+3];
641:       jrow++;
642:     }
643:     y[4*i]   += sum1;
644:     y[4*i+1] += sum2;
645:     y[4*i+2] += sum3;
646:     y[4*i+3] += sum4;
647:   }

649:   PetscLogFlops(8.0*a->nz);
650:   VecRestoreArrayRead(xx,&x);
651:   VecRestoreArray(zz,&y);
652:   return(0);
653: }
656: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
657: {
658:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
659:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
660:   const PetscScalar *x,*v;
661:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
662:   PetscErrorCode    ierr;
663:   PetscInt          n,i;
664:   const PetscInt    m = b->AIJ->rmap->n,*idx;

667:   if (yy != zz) {VecCopy(yy,zz);}
668:   VecGetArrayRead(xx,&x);
669:   VecGetArray(zz,&y);

671:   for (i=0; i<m; i++) {
672:     idx    = a->j + a->i[i];
673:     v      = a->a + a->i[i];
674:     n      = a->i[i+1] - a->i[i];
675:     alpha1 = x[4*i];
676:     alpha2 = x[4*i+1];
677:     alpha3 = x[4*i+2];
678:     alpha4 = x[4*i+3];
679:     while (n-->0) {
680:       y[4*(*idx)]   += alpha1*(*v);
681:       y[4*(*idx)+1] += alpha2*(*v);
682:       y[4*(*idx)+2] += alpha3*(*v);
683:       y[4*(*idx)+3] += alpha4*(*v);
684:       idx++; v++;
685:     }
686:   }
687:   PetscLogFlops(8.0*a->nz);
688:   VecRestoreArrayRead(xx,&x);
689:   VecRestoreArray(zz,&y);
690:   return(0);
691: }
692: /* ------------------------------------------------------------------------------*/

696: PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
697: {
698:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
699:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
700:   const PetscScalar *x,*v;
701:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
702:   PetscErrorCode    ierr;
703:   PetscInt          nonzerorow=0,n,i,jrow,j;
704:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;

707:   VecGetArrayRead(xx,&x);
708:   VecGetArray(yy,&y);
709:   idx  = a->j;
710:   v    = a->a;
711:   ii   = a->i;

713:   for (i=0; i<m; i++) {
714:     jrow  = ii[i];
715:     n     = ii[i+1] - jrow;
716:     sum1  = 0.0;
717:     sum2  = 0.0;
718:     sum3  = 0.0;
719:     sum4  = 0.0;
720:     sum5  = 0.0;

722:     nonzerorow += (n>0);
723:     for (j=0; j<n; j++) {
724:       sum1 += v[jrow]*x[5*idx[jrow]];
725:       sum2 += v[jrow]*x[5*idx[jrow]+1];
726:       sum3 += v[jrow]*x[5*idx[jrow]+2];
727:       sum4 += v[jrow]*x[5*idx[jrow]+3];
728:       sum5 += v[jrow]*x[5*idx[jrow]+4];
729:       jrow++;
730:     }
731:     y[5*i]   = sum1;
732:     y[5*i+1] = sum2;
733:     y[5*i+2] = sum3;
734:     y[5*i+3] = sum4;
735:     y[5*i+4] = sum5;
736:   }

738:   PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);
739:   VecRestoreArrayRead(xx,&x);
740:   VecRestoreArray(yy,&y);
741:   return(0);
742: }

746: PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
747: {
748:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
749:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
750:   const PetscScalar *x,*v;
751:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
752:   PetscErrorCode    ierr;
753:   PetscInt          n,i;
754:   const PetscInt    m = b->AIJ->rmap->n,*idx;

757:   VecSet(yy,0.0);
758:   VecGetArrayRead(xx,&x);
759:   VecGetArray(yy,&y);

761:   for (i=0; i<m; i++) {
762:     idx    = a->j + a->i[i];
763:     v      = a->a + a->i[i];
764:     n      = a->i[i+1] - a->i[i];
765:     alpha1 = x[5*i];
766:     alpha2 = x[5*i+1];
767:     alpha3 = x[5*i+2];
768:     alpha4 = x[5*i+3];
769:     alpha5 = x[5*i+4];
770:     while (n-->0) {
771:       y[5*(*idx)]   += alpha1*(*v);
772:       y[5*(*idx)+1] += alpha2*(*v);
773:       y[5*(*idx)+2] += alpha3*(*v);
774:       y[5*(*idx)+3] += alpha4*(*v);
775:       y[5*(*idx)+4] += alpha5*(*v);
776:       idx++; v++;
777:     }
778:   }
779:   PetscLogFlops(10.0*a->nz);
780:   VecRestoreArrayRead(xx,&x);
781:   VecRestoreArray(yy,&y);
782:   return(0);
783: }

787: PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
788: {
789:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
790:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
791:   const PetscScalar *x,*v;
792:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
793:   PetscErrorCode    ierr;
794:   PetscInt          n,i,jrow,j;
795:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

798:   if (yy != zz) {VecCopy(yy,zz);}
799:   VecGetArrayRead(xx,&x);
800:   VecGetArray(zz,&y);
801:   idx  = a->j;
802:   v    = a->a;
803:   ii   = a->i;

805:   for (i=0; i<m; i++) {
806:     jrow = ii[i];
807:     n    = ii[i+1] - jrow;
808:     sum1 = 0.0;
809:     sum2 = 0.0;
810:     sum3 = 0.0;
811:     sum4 = 0.0;
812:     sum5 = 0.0;
813:     for (j=0; j<n; j++) {
814:       sum1 += v[jrow]*x[5*idx[jrow]];
815:       sum2 += v[jrow]*x[5*idx[jrow]+1];
816:       sum3 += v[jrow]*x[5*idx[jrow]+2];
817:       sum4 += v[jrow]*x[5*idx[jrow]+3];
818:       sum5 += v[jrow]*x[5*idx[jrow]+4];
819:       jrow++;
820:     }
821:     y[5*i]   += sum1;
822:     y[5*i+1] += sum2;
823:     y[5*i+2] += sum3;
824:     y[5*i+3] += sum4;
825:     y[5*i+4] += sum5;
826:   }

828:   PetscLogFlops(10.0*a->nz);
829:   VecRestoreArrayRead(xx,&x);
830:   VecRestoreArray(zz,&y);
831:   return(0);
832: }

836: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
837: {
838:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
839:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
840:   const PetscScalar *x,*v;
841:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
842:   PetscErrorCode    ierr;
843:   PetscInt          n,i;
844:   const PetscInt    m = b->AIJ->rmap->n,*idx;

847:   if (yy != zz) {VecCopy(yy,zz);}
848:   VecGetArrayRead(xx,&x);
849:   VecGetArray(zz,&y);

851:   for (i=0; i<m; i++) {
852:     idx    = a->j + a->i[i];
853:     v      = a->a + a->i[i];
854:     n      = a->i[i+1] - a->i[i];
855:     alpha1 = x[5*i];
856:     alpha2 = x[5*i+1];
857:     alpha3 = x[5*i+2];
858:     alpha4 = x[5*i+3];
859:     alpha5 = x[5*i+4];
860:     while (n-->0) {
861:       y[5*(*idx)]   += alpha1*(*v);
862:       y[5*(*idx)+1] += alpha2*(*v);
863:       y[5*(*idx)+2] += alpha3*(*v);
864:       y[5*(*idx)+3] += alpha4*(*v);
865:       y[5*(*idx)+4] += alpha5*(*v);
866:       idx++; v++;
867:     }
868:   }
869:   PetscLogFlops(10.0*a->nz);
870:   VecRestoreArrayRead(xx,&x);
871:   VecRestoreArray(zz,&y);
872:   return(0);
873: }

875: /* ------------------------------------------------------------------------------*/
878: PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
879: {
880:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
881:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
882:   const PetscScalar *x,*v;
883:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
884:   PetscErrorCode    ierr;
885:   PetscInt          nonzerorow=0,n,i,jrow,j;
886:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;

889:   VecGetArrayRead(xx,&x);
890:   VecGetArray(yy,&y);
891:   idx  = a->j;
892:   v    = a->a;
893:   ii   = a->i;

895:   for (i=0; i<m; i++) {
896:     jrow  = ii[i];
897:     n     = ii[i+1] - jrow;
898:     sum1  = 0.0;
899:     sum2  = 0.0;
900:     sum3  = 0.0;
901:     sum4  = 0.0;
902:     sum5  = 0.0;
903:     sum6  = 0.0;

905:     nonzerorow += (n>0);
906:     for (j=0; j<n; j++) {
907:       sum1 += v[jrow]*x[6*idx[jrow]];
908:       sum2 += v[jrow]*x[6*idx[jrow]+1];
909:       sum3 += v[jrow]*x[6*idx[jrow]+2];
910:       sum4 += v[jrow]*x[6*idx[jrow]+3];
911:       sum5 += v[jrow]*x[6*idx[jrow]+4];
912:       sum6 += v[jrow]*x[6*idx[jrow]+5];
913:       jrow++;
914:     }
915:     y[6*i]   = sum1;
916:     y[6*i+1] = sum2;
917:     y[6*i+2] = sum3;
918:     y[6*i+3] = sum4;
919:     y[6*i+4] = sum5;
920:     y[6*i+5] = sum6;
921:   }

923:   PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);
924:   VecRestoreArrayRead(xx,&x);
925:   VecRestoreArray(yy,&y);
926:   return(0);
927: }

931: PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
932: {
933:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
934:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
935:   const PetscScalar *x,*v;
936:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
937:   PetscErrorCode    ierr;
938:   PetscInt          n,i;
939:   const PetscInt    m = b->AIJ->rmap->n,*idx;

942:   VecSet(yy,0.0);
943:   VecGetArrayRead(xx,&x);
944:   VecGetArray(yy,&y);

946:   for (i=0; i<m; i++) {
947:     idx    = a->j + a->i[i];
948:     v      = a->a + a->i[i];
949:     n      = a->i[i+1] - a->i[i];
950:     alpha1 = x[6*i];
951:     alpha2 = x[6*i+1];
952:     alpha3 = x[6*i+2];
953:     alpha4 = x[6*i+3];
954:     alpha5 = x[6*i+4];
955:     alpha6 = x[6*i+5];
956:     while (n-->0) {
957:       y[6*(*idx)]   += alpha1*(*v);
958:       y[6*(*idx)+1] += alpha2*(*v);
959:       y[6*(*idx)+2] += alpha3*(*v);
960:       y[6*(*idx)+3] += alpha4*(*v);
961:       y[6*(*idx)+4] += alpha5*(*v);
962:       y[6*(*idx)+5] += alpha6*(*v);
963:       idx++; v++;
964:     }
965:   }
966:   PetscLogFlops(12.0*a->nz);
967:   VecRestoreArrayRead(xx,&x);
968:   VecRestoreArray(yy,&y);
969:   return(0);
970: }

974: PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
975: {
976:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
977:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
978:   const PetscScalar *x,*v;
979:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
980:   PetscErrorCode    ierr;
981:   PetscInt          n,i,jrow,j;
982:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

985:   if (yy != zz) {VecCopy(yy,zz);}
986:   VecGetArrayRead(xx,&x);
987:   VecGetArray(zz,&y);
988:   idx  = a->j;
989:   v    = a->a;
990:   ii   = a->i;

992:   for (i=0; i<m; i++) {
993:     jrow = ii[i];
994:     n    = ii[i+1] - jrow;
995:     sum1 = 0.0;
996:     sum2 = 0.0;
997:     sum3 = 0.0;
998:     sum4 = 0.0;
999:     sum5 = 0.0;
1000:     sum6 = 0.0;
1001:     for (j=0; j<n; j++) {
1002:       sum1 += v[jrow]*x[6*idx[jrow]];
1003:       sum2 += v[jrow]*x[6*idx[jrow]+1];
1004:       sum3 += v[jrow]*x[6*idx[jrow]+2];
1005:       sum4 += v[jrow]*x[6*idx[jrow]+3];
1006:       sum5 += v[jrow]*x[6*idx[jrow]+4];
1007:       sum6 += v[jrow]*x[6*idx[jrow]+5];
1008:       jrow++;
1009:     }
1010:     y[6*i]   += sum1;
1011:     y[6*i+1] += sum2;
1012:     y[6*i+2] += sum3;
1013:     y[6*i+3] += sum4;
1014:     y[6*i+4] += sum5;
1015:     y[6*i+5] += sum6;
1016:   }

1018:   PetscLogFlops(12.0*a->nz);
1019:   VecRestoreArrayRead(xx,&x);
1020:   VecRestoreArray(zz,&y);
1021:   return(0);
1022: }

1026: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
1027: {
1028:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1029:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1030:   const PetscScalar *x,*v;
1031:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
1032:   PetscErrorCode    ierr;
1033:   PetscInt          n,i;
1034:   const PetscInt    m = b->AIJ->rmap->n,*idx;

1037:   if (yy != zz) {VecCopy(yy,zz);}
1038:   VecGetArrayRead(xx,&x);
1039:   VecGetArray(zz,&y);

1041:   for (i=0; i<m; i++) {
1042:     idx    = a->j + a->i[i];
1043:     v      = a->a + a->i[i];
1044:     n      = a->i[i+1] - a->i[i];
1045:     alpha1 = x[6*i];
1046:     alpha2 = x[6*i+1];
1047:     alpha3 = x[6*i+2];
1048:     alpha4 = x[6*i+3];
1049:     alpha5 = x[6*i+4];
1050:     alpha6 = x[6*i+5];
1051:     while (n-->0) {
1052:       y[6*(*idx)]   += alpha1*(*v);
1053:       y[6*(*idx)+1] += alpha2*(*v);
1054:       y[6*(*idx)+2] += alpha3*(*v);
1055:       y[6*(*idx)+3] += alpha4*(*v);
1056:       y[6*(*idx)+4] += alpha5*(*v);
1057:       y[6*(*idx)+5] += alpha6*(*v);
1058:       idx++; v++;
1059:     }
1060:   }
1061:   PetscLogFlops(12.0*a->nz);
1062:   VecRestoreArrayRead(xx,&x);
1063:   VecRestoreArray(zz,&y);
1064:   return(0);
1065: }

1067: /* ------------------------------------------------------------------------------*/
1070: PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1071: {
1072:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1073:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1074:   const PetscScalar *x,*v;
1075:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1076:   PetscErrorCode    ierr;
1077:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1078:   PetscInt          nonzerorow=0,n,i,jrow,j;

1081:   VecGetArrayRead(xx,&x);
1082:   VecGetArray(yy,&y);
1083:   idx  = a->j;
1084:   v    = a->a;
1085:   ii   = a->i;

1087:   for (i=0; i<m; i++) {
1088:     jrow  = ii[i];
1089:     n     = ii[i+1] - jrow;
1090:     sum1  = 0.0;
1091:     sum2  = 0.0;
1092:     sum3  = 0.0;
1093:     sum4  = 0.0;
1094:     sum5  = 0.0;
1095:     sum6  = 0.0;
1096:     sum7  = 0.0;

1098:     nonzerorow += (n>0);
1099:     for (j=0; j<n; j++) {
1100:       sum1 += v[jrow]*x[7*idx[jrow]];
1101:       sum2 += v[jrow]*x[7*idx[jrow]+1];
1102:       sum3 += v[jrow]*x[7*idx[jrow]+2];
1103:       sum4 += v[jrow]*x[7*idx[jrow]+3];
1104:       sum5 += v[jrow]*x[7*idx[jrow]+4];
1105:       sum6 += v[jrow]*x[7*idx[jrow]+5];
1106:       sum7 += v[jrow]*x[7*idx[jrow]+6];
1107:       jrow++;
1108:     }
1109:     y[7*i]   = sum1;
1110:     y[7*i+1] = sum2;
1111:     y[7*i+2] = sum3;
1112:     y[7*i+3] = sum4;
1113:     y[7*i+4] = sum5;
1114:     y[7*i+5] = sum6;
1115:     y[7*i+6] = sum7;
1116:   }

1118:   PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);
1119:   VecRestoreArrayRead(xx,&x);
1120:   VecRestoreArray(yy,&y);
1121:   return(0);
1122: }

1126: PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1127: {
1128:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1129:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1130:   const PetscScalar *x,*v;
1131:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1132:   PetscErrorCode    ierr;
1133:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1134:   PetscInt          n,i;

1137:   VecSet(yy,0.0);
1138:   VecGetArrayRead(xx,&x);
1139:   VecGetArray(yy,&y);

1141:   for (i=0; i<m; i++) {
1142:     idx    = a->j + a->i[i];
1143:     v      = a->a + a->i[i];
1144:     n      = a->i[i+1] - a->i[i];
1145:     alpha1 = x[7*i];
1146:     alpha2 = x[7*i+1];
1147:     alpha3 = x[7*i+2];
1148:     alpha4 = x[7*i+3];
1149:     alpha5 = x[7*i+4];
1150:     alpha6 = x[7*i+5];
1151:     alpha7 = x[7*i+6];
1152:     while (n-->0) {
1153:       y[7*(*idx)]   += alpha1*(*v);
1154:       y[7*(*idx)+1] += alpha2*(*v);
1155:       y[7*(*idx)+2] += alpha3*(*v);
1156:       y[7*(*idx)+3] += alpha4*(*v);
1157:       y[7*(*idx)+4] += alpha5*(*v);
1158:       y[7*(*idx)+5] += alpha6*(*v);
1159:       y[7*(*idx)+6] += alpha7*(*v);
1160:       idx++; v++;
1161:     }
1162:   }
1163:   PetscLogFlops(14.0*a->nz);
1164:   VecRestoreArrayRead(xx,&x);
1165:   VecRestoreArray(yy,&y);
1166:   return(0);
1167: }

1171: PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1172: {
1173:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1174:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1175:   const PetscScalar *x,*v;
1176:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1177:   PetscErrorCode    ierr;
1178:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1179:   PetscInt          n,i,jrow,j;

1182:   if (yy != zz) {VecCopy(yy,zz);}
1183:   VecGetArrayRead(xx,&x);
1184:   VecGetArray(zz,&y);
1185:   idx  = a->j;
1186:   v    = a->a;
1187:   ii   = a->i;

1189:   for (i=0; i<m; i++) {
1190:     jrow = ii[i];
1191:     n    = ii[i+1] - jrow;
1192:     sum1 = 0.0;
1193:     sum2 = 0.0;
1194:     sum3 = 0.0;
1195:     sum4 = 0.0;
1196:     sum5 = 0.0;
1197:     sum6 = 0.0;
1198:     sum7 = 0.0;
1199:     for (j=0; j<n; j++) {
1200:       sum1 += v[jrow]*x[7*idx[jrow]];
1201:       sum2 += v[jrow]*x[7*idx[jrow]+1];
1202:       sum3 += v[jrow]*x[7*idx[jrow]+2];
1203:       sum4 += v[jrow]*x[7*idx[jrow]+3];
1204:       sum5 += v[jrow]*x[7*idx[jrow]+4];
1205:       sum6 += v[jrow]*x[7*idx[jrow]+5];
1206:       sum7 += v[jrow]*x[7*idx[jrow]+6];
1207:       jrow++;
1208:     }
1209:     y[7*i]   += sum1;
1210:     y[7*i+1] += sum2;
1211:     y[7*i+2] += sum3;
1212:     y[7*i+3] += sum4;
1213:     y[7*i+4] += sum5;
1214:     y[7*i+5] += sum6;
1215:     y[7*i+6] += sum7;
1216:   }

1218:   PetscLogFlops(14.0*a->nz);
1219:   VecRestoreArrayRead(xx,&x);
1220:   VecRestoreArray(zz,&y);
1221:   return(0);
1222: }

1226: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1227: {
1228:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1229:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1230:   const PetscScalar *x,*v;
1231:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1232:   PetscErrorCode    ierr;
1233:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1234:   PetscInt          n,i;

1237:   if (yy != zz) {VecCopy(yy,zz);}
1238:   VecGetArrayRead(xx,&x);
1239:   VecGetArray(zz,&y);
1240:   for (i=0; i<m; i++) {
1241:     idx    = a->j + a->i[i];
1242:     v      = a->a + a->i[i];
1243:     n      = a->i[i+1] - a->i[i];
1244:     alpha1 = x[7*i];
1245:     alpha2 = x[7*i+1];
1246:     alpha3 = x[7*i+2];
1247:     alpha4 = x[7*i+3];
1248:     alpha5 = x[7*i+4];
1249:     alpha6 = x[7*i+5];
1250:     alpha7 = x[7*i+6];
1251:     while (n-->0) {
1252:       y[7*(*idx)]   += alpha1*(*v);
1253:       y[7*(*idx)+1] += alpha2*(*v);
1254:       y[7*(*idx)+2] += alpha3*(*v);
1255:       y[7*(*idx)+3] += alpha4*(*v);
1256:       y[7*(*idx)+4] += alpha5*(*v);
1257:       y[7*(*idx)+5] += alpha6*(*v);
1258:       y[7*(*idx)+6] += alpha7*(*v);
1259:       idx++; v++;
1260:     }
1261:   }
1262:   PetscLogFlops(14.0*a->nz);
1263:   VecRestoreArrayRead(xx,&x);
1264:   VecRestoreArray(zz,&y);
1265:   return(0);
1266: }

1270: PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1271: {
1272:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1273:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1274:   const PetscScalar *x,*v;
1275:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1276:   PetscErrorCode    ierr;
1277:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1278:   PetscInt          nonzerorow=0,n,i,jrow,j;

1281:   VecGetArrayRead(xx,&x);
1282:   VecGetArray(yy,&y);
1283:   idx  = a->j;
1284:   v    = a->a;
1285:   ii   = a->i;

1287:   for (i=0; i<m; i++) {
1288:     jrow  = ii[i];
1289:     n     = ii[i+1] - jrow;
1290:     sum1  = 0.0;
1291:     sum2  = 0.0;
1292:     sum3  = 0.0;
1293:     sum4  = 0.0;
1294:     sum5  = 0.0;
1295:     sum6  = 0.0;
1296:     sum7  = 0.0;
1297:     sum8  = 0.0;

1299:     nonzerorow += (n>0);
1300:     for (j=0; j<n; j++) {
1301:       sum1 += v[jrow]*x[8*idx[jrow]];
1302:       sum2 += v[jrow]*x[8*idx[jrow]+1];
1303:       sum3 += v[jrow]*x[8*idx[jrow]+2];
1304:       sum4 += v[jrow]*x[8*idx[jrow]+3];
1305:       sum5 += v[jrow]*x[8*idx[jrow]+4];
1306:       sum6 += v[jrow]*x[8*idx[jrow]+5];
1307:       sum7 += v[jrow]*x[8*idx[jrow]+6];
1308:       sum8 += v[jrow]*x[8*idx[jrow]+7];
1309:       jrow++;
1310:     }
1311:     y[8*i]   = sum1;
1312:     y[8*i+1] = sum2;
1313:     y[8*i+2] = sum3;
1314:     y[8*i+3] = sum4;
1315:     y[8*i+4] = sum5;
1316:     y[8*i+5] = sum6;
1317:     y[8*i+6] = sum7;
1318:     y[8*i+7] = sum8;
1319:   }

1321:   PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);
1322:   VecRestoreArrayRead(xx,&x);
1323:   VecRestoreArray(yy,&y);
1324:   return(0);
1325: }

1329: PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1330: {
1331:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1332:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1333:   const PetscScalar *x,*v;
1334:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1335:   PetscErrorCode    ierr;
1336:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1337:   PetscInt          n,i;

1340:   VecSet(yy,0.0);
1341:   VecGetArrayRead(xx,&x);
1342:   VecGetArray(yy,&y);

1344:   for (i=0; i<m; i++) {
1345:     idx    = a->j + a->i[i];
1346:     v      = a->a + a->i[i];
1347:     n      = a->i[i+1] - a->i[i];
1348:     alpha1 = x[8*i];
1349:     alpha2 = x[8*i+1];
1350:     alpha3 = x[8*i+2];
1351:     alpha4 = x[8*i+3];
1352:     alpha5 = x[8*i+4];
1353:     alpha6 = x[8*i+5];
1354:     alpha7 = x[8*i+6];
1355:     alpha8 = x[8*i+7];
1356:     while (n-->0) {
1357:       y[8*(*idx)]   += alpha1*(*v);
1358:       y[8*(*idx)+1] += alpha2*(*v);
1359:       y[8*(*idx)+2] += alpha3*(*v);
1360:       y[8*(*idx)+3] += alpha4*(*v);
1361:       y[8*(*idx)+4] += alpha5*(*v);
1362:       y[8*(*idx)+5] += alpha6*(*v);
1363:       y[8*(*idx)+6] += alpha7*(*v);
1364:       y[8*(*idx)+7] += alpha8*(*v);
1365:       idx++; v++;
1366:     }
1367:   }
1368:   PetscLogFlops(16.0*a->nz);
1369:   VecRestoreArrayRead(xx,&x);
1370:   VecRestoreArray(yy,&y);
1371:   return(0);
1372: }

1376: PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1377: {
1378:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1379:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1380:   const PetscScalar *x,*v;
1381:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1382:   PetscErrorCode    ierr;
1383:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1384:   PetscInt          n,i,jrow,j;

1387:   if (yy != zz) {VecCopy(yy,zz);}
1388:   VecGetArrayRead(xx,&x);
1389:   VecGetArray(zz,&y);
1390:   idx  = a->j;
1391:   v    = a->a;
1392:   ii   = a->i;

1394:   for (i=0; i<m; i++) {
1395:     jrow = ii[i];
1396:     n    = ii[i+1] - jrow;
1397:     sum1 = 0.0;
1398:     sum2 = 0.0;
1399:     sum3 = 0.0;
1400:     sum4 = 0.0;
1401:     sum5 = 0.0;
1402:     sum6 = 0.0;
1403:     sum7 = 0.0;
1404:     sum8 = 0.0;
1405:     for (j=0; j<n; j++) {
1406:       sum1 += v[jrow]*x[8*idx[jrow]];
1407:       sum2 += v[jrow]*x[8*idx[jrow]+1];
1408:       sum3 += v[jrow]*x[8*idx[jrow]+2];
1409:       sum4 += v[jrow]*x[8*idx[jrow]+3];
1410:       sum5 += v[jrow]*x[8*idx[jrow]+4];
1411:       sum6 += v[jrow]*x[8*idx[jrow]+5];
1412:       sum7 += v[jrow]*x[8*idx[jrow]+6];
1413:       sum8 += v[jrow]*x[8*idx[jrow]+7];
1414:       jrow++;
1415:     }
1416:     y[8*i]   += sum1;
1417:     y[8*i+1] += sum2;
1418:     y[8*i+2] += sum3;
1419:     y[8*i+3] += sum4;
1420:     y[8*i+4] += sum5;
1421:     y[8*i+5] += sum6;
1422:     y[8*i+6] += sum7;
1423:     y[8*i+7] += sum8;
1424:   }

1426:   PetscLogFlops(16.0*a->nz);
1427:   VecRestoreArrayRead(xx,&x);
1428:   VecRestoreArray(zz,&y);
1429:   return(0);
1430: }

1434: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1435: {
1436:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1437:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1438:   const PetscScalar *x,*v;
1439:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1440:   PetscErrorCode    ierr;
1441:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1442:   PetscInt          n,i;

1445:   if (yy != zz) {VecCopy(yy,zz);}
1446:   VecGetArrayRead(xx,&x);
1447:   VecGetArray(zz,&y);
1448:   for (i=0; i<m; i++) {
1449:     idx    = a->j + a->i[i];
1450:     v      = a->a + a->i[i];
1451:     n      = a->i[i+1] - a->i[i];
1452:     alpha1 = x[8*i];
1453:     alpha2 = x[8*i+1];
1454:     alpha3 = x[8*i+2];
1455:     alpha4 = x[8*i+3];
1456:     alpha5 = x[8*i+4];
1457:     alpha6 = x[8*i+5];
1458:     alpha7 = x[8*i+6];
1459:     alpha8 = x[8*i+7];
1460:     while (n-->0) {
1461:       y[8*(*idx)]   += alpha1*(*v);
1462:       y[8*(*idx)+1] += alpha2*(*v);
1463:       y[8*(*idx)+2] += alpha3*(*v);
1464:       y[8*(*idx)+3] += alpha4*(*v);
1465:       y[8*(*idx)+4] += alpha5*(*v);
1466:       y[8*(*idx)+5] += alpha6*(*v);
1467:       y[8*(*idx)+6] += alpha7*(*v);
1468:       y[8*(*idx)+7] += alpha8*(*v);
1469:       idx++; v++;
1470:     }
1471:   }
1472:   PetscLogFlops(16.0*a->nz);
1473:   VecRestoreArrayRead(xx,&x);
1474:   VecRestoreArray(zz,&y);
1475:   return(0);
1476: }

1478: /* ------------------------------------------------------------------------------*/
1481: PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1482: {
1483:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1484:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1485:   const PetscScalar *x,*v;
1486:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1487:   PetscErrorCode    ierr;
1488:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1489:   PetscInt          nonzerorow=0,n,i,jrow,j;

1492:   VecGetArrayRead(xx,&x);
1493:   VecGetArray(yy,&y);
1494:   idx  = a->j;
1495:   v    = a->a;
1496:   ii   = a->i;

1498:   for (i=0; i<m; i++) {
1499:     jrow  = ii[i];
1500:     n     = ii[i+1] - jrow;
1501:     sum1  = 0.0;
1502:     sum2  = 0.0;
1503:     sum3  = 0.0;
1504:     sum4  = 0.0;
1505:     sum5  = 0.0;
1506:     sum6  = 0.0;
1507:     sum7  = 0.0;
1508:     sum8  = 0.0;
1509:     sum9  = 0.0;

1511:     nonzerorow += (n>0);
1512:     for (j=0; j<n; j++) {
1513:       sum1 += v[jrow]*x[9*idx[jrow]];
1514:       sum2 += v[jrow]*x[9*idx[jrow]+1];
1515:       sum3 += v[jrow]*x[9*idx[jrow]+2];
1516:       sum4 += v[jrow]*x[9*idx[jrow]+3];
1517:       sum5 += v[jrow]*x[9*idx[jrow]+4];
1518:       sum6 += v[jrow]*x[9*idx[jrow]+5];
1519:       sum7 += v[jrow]*x[9*idx[jrow]+6];
1520:       sum8 += v[jrow]*x[9*idx[jrow]+7];
1521:       sum9 += v[jrow]*x[9*idx[jrow]+8];
1522:       jrow++;
1523:     }
1524:     y[9*i]   = sum1;
1525:     y[9*i+1] = sum2;
1526:     y[9*i+2] = sum3;
1527:     y[9*i+3] = sum4;
1528:     y[9*i+4] = sum5;
1529:     y[9*i+5] = sum6;
1530:     y[9*i+6] = sum7;
1531:     y[9*i+7] = sum8;
1532:     y[9*i+8] = sum9;
1533:   }

1535:   PetscLogFlops(18.0*a->nz - 9*nonzerorow);
1536:   VecRestoreArrayRead(xx,&x);
1537:   VecRestoreArray(yy,&y);
1538:   return(0);
1539: }

1541: /* ------------------------------------------------------------------------------*/

1545: PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1546: {
1547:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1548:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1549:   const PetscScalar *x,*v;
1550:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1551:   PetscErrorCode    ierr;
1552:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1553:   PetscInt          n,i;

1556:   VecSet(yy,0.0);
1557:   VecGetArrayRead(xx,&x);
1558:   VecGetArray(yy,&y);

1560:   for (i=0; i<m; i++) {
1561:     idx    = a->j + a->i[i];
1562:     v      = a->a + a->i[i];
1563:     n      = a->i[i+1] - a->i[i];
1564:     alpha1 = x[9*i];
1565:     alpha2 = x[9*i+1];
1566:     alpha3 = x[9*i+2];
1567:     alpha4 = x[9*i+3];
1568:     alpha5 = x[9*i+4];
1569:     alpha6 = x[9*i+5];
1570:     alpha7 = x[9*i+6];
1571:     alpha8 = x[9*i+7];
1572:     alpha9 = x[9*i+8];
1573:     while (n-->0) {
1574:       y[9*(*idx)]   += alpha1*(*v);
1575:       y[9*(*idx)+1] += alpha2*(*v);
1576:       y[9*(*idx)+2] += alpha3*(*v);
1577:       y[9*(*idx)+3] += alpha4*(*v);
1578:       y[9*(*idx)+4] += alpha5*(*v);
1579:       y[9*(*idx)+5] += alpha6*(*v);
1580:       y[9*(*idx)+6] += alpha7*(*v);
1581:       y[9*(*idx)+7] += alpha8*(*v);
1582:       y[9*(*idx)+8] += alpha9*(*v);
1583:       idx++; v++;
1584:     }
1585:   }
1586:   PetscLogFlops(18.0*a->nz);
1587:   VecRestoreArrayRead(xx,&x);
1588:   VecRestoreArray(yy,&y);
1589:   return(0);
1590: }

1594: PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1595: {
1596:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1597:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1598:   const PetscScalar *x,*v;
1599:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1600:   PetscErrorCode    ierr;
1601:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1602:   PetscInt          n,i,jrow,j;

1605:   if (yy != zz) {VecCopy(yy,zz);}
1606:   VecGetArrayRead(xx,&x);
1607:   VecGetArray(zz,&y);
1608:   idx  = a->j;
1609:   v    = a->a;
1610:   ii   = a->i;

1612:   for (i=0; i<m; i++) {
1613:     jrow = ii[i];
1614:     n    = ii[i+1] - jrow;
1615:     sum1 = 0.0;
1616:     sum2 = 0.0;
1617:     sum3 = 0.0;
1618:     sum4 = 0.0;
1619:     sum5 = 0.0;
1620:     sum6 = 0.0;
1621:     sum7 = 0.0;
1622:     sum8 = 0.0;
1623:     sum9 = 0.0;
1624:     for (j=0; j<n; j++) {
1625:       sum1 += v[jrow]*x[9*idx[jrow]];
1626:       sum2 += v[jrow]*x[9*idx[jrow]+1];
1627:       sum3 += v[jrow]*x[9*idx[jrow]+2];
1628:       sum4 += v[jrow]*x[9*idx[jrow]+3];
1629:       sum5 += v[jrow]*x[9*idx[jrow]+4];
1630:       sum6 += v[jrow]*x[9*idx[jrow]+5];
1631:       sum7 += v[jrow]*x[9*idx[jrow]+6];
1632:       sum8 += v[jrow]*x[9*idx[jrow]+7];
1633:       sum9 += v[jrow]*x[9*idx[jrow]+8];
1634:       jrow++;
1635:     }
1636:     y[9*i]   += sum1;
1637:     y[9*i+1] += sum2;
1638:     y[9*i+2] += sum3;
1639:     y[9*i+3] += sum4;
1640:     y[9*i+4] += sum5;
1641:     y[9*i+5] += sum6;
1642:     y[9*i+6] += sum7;
1643:     y[9*i+7] += sum8;
1644:     y[9*i+8] += sum9;
1645:   }

1647:   PetscLogFlops(18*a->nz);
1648:   VecRestoreArrayRead(xx,&x);
1649:   VecRestoreArray(zz,&y);
1650:   return(0);
1651: }

1655: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1656: {
1657:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1658:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1659:   const PetscScalar *x,*v;
1660:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1661:   PetscErrorCode    ierr;
1662:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1663:   PetscInt          n,i;

1666:   if (yy != zz) {VecCopy(yy,zz);}
1667:   VecGetArrayRead(xx,&x);
1668:   VecGetArray(zz,&y);
1669:   for (i=0; i<m; i++) {
1670:     idx    = a->j + a->i[i];
1671:     v      = a->a + a->i[i];
1672:     n      = a->i[i+1] - a->i[i];
1673:     alpha1 = x[9*i];
1674:     alpha2 = x[9*i+1];
1675:     alpha3 = x[9*i+2];
1676:     alpha4 = x[9*i+3];
1677:     alpha5 = x[9*i+4];
1678:     alpha6 = x[9*i+5];
1679:     alpha7 = x[9*i+6];
1680:     alpha8 = x[9*i+7];
1681:     alpha9 = x[9*i+8];
1682:     while (n-->0) {
1683:       y[9*(*idx)]   += alpha1*(*v);
1684:       y[9*(*idx)+1] += alpha2*(*v);
1685:       y[9*(*idx)+2] += alpha3*(*v);
1686:       y[9*(*idx)+3] += alpha4*(*v);
1687:       y[9*(*idx)+4] += alpha5*(*v);
1688:       y[9*(*idx)+5] += alpha6*(*v);
1689:       y[9*(*idx)+6] += alpha7*(*v);
1690:       y[9*(*idx)+7] += alpha8*(*v);
1691:       y[9*(*idx)+8] += alpha9*(*v);
1692:       idx++; v++;
1693:     }
1694:   }
1695:   PetscLogFlops(18.0*a->nz);
1696:   VecRestoreArrayRead(xx,&x);
1697:   VecRestoreArray(zz,&y);
1698:   return(0);
1699: }
1702: PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1703: {
1704:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1705:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1706:   const PetscScalar *x,*v;
1707:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1708:   PetscErrorCode    ierr;
1709:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1710:   PetscInt          nonzerorow=0,n,i,jrow,j;

1713:   VecGetArrayRead(xx,&x);
1714:   VecGetArray(yy,&y);
1715:   idx  = a->j;
1716:   v    = a->a;
1717:   ii   = a->i;

1719:   for (i=0; i<m; i++) {
1720:     jrow  = ii[i];
1721:     n     = ii[i+1] - jrow;
1722:     sum1  = 0.0;
1723:     sum2  = 0.0;
1724:     sum3  = 0.0;
1725:     sum4  = 0.0;
1726:     sum5  = 0.0;
1727:     sum6  = 0.0;
1728:     sum7  = 0.0;
1729:     sum8  = 0.0;
1730:     sum9  = 0.0;
1731:     sum10 = 0.0;

1733:     nonzerorow += (n>0);
1734:     for (j=0; j<n; j++) {
1735:       sum1  += v[jrow]*x[10*idx[jrow]];
1736:       sum2  += v[jrow]*x[10*idx[jrow]+1];
1737:       sum3  += v[jrow]*x[10*idx[jrow]+2];
1738:       sum4  += v[jrow]*x[10*idx[jrow]+3];
1739:       sum5  += v[jrow]*x[10*idx[jrow]+4];
1740:       sum6  += v[jrow]*x[10*idx[jrow]+5];
1741:       sum7  += v[jrow]*x[10*idx[jrow]+6];
1742:       sum8  += v[jrow]*x[10*idx[jrow]+7];
1743:       sum9  += v[jrow]*x[10*idx[jrow]+8];
1744:       sum10 += v[jrow]*x[10*idx[jrow]+9];
1745:       jrow++;
1746:     }
1747:     y[10*i]   = sum1;
1748:     y[10*i+1] = sum2;
1749:     y[10*i+2] = sum3;
1750:     y[10*i+3] = sum4;
1751:     y[10*i+4] = sum5;
1752:     y[10*i+5] = sum6;
1753:     y[10*i+6] = sum7;
1754:     y[10*i+7] = sum8;
1755:     y[10*i+8] = sum9;
1756:     y[10*i+9] = sum10;
1757:   }

1759:   PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);
1760:   VecRestoreArrayRead(xx,&x);
1761:   VecRestoreArray(yy,&y);
1762:   return(0);
1763: }

1767: PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1768: {
1769:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1770:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1771:   const PetscScalar *x,*v;
1772:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1773:   PetscErrorCode    ierr;
1774:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1775:   PetscInt          n,i,jrow,j;

1778:   if (yy != zz) {VecCopy(yy,zz);}
1779:   VecGetArrayRead(xx,&x);
1780:   VecGetArray(zz,&y);
1781:   idx  = a->j;
1782:   v    = a->a;
1783:   ii   = a->i;

1785:   for (i=0; i<m; i++) {
1786:     jrow  = ii[i];
1787:     n     = ii[i+1] - jrow;
1788:     sum1  = 0.0;
1789:     sum2  = 0.0;
1790:     sum3  = 0.0;
1791:     sum4  = 0.0;
1792:     sum5  = 0.0;
1793:     sum6  = 0.0;
1794:     sum7  = 0.0;
1795:     sum8  = 0.0;
1796:     sum9  = 0.0;
1797:     sum10 = 0.0;
1798:     for (j=0; j<n; j++) {
1799:       sum1  += v[jrow]*x[10*idx[jrow]];
1800:       sum2  += v[jrow]*x[10*idx[jrow]+1];
1801:       sum3  += v[jrow]*x[10*idx[jrow]+2];
1802:       sum4  += v[jrow]*x[10*idx[jrow]+3];
1803:       sum5  += v[jrow]*x[10*idx[jrow]+4];
1804:       sum6  += v[jrow]*x[10*idx[jrow]+5];
1805:       sum7  += v[jrow]*x[10*idx[jrow]+6];
1806:       sum8  += v[jrow]*x[10*idx[jrow]+7];
1807:       sum9  += v[jrow]*x[10*idx[jrow]+8];
1808:       sum10 += v[jrow]*x[10*idx[jrow]+9];
1809:       jrow++;
1810:     }
1811:     y[10*i]   += sum1;
1812:     y[10*i+1] += sum2;
1813:     y[10*i+2] += sum3;
1814:     y[10*i+3] += sum4;
1815:     y[10*i+4] += sum5;
1816:     y[10*i+5] += sum6;
1817:     y[10*i+6] += sum7;
1818:     y[10*i+7] += sum8;
1819:     y[10*i+8] += sum9;
1820:     y[10*i+9] += sum10;
1821:   }

1823:   PetscLogFlops(20.0*a->nz);
1824:   VecRestoreArrayRead(xx,&x);
1825:   VecRestoreArray(yy,&y);
1826:   return(0);
1827: }

1831: PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1832: {
1833:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1834:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1835:   const PetscScalar *x,*v;
1836:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1837:   PetscErrorCode    ierr;
1838:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1839:   PetscInt          n,i;

1842:   VecSet(yy,0.0);
1843:   VecGetArrayRead(xx,&x);
1844:   VecGetArray(yy,&y);

1846:   for (i=0; i<m; i++) {
1847:     idx     = a->j + a->i[i];
1848:     v       = a->a + a->i[i];
1849:     n       = a->i[i+1] - a->i[i];
1850:     alpha1  = x[10*i];
1851:     alpha2  = x[10*i+1];
1852:     alpha3  = x[10*i+2];
1853:     alpha4  = x[10*i+3];
1854:     alpha5  = x[10*i+4];
1855:     alpha6  = x[10*i+5];
1856:     alpha7  = x[10*i+6];
1857:     alpha8  = x[10*i+7];
1858:     alpha9  = x[10*i+8];
1859:     alpha10 = x[10*i+9];
1860:     while (n-->0) {
1861:       y[10*(*idx)]   += alpha1*(*v);
1862:       y[10*(*idx)+1] += alpha2*(*v);
1863:       y[10*(*idx)+2] += alpha3*(*v);
1864:       y[10*(*idx)+3] += alpha4*(*v);
1865:       y[10*(*idx)+4] += alpha5*(*v);
1866:       y[10*(*idx)+5] += alpha6*(*v);
1867:       y[10*(*idx)+6] += alpha7*(*v);
1868:       y[10*(*idx)+7] += alpha8*(*v);
1869:       y[10*(*idx)+8] += alpha9*(*v);
1870:       y[10*(*idx)+9] += alpha10*(*v);
1871:       idx++; v++;
1872:     }
1873:   }
1874:   PetscLogFlops(20.0*a->nz);
1875:   VecRestoreArrayRead(xx,&x);
1876:   VecRestoreArray(yy,&y);
1877:   return(0);
1878: }

1882: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1883: {
1884:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1885:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1886:   const PetscScalar *x,*v;
1887:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1888:   PetscErrorCode    ierr;
1889:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1890:   PetscInt          n,i;

1893:   if (yy != zz) {VecCopy(yy,zz);}
1894:   VecGetArrayRead(xx,&x);
1895:   VecGetArray(zz,&y);
1896:   for (i=0; i<m; i++) {
1897:     idx     = a->j + a->i[i];
1898:     v       = a->a + a->i[i];
1899:     n       = a->i[i+1] - a->i[i];
1900:     alpha1  = x[10*i];
1901:     alpha2  = x[10*i+1];
1902:     alpha3  = x[10*i+2];
1903:     alpha4  = x[10*i+3];
1904:     alpha5  = x[10*i+4];
1905:     alpha6  = x[10*i+5];
1906:     alpha7  = x[10*i+6];
1907:     alpha8  = x[10*i+7];
1908:     alpha9  = x[10*i+8];
1909:     alpha10 = x[10*i+9];
1910:     while (n-->0) {
1911:       y[10*(*idx)]   += alpha1*(*v);
1912:       y[10*(*idx)+1] += alpha2*(*v);
1913:       y[10*(*idx)+2] += alpha3*(*v);
1914:       y[10*(*idx)+3] += alpha4*(*v);
1915:       y[10*(*idx)+4] += alpha5*(*v);
1916:       y[10*(*idx)+5] += alpha6*(*v);
1917:       y[10*(*idx)+6] += alpha7*(*v);
1918:       y[10*(*idx)+7] += alpha8*(*v);
1919:       y[10*(*idx)+8] += alpha9*(*v);
1920:       y[10*(*idx)+9] += alpha10*(*v);
1921:       idx++; v++;
1922:     }
1923:   }
1924:   PetscLogFlops(20.0*a->nz);
1925:   VecRestoreArrayRead(xx,&x);
1926:   VecRestoreArray(zz,&y);
1927:   return(0);
1928: }


1931: /*--------------------------------------------------------------------------------------------*/
1934: PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1935: {
1936:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1937:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1938:   const PetscScalar *x,*v;
1939:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1940:   PetscErrorCode    ierr;
1941:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1942:   PetscInt          nonzerorow=0,n,i,jrow,j;

1945:   VecGetArrayRead(xx,&x);
1946:   VecGetArray(yy,&y);
1947:   idx  = a->j;
1948:   v    = a->a;
1949:   ii   = a->i;

1951:   for (i=0; i<m; i++) {
1952:     jrow  = ii[i];
1953:     n     = ii[i+1] - jrow;
1954:     sum1  = 0.0;
1955:     sum2  = 0.0;
1956:     sum3  = 0.0;
1957:     sum4  = 0.0;
1958:     sum5  = 0.0;
1959:     sum6  = 0.0;
1960:     sum7  = 0.0;
1961:     sum8  = 0.0;
1962:     sum9  = 0.0;
1963:     sum10 = 0.0;
1964:     sum11 = 0.0;

1966:     nonzerorow += (n>0);
1967:     for (j=0; j<n; j++) {
1968:       sum1  += v[jrow]*x[11*idx[jrow]];
1969:       sum2  += v[jrow]*x[11*idx[jrow]+1];
1970:       sum3  += v[jrow]*x[11*idx[jrow]+2];
1971:       sum4  += v[jrow]*x[11*idx[jrow]+3];
1972:       sum5  += v[jrow]*x[11*idx[jrow]+4];
1973:       sum6  += v[jrow]*x[11*idx[jrow]+5];
1974:       sum7  += v[jrow]*x[11*idx[jrow]+6];
1975:       sum8  += v[jrow]*x[11*idx[jrow]+7];
1976:       sum9  += v[jrow]*x[11*idx[jrow]+8];
1977:       sum10 += v[jrow]*x[11*idx[jrow]+9];
1978:       sum11 += v[jrow]*x[11*idx[jrow]+10];
1979:       jrow++;
1980:     }
1981:     y[11*i]    = sum1;
1982:     y[11*i+1]  = sum2;
1983:     y[11*i+2]  = sum3;
1984:     y[11*i+3]  = sum4;
1985:     y[11*i+4]  = sum5;
1986:     y[11*i+5]  = sum6;
1987:     y[11*i+6]  = sum7;
1988:     y[11*i+7]  = sum8;
1989:     y[11*i+8]  = sum9;
1990:     y[11*i+9]  = sum10;
1991:     y[11*i+10] = sum11;
1992:   }

1994:   PetscLogFlops(22*a->nz - 11*nonzerorow);
1995:   VecRestoreArrayRead(xx,&x);
1996:   VecRestoreArray(yy,&y);
1997:   return(0);
1998: }

2002: PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2003: {
2004:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2005:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2006:   const PetscScalar *x,*v;
2007:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
2008:   PetscErrorCode    ierr;
2009:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2010:   PetscInt          n,i,jrow,j;

2013:   if (yy != zz) {VecCopy(yy,zz);}
2014:   VecGetArrayRead(xx,&x);
2015:   VecGetArray(zz,&y);
2016:   idx  = a->j;
2017:   v    = a->a;
2018:   ii   = a->i;

2020:   for (i=0; i<m; i++) {
2021:     jrow  = ii[i];
2022:     n     = ii[i+1] - jrow;
2023:     sum1  = 0.0;
2024:     sum2  = 0.0;
2025:     sum3  = 0.0;
2026:     sum4  = 0.0;
2027:     sum5  = 0.0;
2028:     sum6  = 0.0;
2029:     sum7  = 0.0;
2030:     sum8  = 0.0;
2031:     sum9  = 0.0;
2032:     sum10 = 0.0;
2033:     sum11 = 0.0;
2034:     for (j=0; j<n; j++) {
2035:       sum1  += v[jrow]*x[11*idx[jrow]];
2036:       sum2  += v[jrow]*x[11*idx[jrow]+1];
2037:       sum3  += v[jrow]*x[11*idx[jrow]+2];
2038:       sum4  += v[jrow]*x[11*idx[jrow]+3];
2039:       sum5  += v[jrow]*x[11*idx[jrow]+4];
2040:       sum6  += v[jrow]*x[11*idx[jrow]+5];
2041:       sum7  += v[jrow]*x[11*idx[jrow]+6];
2042:       sum8  += v[jrow]*x[11*idx[jrow]+7];
2043:       sum9  += v[jrow]*x[11*idx[jrow]+8];
2044:       sum10 += v[jrow]*x[11*idx[jrow]+9];
2045:       sum11 += v[jrow]*x[11*idx[jrow]+10];
2046:       jrow++;
2047:     }
2048:     y[11*i]    += sum1;
2049:     y[11*i+1]  += sum2;
2050:     y[11*i+2]  += sum3;
2051:     y[11*i+3]  += sum4;
2052:     y[11*i+4]  += sum5;
2053:     y[11*i+5]  += sum6;
2054:     y[11*i+6]  += sum7;
2055:     y[11*i+7]  += sum8;
2056:     y[11*i+8]  += sum9;
2057:     y[11*i+9]  += sum10;
2058:     y[11*i+10] += sum11;
2059:   }

2061:   PetscLogFlops(22*a->nz);
2062:   VecRestoreArrayRead(xx,&x);
2063:   VecRestoreArray(yy,&y);
2064:   return(0);
2065: }

2069: PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
2070: {
2071:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2072:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2073:   const PetscScalar *x,*v;
2074:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2075:   PetscErrorCode    ierr;
2076:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2077:   PetscInt          n,i;

2080:   VecSet(yy,0.0);
2081:   VecGetArrayRead(xx,&x);
2082:   VecGetArray(yy,&y);

2084:   for (i=0; i<m; i++) {
2085:     idx     = a->j + a->i[i];
2086:     v       = a->a + a->i[i];
2087:     n       = a->i[i+1] - a->i[i];
2088:     alpha1  = x[11*i];
2089:     alpha2  = x[11*i+1];
2090:     alpha3  = x[11*i+2];
2091:     alpha4  = x[11*i+3];
2092:     alpha5  = x[11*i+4];
2093:     alpha6  = x[11*i+5];
2094:     alpha7  = x[11*i+6];
2095:     alpha8  = x[11*i+7];
2096:     alpha9  = x[11*i+8];
2097:     alpha10 = x[11*i+9];
2098:     alpha11 = x[11*i+10];
2099:     while (n-->0) {
2100:       y[11*(*idx)]    += alpha1*(*v);
2101:       y[11*(*idx)+1]  += alpha2*(*v);
2102:       y[11*(*idx)+2]  += alpha3*(*v);
2103:       y[11*(*idx)+3]  += alpha4*(*v);
2104:       y[11*(*idx)+4]  += alpha5*(*v);
2105:       y[11*(*idx)+5]  += alpha6*(*v);
2106:       y[11*(*idx)+6]  += alpha7*(*v);
2107:       y[11*(*idx)+7]  += alpha8*(*v);
2108:       y[11*(*idx)+8]  += alpha9*(*v);
2109:       y[11*(*idx)+9]  += alpha10*(*v);
2110:       y[11*(*idx)+10] += alpha11*(*v);
2111:       idx++; v++;
2112:     }
2113:   }
2114:   PetscLogFlops(22*a->nz);
2115:   VecRestoreArrayRead(xx,&x);
2116:   VecRestoreArray(yy,&y);
2117:   return(0);
2118: }

2122: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2123: {
2124:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2125:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2126:   const PetscScalar *x,*v;
2127:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2128:   PetscErrorCode    ierr;
2129:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2130:   PetscInt          n,i;

2133:   if (yy != zz) {VecCopy(yy,zz);}
2134:   VecGetArrayRead(xx,&x);
2135:   VecGetArray(zz,&y);
2136:   for (i=0; i<m; i++) {
2137:     idx     = a->j + a->i[i];
2138:     v       = a->a + a->i[i];
2139:     n       = a->i[i+1] - a->i[i];
2140:     alpha1  = x[11*i];
2141:     alpha2  = x[11*i+1];
2142:     alpha3  = x[11*i+2];
2143:     alpha4  = x[11*i+3];
2144:     alpha5  = x[11*i+4];
2145:     alpha6  = x[11*i+5];
2146:     alpha7  = x[11*i+6];
2147:     alpha8  = x[11*i+7];
2148:     alpha9  = x[11*i+8];
2149:     alpha10 = x[11*i+9];
2150:     alpha11 = x[11*i+10];
2151:     while (n-->0) {
2152:       y[11*(*idx)]    += alpha1*(*v);
2153:       y[11*(*idx)+1]  += alpha2*(*v);
2154:       y[11*(*idx)+2]  += alpha3*(*v);
2155:       y[11*(*idx)+3]  += alpha4*(*v);
2156:       y[11*(*idx)+4]  += alpha5*(*v);
2157:       y[11*(*idx)+5]  += alpha6*(*v);
2158:       y[11*(*idx)+6]  += alpha7*(*v);
2159:       y[11*(*idx)+7]  += alpha8*(*v);
2160:       y[11*(*idx)+8]  += alpha9*(*v);
2161:       y[11*(*idx)+9]  += alpha10*(*v);
2162:       y[11*(*idx)+10] += alpha11*(*v);
2163:       idx++; v++;
2164:     }
2165:   }
2166:   PetscLogFlops(22*a->nz);
2167:   VecRestoreArrayRead(xx,&x);
2168:   VecRestoreArray(zz,&y);
2169:   return(0);
2170: }


2173: /*--------------------------------------------------------------------------------------------*/
2176: PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2177: {
2178:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2179:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2180:   const PetscScalar *x,*v;
2181:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2182:   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2183:   PetscErrorCode    ierr;
2184:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2185:   PetscInt          nonzerorow=0,n,i,jrow,j;

2188:   VecGetArrayRead(xx,&x);
2189:   VecGetArray(yy,&y);
2190:   idx  = a->j;
2191:   v    = a->a;
2192:   ii   = a->i;

2194:   for (i=0; i<m; i++) {
2195:     jrow  = ii[i];
2196:     n     = ii[i+1] - jrow;
2197:     sum1  = 0.0;
2198:     sum2  = 0.0;
2199:     sum3  = 0.0;
2200:     sum4  = 0.0;
2201:     sum5  = 0.0;
2202:     sum6  = 0.0;
2203:     sum7  = 0.0;
2204:     sum8  = 0.0;
2205:     sum9  = 0.0;
2206:     sum10 = 0.0;
2207:     sum11 = 0.0;
2208:     sum12 = 0.0;
2209:     sum13 = 0.0;
2210:     sum14 = 0.0;
2211:     sum15 = 0.0;
2212:     sum16 = 0.0;

2214:     nonzerorow += (n>0);
2215:     for (j=0; j<n; j++) {
2216:       sum1  += v[jrow]*x[16*idx[jrow]];
2217:       sum2  += v[jrow]*x[16*idx[jrow]+1];
2218:       sum3  += v[jrow]*x[16*idx[jrow]+2];
2219:       sum4  += v[jrow]*x[16*idx[jrow]+3];
2220:       sum5  += v[jrow]*x[16*idx[jrow]+4];
2221:       sum6  += v[jrow]*x[16*idx[jrow]+5];
2222:       sum7  += v[jrow]*x[16*idx[jrow]+6];
2223:       sum8  += v[jrow]*x[16*idx[jrow]+7];
2224:       sum9  += v[jrow]*x[16*idx[jrow]+8];
2225:       sum10 += v[jrow]*x[16*idx[jrow]+9];
2226:       sum11 += v[jrow]*x[16*idx[jrow]+10];
2227:       sum12 += v[jrow]*x[16*idx[jrow]+11];
2228:       sum13 += v[jrow]*x[16*idx[jrow]+12];
2229:       sum14 += v[jrow]*x[16*idx[jrow]+13];
2230:       sum15 += v[jrow]*x[16*idx[jrow]+14];
2231:       sum16 += v[jrow]*x[16*idx[jrow]+15];
2232:       jrow++;
2233:     }
2234:     y[16*i]    = sum1;
2235:     y[16*i+1]  = sum2;
2236:     y[16*i+2]  = sum3;
2237:     y[16*i+3]  = sum4;
2238:     y[16*i+4]  = sum5;
2239:     y[16*i+5]  = sum6;
2240:     y[16*i+6]  = sum7;
2241:     y[16*i+7]  = sum8;
2242:     y[16*i+8]  = sum9;
2243:     y[16*i+9]  = sum10;
2244:     y[16*i+10] = sum11;
2245:     y[16*i+11] = sum12;
2246:     y[16*i+12] = sum13;
2247:     y[16*i+13] = sum14;
2248:     y[16*i+14] = sum15;
2249:     y[16*i+15] = sum16;
2250:   }

2252:   PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);
2253:   VecRestoreArrayRead(xx,&x);
2254:   VecRestoreArray(yy,&y);
2255:   return(0);
2256: }

2260: PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2261: {
2262:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2263:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2264:   const PetscScalar *x,*v;
2265:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2266:   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2267:   PetscErrorCode    ierr;
2268:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2269:   PetscInt          n,i;

2272:   VecSet(yy,0.0);
2273:   VecGetArrayRead(xx,&x);
2274:   VecGetArray(yy,&y);

2276:   for (i=0; i<m; i++) {
2277:     idx     = a->j + a->i[i];
2278:     v       = a->a + a->i[i];
2279:     n       = a->i[i+1] - a->i[i];
2280:     alpha1  = x[16*i];
2281:     alpha2  = x[16*i+1];
2282:     alpha3  = x[16*i+2];
2283:     alpha4  = x[16*i+3];
2284:     alpha5  = x[16*i+4];
2285:     alpha6  = x[16*i+5];
2286:     alpha7  = x[16*i+6];
2287:     alpha8  = x[16*i+7];
2288:     alpha9  = x[16*i+8];
2289:     alpha10 = x[16*i+9];
2290:     alpha11 = x[16*i+10];
2291:     alpha12 = x[16*i+11];
2292:     alpha13 = x[16*i+12];
2293:     alpha14 = x[16*i+13];
2294:     alpha15 = x[16*i+14];
2295:     alpha16 = x[16*i+15];
2296:     while (n-->0) {
2297:       y[16*(*idx)]    += alpha1*(*v);
2298:       y[16*(*idx)+1]  += alpha2*(*v);
2299:       y[16*(*idx)+2]  += alpha3*(*v);
2300:       y[16*(*idx)+3]  += alpha4*(*v);
2301:       y[16*(*idx)+4]  += alpha5*(*v);
2302:       y[16*(*idx)+5]  += alpha6*(*v);
2303:       y[16*(*idx)+6]  += alpha7*(*v);
2304:       y[16*(*idx)+7]  += alpha8*(*v);
2305:       y[16*(*idx)+8]  += alpha9*(*v);
2306:       y[16*(*idx)+9]  += alpha10*(*v);
2307:       y[16*(*idx)+10] += alpha11*(*v);
2308:       y[16*(*idx)+11] += alpha12*(*v);
2309:       y[16*(*idx)+12] += alpha13*(*v);
2310:       y[16*(*idx)+13] += alpha14*(*v);
2311:       y[16*(*idx)+14] += alpha15*(*v);
2312:       y[16*(*idx)+15] += alpha16*(*v);
2313:       idx++; v++;
2314:     }
2315:   }
2316:   PetscLogFlops(32.0*a->nz);
2317:   VecRestoreArrayRead(xx,&x);
2318:   VecRestoreArray(yy,&y);
2319:   return(0);
2320: }

2324: PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2325: {
2326:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2327:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2328:   const PetscScalar *x,*v;
2329:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2330:   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2331:   PetscErrorCode    ierr;
2332:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2333:   PetscInt          n,i,jrow,j;

2336:   if (yy != zz) {VecCopy(yy,zz);}
2337:   VecGetArrayRead(xx,&x);
2338:   VecGetArray(zz,&y);
2339:   idx  = a->j;
2340:   v    = a->a;
2341:   ii   = a->i;

2343:   for (i=0; i<m; i++) {
2344:     jrow  = ii[i];
2345:     n     = ii[i+1] - jrow;
2346:     sum1  = 0.0;
2347:     sum2  = 0.0;
2348:     sum3  = 0.0;
2349:     sum4  = 0.0;
2350:     sum5  = 0.0;
2351:     sum6  = 0.0;
2352:     sum7  = 0.0;
2353:     sum8  = 0.0;
2354:     sum9  = 0.0;
2355:     sum10 = 0.0;
2356:     sum11 = 0.0;
2357:     sum12 = 0.0;
2358:     sum13 = 0.0;
2359:     sum14 = 0.0;
2360:     sum15 = 0.0;
2361:     sum16 = 0.0;
2362:     for (j=0; j<n; j++) {
2363:       sum1  += v[jrow]*x[16*idx[jrow]];
2364:       sum2  += v[jrow]*x[16*idx[jrow]+1];
2365:       sum3  += v[jrow]*x[16*idx[jrow]+2];
2366:       sum4  += v[jrow]*x[16*idx[jrow]+3];
2367:       sum5  += v[jrow]*x[16*idx[jrow]+4];
2368:       sum6  += v[jrow]*x[16*idx[jrow]+5];
2369:       sum7  += v[jrow]*x[16*idx[jrow]+6];
2370:       sum8  += v[jrow]*x[16*idx[jrow]+7];
2371:       sum9  += v[jrow]*x[16*idx[jrow]+8];
2372:       sum10 += v[jrow]*x[16*idx[jrow]+9];
2373:       sum11 += v[jrow]*x[16*idx[jrow]+10];
2374:       sum12 += v[jrow]*x[16*idx[jrow]+11];
2375:       sum13 += v[jrow]*x[16*idx[jrow]+12];
2376:       sum14 += v[jrow]*x[16*idx[jrow]+13];
2377:       sum15 += v[jrow]*x[16*idx[jrow]+14];
2378:       sum16 += v[jrow]*x[16*idx[jrow]+15];
2379:       jrow++;
2380:     }
2381:     y[16*i]    += sum1;
2382:     y[16*i+1]  += sum2;
2383:     y[16*i+2]  += sum3;
2384:     y[16*i+3]  += sum4;
2385:     y[16*i+4]  += sum5;
2386:     y[16*i+5]  += sum6;
2387:     y[16*i+6]  += sum7;
2388:     y[16*i+7]  += sum8;
2389:     y[16*i+8]  += sum9;
2390:     y[16*i+9]  += sum10;
2391:     y[16*i+10] += sum11;
2392:     y[16*i+11] += sum12;
2393:     y[16*i+12] += sum13;
2394:     y[16*i+13] += sum14;
2395:     y[16*i+14] += sum15;
2396:     y[16*i+15] += sum16;
2397:   }

2399:   PetscLogFlops(32.0*a->nz);
2400:   VecRestoreArrayRead(xx,&x);
2401:   VecRestoreArray(zz,&y);
2402:   return(0);
2403: }

2407: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2408: {
2409:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2410:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2411:   const PetscScalar *x,*v;
2412:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2413:   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2414:   PetscErrorCode    ierr;
2415:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2416:   PetscInt          n,i;

2419:   if (yy != zz) {VecCopy(yy,zz);}
2420:   VecGetArrayRead(xx,&x);
2421:   VecGetArray(zz,&y);
2422:   for (i=0; i<m; i++) {
2423:     idx     = a->j + a->i[i];
2424:     v       = a->a + a->i[i];
2425:     n       = a->i[i+1] - a->i[i];
2426:     alpha1  = x[16*i];
2427:     alpha2  = x[16*i+1];
2428:     alpha3  = x[16*i+2];
2429:     alpha4  = x[16*i+3];
2430:     alpha5  = x[16*i+4];
2431:     alpha6  = x[16*i+5];
2432:     alpha7  = x[16*i+6];
2433:     alpha8  = x[16*i+7];
2434:     alpha9  = x[16*i+8];
2435:     alpha10 = x[16*i+9];
2436:     alpha11 = x[16*i+10];
2437:     alpha12 = x[16*i+11];
2438:     alpha13 = x[16*i+12];
2439:     alpha14 = x[16*i+13];
2440:     alpha15 = x[16*i+14];
2441:     alpha16 = x[16*i+15];
2442:     while (n-->0) {
2443:       y[16*(*idx)]    += alpha1*(*v);
2444:       y[16*(*idx)+1]  += alpha2*(*v);
2445:       y[16*(*idx)+2]  += alpha3*(*v);
2446:       y[16*(*idx)+3]  += alpha4*(*v);
2447:       y[16*(*idx)+4]  += alpha5*(*v);
2448:       y[16*(*idx)+5]  += alpha6*(*v);
2449:       y[16*(*idx)+6]  += alpha7*(*v);
2450:       y[16*(*idx)+7]  += alpha8*(*v);
2451:       y[16*(*idx)+8]  += alpha9*(*v);
2452:       y[16*(*idx)+9]  += alpha10*(*v);
2453:       y[16*(*idx)+10] += alpha11*(*v);
2454:       y[16*(*idx)+11] += alpha12*(*v);
2455:       y[16*(*idx)+12] += alpha13*(*v);
2456:       y[16*(*idx)+13] += alpha14*(*v);
2457:       y[16*(*idx)+14] += alpha15*(*v);
2458:       y[16*(*idx)+15] += alpha16*(*v);
2459:       idx++; v++;
2460:     }
2461:   }
2462:   PetscLogFlops(32.0*a->nz);
2463:   VecRestoreArrayRead(xx,&x);
2464:   VecRestoreArray(zz,&y);
2465:   return(0);
2466: }

2468: /*--------------------------------------------------------------------------------------------*/
2471: PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2472: {
2473:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2474:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2475:   const PetscScalar *x,*v;
2476:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2477:   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2478:   PetscErrorCode    ierr;
2479:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2480:   PetscInt          nonzerorow=0,n,i,jrow,j;

2483:   VecGetArrayRead(xx,&x);
2484:   VecGetArray(yy,&y);
2485:   idx  = a->j;
2486:   v    = a->a;
2487:   ii   = a->i;

2489:   for (i=0; i<m; i++) {
2490:     jrow  = ii[i];
2491:     n     = ii[i+1] - jrow;
2492:     sum1  = 0.0;
2493:     sum2  = 0.0;
2494:     sum3  = 0.0;
2495:     sum4  = 0.0;
2496:     sum5  = 0.0;
2497:     sum6  = 0.0;
2498:     sum7  = 0.0;
2499:     sum8  = 0.0;
2500:     sum9  = 0.0;
2501:     sum10 = 0.0;
2502:     sum11 = 0.0;
2503:     sum12 = 0.0;
2504:     sum13 = 0.0;
2505:     sum14 = 0.0;
2506:     sum15 = 0.0;
2507:     sum16 = 0.0;
2508:     sum17 = 0.0;
2509:     sum18 = 0.0;

2511:     nonzerorow += (n>0);
2512:     for (j=0; j<n; j++) {
2513:       sum1  += v[jrow]*x[18*idx[jrow]];
2514:       sum2  += v[jrow]*x[18*idx[jrow]+1];
2515:       sum3  += v[jrow]*x[18*idx[jrow]+2];
2516:       sum4  += v[jrow]*x[18*idx[jrow]+3];
2517:       sum5  += v[jrow]*x[18*idx[jrow]+4];
2518:       sum6  += v[jrow]*x[18*idx[jrow]+5];
2519:       sum7  += v[jrow]*x[18*idx[jrow]+6];
2520:       sum8  += v[jrow]*x[18*idx[jrow]+7];
2521:       sum9  += v[jrow]*x[18*idx[jrow]+8];
2522:       sum10 += v[jrow]*x[18*idx[jrow]+9];
2523:       sum11 += v[jrow]*x[18*idx[jrow]+10];
2524:       sum12 += v[jrow]*x[18*idx[jrow]+11];
2525:       sum13 += v[jrow]*x[18*idx[jrow]+12];
2526:       sum14 += v[jrow]*x[18*idx[jrow]+13];
2527:       sum15 += v[jrow]*x[18*idx[jrow]+14];
2528:       sum16 += v[jrow]*x[18*idx[jrow]+15];
2529:       sum17 += v[jrow]*x[18*idx[jrow]+16];
2530:       sum18 += v[jrow]*x[18*idx[jrow]+17];
2531:       jrow++;
2532:     }
2533:     y[18*i]    = sum1;
2534:     y[18*i+1]  = sum2;
2535:     y[18*i+2]  = sum3;
2536:     y[18*i+3]  = sum4;
2537:     y[18*i+4]  = sum5;
2538:     y[18*i+5]  = sum6;
2539:     y[18*i+6]  = sum7;
2540:     y[18*i+7]  = sum8;
2541:     y[18*i+8]  = sum9;
2542:     y[18*i+9]  = sum10;
2543:     y[18*i+10] = sum11;
2544:     y[18*i+11] = sum12;
2545:     y[18*i+12] = sum13;
2546:     y[18*i+13] = sum14;
2547:     y[18*i+14] = sum15;
2548:     y[18*i+15] = sum16;
2549:     y[18*i+16] = sum17;
2550:     y[18*i+17] = sum18;
2551:   }

2553:   PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);
2554:   VecRestoreArrayRead(xx,&x);
2555:   VecRestoreArray(yy,&y);
2556:   return(0);
2557: }

2561: PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2562: {
2563:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2564:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2565:   const PetscScalar *x,*v;
2566:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2567:   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2568:   PetscErrorCode    ierr;
2569:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2570:   PetscInt          n,i;

2573:   VecSet(yy,0.0);
2574:   VecGetArrayRead(xx,&x);
2575:   VecGetArray(yy,&y);

2577:   for (i=0; i<m; i++) {
2578:     idx     = a->j + a->i[i];
2579:     v       = a->a + a->i[i];
2580:     n       = a->i[i+1] - a->i[i];
2581:     alpha1  = x[18*i];
2582:     alpha2  = x[18*i+1];
2583:     alpha3  = x[18*i+2];
2584:     alpha4  = x[18*i+3];
2585:     alpha5  = x[18*i+4];
2586:     alpha6  = x[18*i+5];
2587:     alpha7  = x[18*i+6];
2588:     alpha8  = x[18*i+7];
2589:     alpha9  = x[18*i+8];
2590:     alpha10 = x[18*i+9];
2591:     alpha11 = x[18*i+10];
2592:     alpha12 = x[18*i+11];
2593:     alpha13 = x[18*i+12];
2594:     alpha14 = x[18*i+13];
2595:     alpha15 = x[18*i+14];
2596:     alpha16 = x[18*i+15];
2597:     alpha17 = x[18*i+16];
2598:     alpha18 = x[18*i+17];
2599:     while (n-->0) {
2600:       y[18*(*idx)]    += alpha1*(*v);
2601:       y[18*(*idx)+1]  += alpha2*(*v);
2602:       y[18*(*idx)+2]  += alpha3*(*v);
2603:       y[18*(*idx)+3]  += alpha4*(*v);
2604:       y[18*(*idx)+4]  += alpha5*(*v);
2605:       y[18*(*idx)+5]  += alpha6*(*v);
2606:       y[18*(*idx)+6]  += alpha7*(*v);
2607:       y[18*(*idx)+7]  += alpha8*(*v);
2608:       y[18*(*idx)+8]  += alpha9*(*v);
2609:       y[18*(*idx)+9]  += alpha10*(*v);
2610:       y[18*(*idx)+10] += alpha11*(*v);
2611:       y[18*(*idx)+11] += alpha12*(*v);
2612:       y[18*(*idx)+12] += alpha13*(*v);
2613:       y[18*(*idx)+13] += alpha14*(*v);
2614:       y[18*(*idx)+14] += alpha15*(*v);
2615:       y[18*(*idx)+15] += alpha16*(*v);
2616:       y[18*(*idx)+16] += alpha17*(*v);
2617:       y[18*(*idx)+17] += alpha18*(*v);
2618:       idx++; v++;
2619:     }
2620:   }
2621:   PetscLogFlops(36.0*a->nz);
2622:   VecRestoreArrayRead(xx,&x);
2623:   VecRestoreArray(yy,&y);
2624:   return(0);
2625: }

2629: PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2630: {
2631:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2632:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2633:   const PetscScalar *x,*v;
2634:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2635:   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2636:   PetscErrorCode    ierr;
2637:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2638:   PetscInt          n,i,jrow,j;

2641:   if (yy != zz) {VecCopy(yy,zz);}
2642:   VecGetArrayRead(xx,&x);
2643:   VecGetArray(zz,&y);
2644:   idx  = a->j;
2645:   v    = a->a;
2646:   ii   = a->i;

2648:   for (i=0; i<m; i++) {
2649:     jrow  = ii[i];
2650:     n     = ii[i+1] - jrow;
2651:     sum1  = 0.0;
2652:     sum2  = 0.0;
2653:     sum3  = 0.0;
2654:     sum4  = 0.0;
2655:     sum5  = 0.0;
2656:     sum6  = 0.0;
2657:     sum7  = 0.0;
2658:     sum8  = 0.0;
2659:     sum9  = 0.0;
2660:     sum10 = 0.0;
2661:     sum11 = 0.0;
2662:     sum12 = 0.0;
2663:     sum13 = 0.0;
2664:     sum14 = 0.0;
2665:     sum15 = 0.0;
2666:     sum16 = 0.0;
2667:     sum17 = 0.0;
2668:     sum18 = 0.0;
2669:     for (j=0; j<n; j++) {
2670:       sum1  += v[jrow]*x[18*idx[jrow]];
2671:       sum2  += v[jrow]*x[18*idx[jrow]+1];
2672:       sum3  += v[jrow]*x[18*idx[jrow]+2];
2673:       sum4  += v[jrow]*x[18*idx[jrow]+3];
2674:       sum5  += v[jrow]*x[18*idx[jrow]+4];
2675:       sum6  += v[jrow]*x[18*idx[jrow]+5];
2676:       sum7  += v[jrow]*x[18*idx[jrow]+6];
2677:       sum8  += v[jrow]*x[18*idx[jrow]+7];
2678:       sum9  += v[jrow]*x[18*idx[jrow]+8];
2679:       sum10 += v[jrow]*x[18*idx[jrow]+9];
2680:       sum11 += v[jrow]*x[18*idx[jrow]+10];
2681:       sum12 += v[jrow]*x[18*idx[jrow]+11];
2682:       sum13 += v[jrow]*x[18*idx[jrow]+12];
2683:       sum14 += v[jrow]*x[18*idx[jrow]+13];
2684:       sum15 += v[jrow]*x[18*idx[jrow]+14];
2685:       sum16 += v[jrow]*x[18*idx[jrow]+15];
2686:       sum17 += v[jrow]*x[18*idx[jrow]+16];
2687:       sum18 += v[jrow]*x[18*idx[jrow]+17];
2688:       jrow++;
2689:     }
2690:     y[18*i]    += sum1;
2691:     y[18*i+1]  += sum2;
2692:     y[18*i+2]  += sum3;
2693:     y[18*i+3]  += sum4;
2694:     y[18*i+4]  += sum5;
2695:     y[18*i+5]  += sum6;
2696:     y[18*i+6]  += sum7;
2697:     y[18*i+7]  += sum8;
2698:     y[18*i+8]  += sum9;
2699:     y[18*i+9]  += sum10;
2700:     y[18*i+10] += sum11;
2701:     y[18*i+11] += sum12;
2702:     y[18*i+12] += sum13;
2703:     y[18*i+13] += sum14;
2704:     y[18*i+14] += sum15;
2705:     y[18*i+15] += sum16;
2706:     y[18*i+16] += sum17;
2707:     y[18*i+17] += sum18;
2708:   }

2710:   PetscLogFlops(36.0*a->nz);
2711:   VecRestoreArrayRead(xx,&x);
2712:   VecRestoreArray(zz,&y);
2713:   return(0);
2714: }

2718: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2719: {
2720:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2721:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2722:   const PetscScalar *x,*v;
2723:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2724:   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2725:   PetscErrorCode    ierr;
2726:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2727:   PetscInt          n,i;

2730:   if (yy != zz) {VecCopy(yy,zz);}
2731:   VecGetArrayRead(xx,&x);
2732:   VecGetArray(zz,&y);
2733:   for (i=0; i<m; i++) {
2734:     idx     = a->j + a->i[i];
2735:     v       = a->a + a->i[i];
2736:     n       = a->i[i+1] - a->i[i];
2737:     alpha1  = x[18*i];
2738:     alpha2  = x[18*i+1];
2739:     alpha3  = x[18*i+2];
2740:     alpha4  = x[18*i+3];
2741:     alpha5  = x[18*i+4];
2742:     alpha6  = x[18*i+5];
2743:     alpha7  = x[18*i+6];
2744:     alpha8  = x[18*i+7];
2745:     alpha9  = x[18*i+8];
2746:     alpha10 = x[18*i+9];
2747:     alpha11 = x[18*i+10];
2748:     alpha12 = x[18*i+11];
2749:     alpha13 = x[18*i+12];
2750:     alpha14 = x[18*i+13];
2751:     alpha15 = x[18*i+14];
2752:     alpha16 = x[18*i+15];
2753:     alpha17 = x[18*i+16];
2754:     alpha18 = x[18*i+17];
2755:     while (n-->0) {
2756:       y[18*(*idx)]    += alpha1*(*v);
2757:       y[18*(*idx)+1]  += alpha2*(*v);
2758:       y[18*(*idx)+2]  += alpha3*(*v);
2759:       y[18*(*idx)+3]  += alpha4*(*v);
2760:       y[18*(*idx)+4]  += alpha5*(*v);
2761:       y[18*(*idx)+5]  += alpha6*(*v);
2762:       y[18*(*idx)+6]  += alpha7*(*v);
2763:       y[18*(*idx)+7]  += alpha8*(*v);
2764:       y[18*(*idx)+8]  += alpha9*(*v);
2765:       y[18*(*idx)+9]  += alpha10*(*v);
2766:       y[18*(*idx)+10] += alpha11*(*v);
2767:       y[18*(*idx)+11] += alpha12*(*v);
2768:       y[18*(*idx)+12] += alpha13*(*v);
2769:       y[18*(*idx)+13] += alpha14*(*v);
2770:       y[18*(*idx)+14] += alpha15*(*v);
2771:       y[18*(*idx)+15] += alpha16*(*v);
2772:       y[18*(*idx)+16] += alpha17*(*v);
2773:       y[18*(*idx)+17] += alpha18*(*v);
2774:       idx++; v++;
2775:     }
2776:   }
2777:   PetscLogFlops(36.0*a->nz);
2778:   VecRestoreArrayRead(xx,&x);
2779:   VecRestoreArray(zz,&y);
2780:   return(0);
2781: }

2785: PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2786: {
2787:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2788:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2789:   const PetscScalar *x,*v;
2790:   PetscScalar       *y,*sums;
2791:   PetscErrorCode    ierr;
2792:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2793:   PetscInt          n,i,jrow,j,dof = b->dof,k;

2796:   VecGetArrayRead(xx,&x);
2797:   VecSet(yy,0.0);
2798:   VecGetArray(yy,&y);
2799:   idx  = a->j;
2800:   v    = a->a;
2801:   ii   = a->i;

2803:   for (i=0; i<m; i++) {
2804:     jrow = ii[i];
2805:     n    = ii[i+1] - jrow;
2806:     sums = y + dof*i;
2807:     for (j=0; j<n; j++) {
2808:       for (k=0; k<dof; k++) {
2809:         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2810:       }
2811:       jrow++;
2812:     }
2813:   }

2815:   PetscLogFlops(2.0*dof*a->nz);
2816:   VecRestoreArrayRead(xx,&x);
2817:   VecRestoreArray(yy,&y);
2818:   return(0);
2819: }

2823: PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2824: {
2825:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2826:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2827:   const PetscScalar *x,*v;
2828:   PetscScalar       *y,*sums;
2829:   PetscErrorCode    ierr;
2830:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2831:   PetscInt          n,i,jrow,j,dof = b->dof,k;

2834:   if (yy != zz) {VecCopy(yy,zz);}
2835:   VecGetArrayRead(xx,&x);
2836:   VecGetArray(zz,&y);
2837:   idx  = a->j;
2838:   v    = a->a;
2839:   ii   = a->i;

2841:   for (i=0; i<m; i++) {
2842:     jrow = ii[i];
2843:     n    = ii[i+1] - jrow;
2844:     sums = y + dof*i;
2845:     for (j=0; j<n; j++) {
2846:       for (k=0; k<dof; k++) {
2847:         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2848:       }
2849:       jrow++;
2850:     }
2851:   }

2853:   PetscLogFlops(2.0*dof*a->nz);
2854:   VecRestoreArrayRead(xx,&x);
2855:   VecRestoreArray(zz,&y);
2856:   return(0);
2857: }

2861: PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2862: {
2863:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2864:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2865:   const PetscScalar *x,*v,*alpha;
2866:   PetscScalar       *y;
2867:   PetscErrorCode    ierr;
2868:   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2869:   PetscInt          n,i,k;

2872:   VecGetArrayRead(xx,&x);
2873:   VecSet(yy,0.0);
2874:   VecGetArray(yy,&y);
2875:   for (i=0; i<m; i++) {
2876:     idx   = a->j + a->i[i];
2877:     v     = a->a + a->i[i];
2878:     n     = a->i[i+1] - a->i[i];
2879:     alpha = x + dof*i;
2880:     while (n-->0) {
2881:       for (k=0; k<dof; k++) {
2882:         y[dof*(*idx)+k] += alpha[k]*(*v);
2883:       }
2884:       idx++; v++;
2885:     }
2886:   }
2887:   PetscLogFlops(2.0*dof*a->nz);
2888:   VecRestoreArrayRead(xx,&x);
2889:   VecRestoreArray(yy,&y);
2890:   return(0);
2891: }

2895: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2896: {
2897:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2898:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2899:   const PetscScalar *x,*v,*alpha;
2900:   PetscScalar       *y;
2901:   PetscErrorCode    ierr;
2902:   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2903:   PetscInt          n,i,k;

2906:   if (yy != zz) {VecCopy(yy,zz);}
2907:   VecGetArrayRead(xx,&x);
2908:   VecGetArray(zz,&y);
2909:   for (i=0; i<m; i++) {
2910:     idx   = a->j + a->i[i];
2911:     v     = a->a + a->i[i];
2912:     n     = a->i[i+1] - a->i[i];
2913:     alpha = x + dof*i;
2914:     while (n-->0) {
2915:       for (k=0; k<dof; k++) {
2916:         y[dof*(*idx)+k] += alpha[k]*(*v);
2917:       }
2918:       idx++; v++;
2919:     }
2920:   }
2921:   PetscLogFlops(2.0*dof*a->nz);
2922:   VecRestoreArrayRead(xx,&x);
2923:   VecRestoreArray(zz,&y);
2924:   return(0);
2925: }

2927: /*===================================================================================*/
2930: PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2931: {
2932:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2936:   /* start the scatter */
2937:   VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2938:   (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
2939:   VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2940:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
2941:   return(0);
2942: }

2946: PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2947: {
2948:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2952:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2953:   (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
2954:   VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2955:   VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2956:   return(0);
2957: }

2961: PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2962: {
2963:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2967:   /* start the scatter */
2968:   VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2969:   (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
2970:   VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2971:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);
2972:   return(0);
2973: }

2977: PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2978: {
2979:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2983:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2984:   VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2985:   (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
2986:   VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2987:   return(0);
2988: }

2990: /* ----------------------------------------------------------------*/
2993: PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2994: {
2995:   PetscErrorCode     ierr;
2996:   PetscFreeSpaceList free_space=NULL,current_space=NULL;
2997:   Mat_SeqMAIJ        *pp       =(Mat_SeqMAIJ*)PP->data;
2998:   Mat                P         =pp->AIJ;
2999:   Mat_SeqAIJ         *a        =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
3000:   PetscInt           *pti,*ptj,*ptJ;
3001:   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
3002:   const PetscInt     an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
3003:   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
3004:   MatScalar          *ca;
3005:   const PetscInt     *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;

3008:   /* Get ij structure of P^T */
3009:   MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);

3011:   cn = pn*ppdof;
3012:   /* Allocate ci array, arrays for fill computation and */
3013:   /* free space for accumulating nonzero column info */
3014:   PetscMalloc1(cn+1,&ci);
3015:   ci[0] = 0;

3017:   /* Work arrays for rows of P^T*A */
3018:   PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow);
3019:   PetscMemzero(ptadenserow,an*sizeof(PetscInt));
3020:   PetscMemzero(denserow,cn*sizeof(PetscInt));

3022:   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
3023:   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
3024:   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
3025:   PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space);
3026:   current_space = free_space;

3028:   /* Determine symbolic info for each row of C: */
3029:   for (i=0; i<pn; i++) {
3030:     ptnzi = pti[i+1] - pti[i];
3031:     ptJ   = ptj + pti[i];
3032:     for (dof=0; dof<ppdof; dof++) {
3033:       ptanzi = 0;
3034:       /* Determine symbolic row of PtA: */
3035:       for (j=0; j<ptnzi; j++) {
3036:         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
3037:         arow = ptJ[j]*ppdof + dof;
3038:         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
3039:         anzj = ai[arow+1] - ai[arow];
3040:         ajj  = aj + ai[arow];
3041:         for (k=0; k<anzj; k++) {
3042:           if (!ptadenserow[ajj[k]]) {
3043:             ptadenserow[ajj[k]]    = -1;
3044:             ptasparserow[ptanzi++] = ajj[k];
3045:           }
3046:         }
3047:       }
3048:       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
3049:       ptaj = ptasparserow;
3050:       cnzi = 0;
3051:       for (j=0; j<ptanzi; j++) {
3052:         /* Get offset within block of P */
3053:         pshift = *ptaj%ppdof;
3054:         /* Get block row of P */
3055:         prow = (*ptaj++)/ppdof; /* integer division */
3056:         /* P has same number of nonzeros per row as the compressed form */
3057:         pnzj = pi[prow+1] - pi[prow];
3058:         pjj  = pj + pi[prow];
3059:         for (k=0;k<pnzj;k++) {
3060:           /* Locations in C are shifted by the offset within the block */
3061:           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
3062:           if (!denserow[pjj[k]*ppdof+pshift]) {
3063:             denserow[pjj[k]*ppdof+pshift] = -1;
3064:             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
3065:           }
3066:         }
3067:       }

3069:       /* sort sparserow */
3070:       PetscSortInt(cnzi,sparserow);

3072:       /* If free space is not available, make more free space */
3073:       /* Double the amount of total space in the list */
3074:       if (current_space->local_remaining<cnzi) {
3075:         PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),&current_space);
3076:       }

3078:       /* Copy data into free space, and zero out denserows */
3079:       PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));

3081:       current_space->array           += cnzi;
3082:       current_space->local_used      += cnzi;
3083:       current_space->local_remaining -= cnzi;

3085:       for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
3086:       for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0;

3088:       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
3089:       /*        For now, we will recompute what is needed. */
3090:       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
3091:     }
3092:   }
3093:   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
3094:   /* Allocate space for cj, initialize cj, and */
3095:   /* destroy list of free space and other temporary array(s) */
3096:   PetscMalloc1(ci[cn]+1,&cj);
3097:   PetscFreeSpaceContiguous(&free_space,cj);
3098:   PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);

3100:   /* Allocate space for ca */
3101:   PetscCalloc1(ci[cn]+1,&ca);

3103:   /* put together the new matrix */
3104:   MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,C);
3105:   MatSetBlockSize(*C,pp->dof);

3107:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3108:   /* Since these are PETSc arrays, change flags to free them as necessary. */
3109:   c          = (Mat_SeqAIJ*)((*C)->data);
3110:   c->free_a  = PETSC_TRUE;
3111:   c->free_ij = PETSC_TRUE;
3112:   c->nonew   = 0;

3114:   (*C)->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ;

3116:   /* Clean up. */
3117:   MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
3118:   return(0);
3119: }

3123: PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
3124: {
3125:   /* This routine requires testing -- first draft only */
3126:   PetscErrorCode  ierr;
3127:   Mat_SeqMAIJ     *pp=(Mat_SeqMAIJ*)PP->data;
3128:   Mat             P  =pp->AIJ;
3129:   Mat_SeqAIJ      *a = (Mat_SeqAIJ*) A->data;
3130:   Mat_SeqAIJ      *p = (Mat_SeqAIJ*) P->data;
3131:   Mat_SeqAIJ      *c = (Mat_SeqAIJ*) C->data;
3132:   const PetscInt  *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj;
3133:   const PetscInt  *ci=c->i,*cj=c->j,*cjj;
3134:   const PetscInt  am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3135:   PetscInt        i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3136:   const MatScalar *aa=a->a,*pa=p->a,*pA,*paj;
3137:   MatScalar       *ca=c->a,*caj,*apa;

3140:   /* Allocate temporary array for storage of one row of A*P */
3141:   PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense);

3143:   /* Clear old values in C */
3144:   PetscMemzero(ca,ci[cm]*sizeof(MatScalar));

3146:   for (i=0; i<am; i++) {
3147:     /* Form sparse row of A*P */
3148:     anzi  = ai[i+1] - ai[i];
3149:     apnzj = 0;
3150:     for (j=0; j<anzi; j++) {
3151:       /* Get offset within block of P */
3152:       pshift = *aj%ppdof;
3153:       /* Get block row of P */
3154:       prow = *aj++/ppdof;   /* integer division */
3155:       pnzj = pi[prow+1] - pi[prow];
3156:       pjj  = pj + pi[prow];
3157:       paj  = pa + pi[prow];
3158:       for (k=0; k<pnzj; k++) {
3159:         poffset = pjj[k]*ppdof+pshift;
3160:         if (!apjdense[poffset]) {
3161:           apjdense[poffset] = -1;
3162:           apj[apnzj++]      = poffset;
3163:         }
3164:         apa[poffset] += (*aa)*paj[k];
3165:       }
3166:       PetscLogFlops(2.0*pnzj);
3167:       aa++;
3168:     }

3170:     /* Sort the j index array for quick sparse axpy. */
3171:     /* Note: a array does not need sorting as it is in dense storage locations. */
3172:     PetscSortInt(apnzj,apj);

3174:     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
3175:     prow    = i/ppdof; /* integer division */
3176:     pshift  = i%ppdof;
3177:     poffset = pi[prow];
3178:     pnzi    = pi[prow+1] - poffset;
3179:     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
3180:     pJ = pj+poffset;
3181:     pA = pa+poffset;
3182:     for (j=0; j<pnzi; j++) {
3183:       crow = (*pJ)*ppdof+pshift;
3184:       cjj  = cj + ci[crow];
3185:       caj  = ca + ci[crow];
3186:       pJ++;
3187:       /* Perform sparse axpy operation.  Note cjj includes apj. */
3188:       for (k=0,nextap=0; nextap<apnzj; k++) {
3189:         if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]];
3190:       }
3191:       PetscLogFlops(2.0*apnzj);
3192:       pA++;
3193:     }

3195:     /* Zero the current row info for A*P */
3196:     for (j=0; j<apnzj; j++) {
3197:       apa[apj[j]]      = 0.;
3198:       apjdense[apj[j]] = 0;
3199:     }
3200:   }

3202:   /* Assemble the final matrix and clean up */
3203:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
3204:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
3205:   PetscFree3(apa,apj,apjdense);
3206:   return(0);
3207: }

3211: PETSC_INTERN PetscErrorCode MatPtAP_SeqAIJ_SeqMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
3212: {

3216:   if (scall == MAT_INITIAL_MATRIX) {
3217:     PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);
3218:     MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,fill,C);
3219:     PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);
3220:   }
3221:   PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);
3222:   MatPtAPNumeric_SeqAIJ_SeqMAIJ(A,P,*C);
3223:   PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);
3224:   return(0);
3225: }

3229: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
3230: {

3234:   /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */
3235:   MatConvert(PP,MATMPIAIJ,MAT_INPLACE_MATRIX,&PP);
3236:   ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);
3237:   return(0);
3238: }

3242: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
3243: {
3245:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
3246:   return(0);
3247: }

3251: PETSC_INTERN PetscErrorCode MatPtAP_MPIAIJ_MPIMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
3252: {

3256:   if (scall == MAT_INITIAL_MATRIX) {
3257:     PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);
3258:     MatPtAPSymbolic_MPIAIJ_MPIMAIJ(A,P,fill,C);
3259:     PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);
3260:   }
3261:   PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);
3262:   ((*C)->ops->ptapnumeric)(A,P,*C);
3263:   PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);
3264:   return(0);
3265: }

3269: PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3270: {
3271:   Mat_SeqMAIJ    *b   = (Mat_SeqMAIJ*)A->data;
3272:   Mat            a    = b->AIJ,B;
3273:   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)a->data;
3275:   PetscInt       m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3276:   PetscInt       *cols;
3277:   PetscScalar    *vals;

3280:   MatGetSize(a,&m,&n);
3281:   PetscMalloc1(dof*m,&ilen);
3282:   for (i=0; i<m; i++) {
3283:     nmax = PetscMax(nmax,aij->ilen[i]);
3284:     for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i];
3285:   }
3286:   MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);
3287:   PetscFree(ilen);
3288:   PetscMalloc1(nmax,&icols);
3289:   ii   = 0;
3290:   for (i=0; i<m; i++) {
3291:     MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3292:     for (j=0; j<dof; j++) {
3293:       for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j;
3294:       MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3295:       ii++;
3296:     }
3297:     MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3298:   }
3299:   PetscFree(icols);
3300:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3301:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

3303:   if (reuse == MAT_INPLACE_MATRIX) {
3304:     MatHeaderReplace(A,&B);
3305:   } else {
3306:     *newmat = B;
3307:   }
3308:   return(0);
3309: }

3311: #include <../src/mat/impls/aij/mpi/mpiaij.h>

3315: PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3316: {
3317:   Mat_MPIMAIJ    *maij   = (Mat_MPIMAIJ*)A->data;
3318:   Mat            MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
3319:   Mat            MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
3320:   Mat_SeqAIJ     *AIJ    = (Mat_SeqAIJ*) MatAIJ->data;
3321:   Mat_SeqAIJ     *OAIJ   =(Mat_SeqAIJ*) MatOAIJ->data;
3322:   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*) maij->A->data;
3323:   PetscInt       dof     = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0;
3324:   PetscInt       *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL;
3325:   PetscInt       rstart,cstart,*garray,ii,k;
3327:   PetscScalar    *vals,*ovals;

3330:   PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz);
3331:   for (i=0; i<A->rmap->n/dof; i++) {
3332:     nmax  = PetscMax(nmax,AIJ->ilen[i]);
3333:     onmax = PetscMax(onmax,OAIJ->ilen[i]);
3334:     for (j=0; j<dof; j++) {
3335:       dnz[dof*i+j] = AIJ->ilen[i];
3336:       onz[dof*i+j] = OAIJ->ilen[i];
3337:     }
3338:   }
3339:   MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);
3340:   MatSetBlockSize(B,dof);
3341:   PetscFree2(dnz,onz);

3343:   PetscMalloc2(nmax,&icols,onmax,&oicols);
3344:   rstart = dof*maij->A->rmap->rstart;
3345:   cstart = dof*maij->A->cmap->rstart;
3346:   garray = mpiaij->garray;

3348:   ii = rstart;
3349:   for (i=0; i<A->rmap->n/dof; i++) {
3350:     MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3351:     MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3352:     for (j=0; j<dof; j++) {
3353:       for (k=0; k<ncols; k++) {
3354:         icols[k] = cstart + dof*cols[k]+j;
3355:       }
3356:       for (k=0; k<oncols; k++) {
3357:         oicols[k] = dof*garray[ocols[k]]+j;
3358:       }
3359:       MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3360:       MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);
3361:       ii++;
3362:     }
3363:     MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3364:     MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3365:   }
3366:   PetscFree2(icols,oicols);

3368:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3369:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

3371:   if (reuse == MAT_INPLACE_MATRIX) {
3372:     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
3373:     ((PetscObject)A)->refct = 1;

3375:     MatHeaderReplace(A,&B);

3377:     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3378:   } else {
3379:     *newmat = B;
3380:   }
3381:   return(0);
3382: }

3386: PetscErrorCode  MatGetSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
3387: {
3389:   Mat            A;

3392:   MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);
3393:   MatGetSubMatrix(A,isrow,iscol,cll,newmat);
3394:   MatDestroy(&A);
3395:   return(0);
3396: }

3398: /* ---------------------------------------------------------------------------------- */
3401: /*@
3402:   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
3403:   operations for multicomponent problems.  It interpolates each component the same
3404:   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
3405:   and MATMPIAIJ for distributed matrices.

3407:   Collective

3409:   Input Parameters:
3410: + A - the AIJ matrix describing the action on blocks
3411: - dof - the block size (number of components per node)

3413:   Output Parameter:
3414: . maij - the new MAIJ matrix

3416:   Operations provided:
3417: + MatMult
3418: . MatMultTranspose
3419: . MatMultAdd
3420: . MatMultTransposeAdd
3421: - MatView

3423:   Level: advanced

3425: .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ
3426: @*/
3427: PetscErrorCode  MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
3428: {
3430:   PetscMPIInt    size;
3431:   PetscInt       n;
3432:   Mat            B;

3435:   PetscObjectReference((PetscObject)A);

3437:   if (dof == 1) *maij = A;
3438:   else {
3439:     MatCreate(PetscObjectComm((PetscObject)A),&B);
3440:     MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);
3441:     PetscLayoutSetBlockSize(B->rmap,dof);
3442:     PetscLayoutSetBlockSize(B->cmap,dof);
3443:     PetscLayoutSetUp(B->rmap);
3444:     PetscLayoutSetUp(B->cmap);

3446:     B->assembled = PETSC_TRUE;

3448:     MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
3449:     if (size == 1) {
3450:       Mat_SeqMAIJ *b;

3452:       MatSetType(B,MATSEQMAIJ);

3454:       B->ops->setup   = NULL;
3455:       B->ops->destroy = MatDestroy_SeqMAIJ;
3456:       B->ops->view    = MatView_SeqMAIJ;
3457:       b               = (Mat_SeqMAIJ*)B->data;
3458:       b->dof          = dof;
3459:       b->AIJ          = A;

3461:       if (dof == 2) {
3462:         B->ops->mult             = MatMult_SeqMAIJ_2;
3463:         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3464:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3465:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3466:       } else if (dof == 3) {
3467:         B->ops->mult             = MatMult_SeqMAIJ_3;
3468:         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3469:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3470:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3471:       } else if (dof == 4) {
3472:         B->ops->mult             = MatMult_SeqMAIJ_4;
3473:         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3474:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3475:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3476:       } else if (dof == 5) {
3477:         B->ops->mult             = MatMult_SeqMAIJ_5;
3478:         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3479:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3480:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
3481:       } else if (dof == 6) {
3482:         B->ops->mult             = MatMult_SeqMAIJ_6;
3483:         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
3484:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
3485:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3486:       } else if (dof == 7) {
3487:         B->ops->mult             = MatMult_SeqMAIJ_7;
3488:         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3489:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3490:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
3491:       } else if (dof == 8) {
3492:         B->ops->mult             = MatMult_SeqMAIJ_8;
3493:         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
3494:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
3495:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
3496:       } else if (dof == 9) {
3497:         B->ops->mult             = MatMult_SeqMAIJ_9;
3498:         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
3499:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
3500:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3501:       } else if (dof == 10) {
3502:         B->ops->mult             = MatMult_SeqMAIJ_10;
3503:         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3504:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3505:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3506:       } else if (dof == 11) {
3507:         B->ops->mult             = MatMult_SeqMAIJ_11;
3508:         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3509:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3510:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
3511:       } else if (dof == 16) {
3512:         B->ops->mult             = MatMult_SeqMAIJ_16;
3513:         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
3514:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
3515:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3516:       } else if (dof == 18) {
3517:         B->ops->mult             = MatMult_SeqMAIJ_18;
3518:         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3519:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3520:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
3521:       } else {
3522:         B->ops->mult             = MatMult_SeqMAIJ_N;
3523:         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
3524:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
3525:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
3526:       }
3527:       PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ);
3528:       PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaij_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);
3529:     } else {
3530:       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ*)A->data;
3531:       Mat_MPIMAIJ *b;
3532:       IS          from,to;
3533:       Vec         gvec;

3535:       MatSetType(B,MATMPIMAIJ);

3537:       B->ops->setup   = NULL;
3538:       B->ops->destroy = MatDestroy_MPIMAIJ;
3539:       B->ops->view    = MatView_MPIMAIJ;

3541:       b      = (Mat_MPIMAIJ*)B->data;
3542:       b->dof = dof;
3543:       b->A   = A;

3545:       MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);
3546:       MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);

3548:       VecGetSize(mpiaij->lvec,&n);
3549:       VecCreate(PETSC_COMM_SELF,&b->w);
3550:       VecSetSizes(b->w,n*dof,n*dof);
3551:       VecSetBlockSize(b->w,dof);
3552:       VecSetType(b->w,VECSEQ);

3554:       /* create two temporary Index sets for build scatter gather */
3555:       ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);
3556:       ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);

3558:       /* create temporary global vector to generate scatter context */
3559:       VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);

3561:       /* generate the scatter context */
3562:       VecScatterCreate(gvec,from,b->w,to,&b->ctx);

3564:       ISDestroy(&from);
3565:       ISDestroy(&to);
3566:       VecDestroy(&gvec);

3568:       B->ops->mult             = MatMult_MPIMAIJ_dof;
3569:       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
3570:       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
3571:       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;

3573:       PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ);
3574:       PetscObjectComposeFunction((PetscObject)B,"MatPtAP_mpiaij_mpimaij_C",MatPtAP_MPIAIJ_MPIMAIJ);
3575:     }
3576:     B->ops->getsubmatrix = MatGetSubMatrix_MAIJ;
3577:     MatSetUp(B);
3578:     *maij = B;
3579:     MatViewFromOptions(B,NULL,"-mat_view");
3580:   }
3581:   return(0);
3582: }