Actual source code: maij.c

petsc-3.4.5 2014-06-29
  2: /*
  3:     Defines the basic matrix operations for the MAIJ  matrix storage format.
  4:   This format is used for restriction and interpolation operations for
  5:   multicomponent problems. It interpolates each component the same way
  6:   independently.

  8:      We provide:
  9:          MatMult()
 10:          MatMultTranspose()
 11:          MatMultTransposeAdd()
 12:          MatMultAdd()
 13:           and
 14:          MatCreateMAIJ(Mat,dof,Mat*)

 16:      This single directory handles both the sequential and parallel codes
 17: */

 19: #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
 20: #include <../src/mat/utils/freespace.h>
 21: #include <petsc-private/vecimpl.h>

 25: /*@C
 26:    MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix

 28:    Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel

 30:    Input Parameter:
 31: .  A - the MAIJ matrix

 33:    Output Parameter:
 34: .  B - the AIJ matrix

 36:    Level: advanced

 38:    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.

 40: .seealso: MatCreateMAIJ()
 41: @*/
 42: PetscErrorCode  MatMAIJGetAIJ(Mat A,Mat *B)
 43: {
 45:   PetscBool      ismpimaij,isseqmaij;

 48:   PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
 49:   PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
 50:   if (ismpimaij) {
 51:     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;

 53:     *B = b->A;
 54:   } else if (isseqmaij) {
 55:     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;

 57:     *B = b->AIJ;
 58:   } else {
 59:     *B = A;
 60:   }
 61:   return(0);
 62: }

 66: /*@C
 67:    MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size

 69:    Logically Collective

 71:    Input Parameter:
 72: +  A - the MAIJ matrix
 73: -  dof - the block size for the new matrix

 75:    Output Parameter:
 76: .  B - the new MAIJ matrix

 78:    Level: advanced

 80: .seealso: MatCreateMAIJ()
 81: @*/
 82: PetscErrorCode  MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
 83: {
 85:   Mat            Aij = NULL;

 89:   MatMAIJGetAIJ(A,&Aij);
 90:   MatCreateMAIJ(Aij,dof,B);
 91:   return(0);
 92: }

 96: PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
 97: {
 99:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;

102:   MatDestroy(&b->AIJ);
103:   PetscFree(A->data);
104:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL);
105:   PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaij_seqmaij_C",NULL);
106:   return(0);
107: }

111: PetscErrorCode MatSetUp_MAIJ(Mat A)
112: {
114:   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices");
115:   return(0);
116: }

120: PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
121: {
123:   Mat            B;

126:   MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
127:   MatView(B,viewer);
128:   MatDestroy(&B);
129:   return(0);
130: }

134: PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
135: {
137:   Mat            B;

140:   MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
141:   MatView(B,viewer);
142:   MatDestroy(&B);
143:   return(0);
144: }

148: PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
149: {
151:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

154:   MatDestroy(&b->AIJ);
155:   MatDestroy(&b->OAIJ);
156:   MatDestroy(&b->A);
157:   VecScatterDestroy(&b->ctx);
158:   VecDestroy(&b->w);
159:   PetscFree(A->data);
160:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL);
161:   PetscObjectComposeFunction((PetscObject)A,"MatPtAP_mpiaij_mpimaij_C",NULL);
162:   PetscObjectChangeTypeName((PetscObject)A,0);
163:   return(0);
164: }

166: /*MC
167:   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
168:   multicomponent problems, interpolating or restricting each component the same way independently.
169:   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.

171:   Operations provided:
172: . MatMult
173: . MatMultTranspose
174: . MatMultAdd
175: . MatMultTransposeAdd

177:   Level: advanced

179: .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ()
180: M*/

184: PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
185: {
187:   Mat_MPIMAIJ    *b;
188:   PetscMPIInt    size;

191:   PetscNewLog(A,Mat_MPIMAIJ,&b);
192:   A->data  = (void*)b;

194:   PetscMemzero(A->ops,sizeof(struct _MatOps));

196:   A->ops->setup = MatSetUp_MAIJ;

198:   b->AIJ  = 0;
199:   b->dof  = 0;
200:   b->OAIJ = 0;
201:   b->ctx  = 0;
202:   b->w    = 0;
203:   MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
204:   if (size == 1) {
205:     PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);
206:   } else {
207:     PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);
208:   }
209:   return(0);
210: }

212: /* --------------------------------------------------------------------------------------*/
215: PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
216: {
217:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
218:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
219:   const PetscScalar *x,*v;
220:   PetscScalar       *y, sum1, sum2;
221:   PetscErrorCode    ierr;
222:   PetscInt          nonzerorow=0,n,i,jrow,j;
223:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;

226:   VecGetArrayRead(xx,&x);
227:   VecGetArray(yy,&y);
228:   idx  = a->j;
229:   v    = a->a;
230:   ii   = a->i;

232:   for (i=0; i<m; i++) {
233:     jrow  = ii[i];
234:     n     = ii[i+1] - jrow;
235:     sum1  = 0.0;
236:     sum2  = 0.0;

238:     nonzerorow += (n>0);
239:     for (j=0; j<n; j++) {
240:       sum1 += v[jrow]*x[2*idx[jrow]];
241:       sum2 += v[jrow]*x[2*idx[jrow]+1];
242:       jrow++;
243:     }
244:     y[2*i]   = sum1;
245:     y[2*i+1] = sum2;
246:   }

248:   PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);
249:   VecRestoreArrayRead(xx,&x);
250:   VecRestoreArray(yy,&y);
251:   return(0);
252: }

256: PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
257: {
258:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
259:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
260:   const PetscScalar *x,*v;
261:   PetscScalar       *y,alpha1,alpha2;
262:   PetscErrorCode    ierr;
263:   PetscInt          n,i;
264:   const PetscInt    m = b->AIJ->rmap->n,*idx;

267:   VecSet(yy,0.0);
268:   VecGetArrayRead(xx,&x);
269:   VecGetArray(yy,&y);

271:   for (i=0; i<m; i++) {
272:     idx    = a->j + a->i[i];
273:     v      = a->a + a->i[i];
274:     n      = a->i[i+1] - a->i[i];
275:     alpha1 = x[2*i];
276:     alpha2 = x[2*i+1];
277:     while (n-->0) {
278:       y[2*(*idx)]   += alpha1*(*v);
279:       y[2*(*idx)+1] += alpha2*(*v);
280:       idx++; v++;
281:     }
282:   }
283:   PetscLogFlops(4.0*a->nz);
284:   VecRestoreArrayRead(xx,&x);
285:   VecRestoreArray(yy,&y);
286:   return(0);
287: }

291: PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
292: {
293:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
294:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
295:   const PetscScalar *x,*v;
296:   PetscScalar       *y,sum1, sum2;
297:   PetscErrorCode    ierr;
298:   PetscInt          n,i,jrow,j;
299:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

302:   if (yy != zz) {VecCopy(yy,zz);}
303:   VecGetArrayRead(xx,&x);
304:   VecGetArray(zz,&y);
305:   idx  = a->j;
306:   v    = a->a;
307:   ii   = a->i;

309:   for (i=0; i<m; i++) {
310:     jrow = ii[i];
311:     n    = ii[i+1] - jrow;
312:     sum1 = 0.0;
313:     sum2 = 0.0;
314:     for (j=0; j<n; j++) {
315:       sum1 += v[jrow]*x[2*idx[jrow]];
316:       sum2 += v[jrow]*x[2*idx[jrow]+1];
317:       jrow++;
318:     }
319:     y[2*i]   += sum1;
320:     y[2*i+1] += sum2;
321:   }

323:   PetscLogFlops(4.0*a->nz);
324:   VecRestoreArrayRead(xx,&x);
325:   VecRestoreArray(zz,&y);
326:   return(0);
327: }
330: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
331: {
332:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
333:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
334:   const PetscScalar *x,*v;
335:   PetscScalar       *y,alpha1,alpha2;
336:   PetscErrorCode    ierr;
337:   PetscInt          n,i;
338:   const PetscInt    m = b->AIJ->rmap->n,*idx;

341:   if (yy != zz) {VecCopy(yy,zz);}
342:   VecGetArrayRead(xx,&x);
343:   VecGetArray(zz,&y);

345:   for (i=0; i<m; i++) {
346:     idx    = a->j + a->i[i];
347:     v      = a->a + a->i[i];
348:     n      = a->i[i+1] - a->i[i];
349:     alpha1 = x[2*i];
350:     alpha2 = x[2*i+1];
351:     while (n-->0) {
352:       y[2*(*idx)]   += alpha1*(*v);
353:       y[2*(*idx)+1] += alpha2*(*v);
354:       idx++; v++;
355:     }
356:   }
357:   PetscLogFlops(4.0*a->nz);
358:   VecRestoreArrayRead(xx,&x);
359:   VecRestoreArray(zz,&y);
360:   return(0);
361: }
362: /* --------------------------------------------------------------------------------------*/
365: PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
366: {
367:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
368:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
369:   const PetscScalar *x,*v;
370:   PetscScalar       *y,sum1, sum2, sum3;
371:   PetscErrorCode    ierr;
372:   PetscInt          nonzerorow=0,n,i,jrow,j;
373:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;

376:   VecGetArrayRead(xx,&x);
377:   VecGetArray(yy,&y);
378:   idx  = a->j;
379:   v    = a->a;
380:   ii   = a->i;

382:   for (i=0; i<m; i++) {
383:     jrow  = ii[i];
384:     n     = ii[i+1] - jrow;
385:     sum1  = 0.0;
386:     sum2  = 0.0;
387:     sum3  = 0.0;

389:     nonzerorow += (n>0);
390:     for (j=0; j<n; j++) {
391:       sum1 += v[jrow]*x[3*idx[jrow]];
392:       sum2 += v[jrow]*x[3*idx[jrow]+1];
393:       sum3 += v[jrow]*x[3*idx[jrow]+2];
394:       jrow++;
395:      }
396:     y[3*i]   = sum1;
397:     y[3*i+1] = sum2;
398:     y[3*i+2] = sum3;
399:   }

401:   PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);
402:   VecRestoreArrayRead(xx,&x);
403:   VecRestoreArray(yy,&y);
404:   return(0);
405: }

409: PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
410: {
411:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
412:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
413:   const PetscScalar *x,*v;
414:   PetscScalar       *y,alpha1,alpha2,alpha3;
415:   PetscErrorCode    ierr;
416:   PetscInt          n,i;
417:   const PetscInt    m = b->AIJ->rmap->n,*idx;

420:   VecSet(yy,0.0);
421:   VecGetArrayRead(xx,&x);
422:   VecGetArray(yy,&y);

424:   for (i=0; i<m; i++) {
425:     idx    = a->j + a->i[i];
426:     v      = a->a + a->i[i];
427:     n      = a->i[i+1] - a->i[i];
428:     alpha1 = x[3*i];
429:     alpha2 = x[3*i+1];
430:     alpha3 = x[3*i+2];
431:     while (n-->0) {
432:       y[3*(*idx)]   += alpha1*(*v);
433:       y[3*(*idx)+1] += alpha2*(*v);
434:       y[3*(*idx)+2] += alpha3*(*v);
435:       idx++; v++;
436:     }
437:   }
438:   PetscLogFlops(6.0*a->nz);
439:   VecRestoreArrayRead(xx,&x);
440:   VecRestoreArray(yy,&y);
441:   return(0);
442: }

446: PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
447: {
448:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
449:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
450:   const PetscScalar *x,*v;
451:   PetscScalar       *y,sum1, sum2, sum3;
452:   PetscErrorCode    ierr;
453:   PetscInt          n,i,jrow,j;
454:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

457:   if (yy != zz) {VecCopy(yy,zz);}
458:   VecGetArrayRead(xx,&x);
459:   VecGetArray(zz,&y);
460:   idx  = a->j;
461:   v    = a->a;
462:   ii   = a->i;

464:   for (i=0; i<m; i++) {
465:     jrow = ii[i];
466:     n    = ii[i+1] - jrow;
467:     sum1 = 0.0;
468:     sum2 = 0.0;
469:     sum3 = 0.0;
470:     for (j=0; j<n; j++) {
471:       sum1 += v[jrow]*x[3*idx[jrow]];
472:       sum2 += v[jrow]*x[3*idx[jrow]+1];
473:       sum3 += v[jrow]*x[3*idx[jrow]+2];
474:       jrow++;
475:     }
476:     y[3*i]   += sum1;
477:     y[3*i+1] += sum2;
478:     y[3*i+2] += sum3;
479:   }

481:   PetscLogFlops(6.0*a->nz);
482:   VecRestoreArrayRead(xx,&x);
483:   VecRestoreArray(zz,&y);
484:   return(0);
485: }
488: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
489: {
490:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
491:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
492:   const PetscScalar *x,*v;
493:   PetscScalar       *y,alpha1,alpha2,alpha3;
494:   PetscErrorCode    ierr;
495:   PetscInt          n,i;
496:   const PetscInt    m = b->AIJ->rmap->n,*idx;

499:   if (yy != zz) {VecCopy(yy,zz);}
500:   VecGetArrayRead(xx,&x);
501:   VecGetArray(zz,&y);
502:   for (i=0; i<m; i++) {
503:     idx    = a->j + a->i[i];
504:     v      = a->a + a->i[i];
505:     n      = a->i[i+1] - a->i[i];
506:     alpha1 = x[3*i];
507:     alpha2 = x[3*i+1];
508:     alpha3 = x[3*i+2];
509:     while (n-->0) {
510:       y[3*(*idx)]   += alpha1*(*v);
511:       y[3*(*idx)+1] += alpha2*(*v);
512:       y[3*(*idx)+2] += alpha3*(*v);
513:       idx++; v++;
514:     }
515:   }
516:   PetscLogFlops(6.0*a->nz);
517:   VecRestoreArrayRead(xx,&x);
518:   VecRestoreArray(zz,&y);
519:   return(0);
520: }

522: /* ------------------------------------------------------------------------------*/
525: PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
526: {
527:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
528:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
529:   const PetscScalar *x,*v;
530:   PetscScalar       *y,sum1, sum2, sum3, sum4;
531:   PetscErrorCode    ierr;
532:   PetscInt          nonzerorow=0,n,i,jrow,j;
533:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;

536:   VecGetArrayRead(xx,&x);
537:   VecGetArray(yy,&y);
538:   idx  = a->j;
539:   v    = a->a;
540:   ii   = a->i;

542:   for (i=0; i<m; i++) {
543:     jrow        = ii[i];
544:     n           = ii[i+1] - jrow;
545:     sum1        = 0.0;
546:     sum2        = 0.0;
547:     sum3        = 0.0;
548:     sum4        = 0.0;
549:     nonzerorow += (n>0);
550:     for (j=0; j<n; j++) {
551:       sum1 += v[jrow]*x[4*idx[jrow]];
552:       sum2 += v[jrow]*x[4*idx[jrow]+1];
553:       sum3 += v[jrow]*x[4*idx[jrow]+2];
554:       sum4 += v[jrow]*x[4*idx[jrow]+3];
555:       jrow++;
556:     }
557:     y[4*i]   = sum1;
558:     y[4*i+1] = sum2;
559:     y[4*i+2] = sum3;
560:     y[4*i+3] = sum4;
561:   }

563:   PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);
564:   VecRestoreArrayRead(xx,&x);
565:   VecRestoreArray(yy,&y);
566:   return(0);
567: }

571: PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
572: {
573:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
574:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
575:   const PetscScalar *x,*v;
576:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
577:   PetscErrorCode    ierr;
578:   PetscInt          n,i;
579:   const PetscInt    m = b->AIJ->rmap->n,*idx;

582:   VecSet(yy,0.0);
583:   VecGetArrayRead(xx,&x);
584:   VecGetArray(yy,&y);
585:   for (i=0; i<m; i++) {
586:     idx    = a->j + a->i[i];
587:     v      = a->a + a->i[i];
588:     n      = a->i[i+1] - a->i[i];
589:     alpha1 = x[4*i];
590:     alpha2 = x[4*i+1];
591:     alpha3 = x[4*i+2];
592:     alpha4 = x[4*i+3];
593:     while (n-->0) {
594:       y[4*(*idx)]   += alpha1*(*v);
595:       y[4*(*idx)+1] += alpha2*(*v);
596:       y[4*(*idx)+2] += alpha3*(*v);
597:       y[4*(*idx)+3] += alpha4*(*v);
598:       idx++; v++;
599:     }
600:   }
601:   PetscLogFlops(8.0*a->nz);
602:   VecRestoreArrayRead(xx,&x);
603:   VecRestoreArray(yy,&y);
604:   return(0);
605: }

609: PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
610: {
611:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
612:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
613:   const PetscScalar *x,*v;
614:   PetscScalar       *y,sum1, sum2, sum3, sum4;
615:   PetscErrorCode    ierr;
616:   PetscInt          n,i,jrow,j;
617:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

620:   if (yy != zz) {VecCopy(yy,zz);}
621:   VecGetArrayRead(xx,&x);
622:   VecGetArray(zz,&y);
623:   idx  = a->j;
624:   v    = a->a;
625:   ii   = a->i;

627:   for (i=0; i<m; i++) {
628:     jrow = ii[i];
629:     n    = ii[i+1] - jrow;
630:     sum1 = 0.0;
631:     sum2 = 0.0;
632:     sum3 = 0.0;
633:     sum4 = 0.0;
634:     for (j=0; j<n; j++) {
635:       sum1 += v[jrow]*x[4*idx[jrow]];
636:       sum2 += v[jrow]*x[4*idx[jrow]+1];
637:       sum3 += v[jrow]*x[4*idx[jrow]+2];
638:       sum4 += v[jrow]*x[4*idx[jrow]+3];
639:       jrow++;
640:     }
641:     y[4*i]   += sum1;
642:     y[4*i+1] += sum2;
643:     y[4*i+2] += sum3;
644:     y[4*i+3] += sum4;
645:   }

647:   PetscLogFlops(8.0*a->nz);
648:   VecRestoreArrayRead(xx,&x);
649:   VecRestoreArray(zz,&y);
650:   return(0);
651: }
654: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
655: {
656:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
657:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
658:   const PetscScalar *x,*v;
659:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
660:   PetscErrorCode    ierr;
661:   PetscInt          n,i;
662:   const PetscInt    m = b->AIJ->rmap->n,*idx;

665:   if (yy != zz) {VecCopy(yy,zz);}
666:   VecGetArrayRead(xx,&x);
667:   VecGetArray(zz,&y);

669:   for (i=0; i<m; i++) {
670:     idx    = a->j + a->i[i];
671:     v      = a->a + a->i[i];
672:     n      = a->i[i+1] - a->i[i];
673:     alpha1 = x[4*i];
674:     alpha2 = x[4*i+1];
675:     alpha3 = x[4*i+2];
676:     alpha4 = x[4*i+3];
677:     while (n-->0) {
678:       y[4*(*idx)]   += alpha1*(*v);
679:       y[4*(*idx)+1] += alpha2*(*v);
680:       y[4*(*idx)+2] += alpha3*(*v);
681:       y[4*(*idx)+3] += alpha4*(*v);
682:       idx++; v++;
683:     }
684:   }
685:   PetscLogFlops(8.0*a->nz);
686:   VecRestoreArrayRead(xx,&x);
687:   VecRestoreArray(zz,&y);
688:   return(0);
689: }
690: /* ------------------------------------------------------------------------------*/

694: PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
695: {
696:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
697:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
698:   const PetscScalar *x,*v;
699:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
700:   PetscErrorCode    ierr;
701:   PetscInt          nonzerorow=0,n,i,jrow,j;
702:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;

705:   VecGetArrayRead(xx,&x);
706:   VecGetArray(yy,&y);
707:   idx  = a->j;
708:   v    = a->a;
709:   ii   = a->i;

711:   for (i=0; i<m; i++) {
712:     jrow  = ii[i];
713:     n     = ii[i+1] - jrow;
714:     sum1  = 0.0;
715:     sum2  = 0.0;
716:     sum3  = 0.0;
717:     sum4  = 0.0;
718:     sum5  = 0.0;

720:     nonzerorow += (n>0);
721:     for (j=0; j<n; j++) {
722:       sum1 += v[jrow]*x[5*idx[jrow]];
723:       sum2 += v[jrow]*x[5*idx[jrow]+1];
724:       sum3 += v[jrow]*x[5*idx[jrow]+2];
725:       sum4 += v[jrow]*x[5*idx[jrow]+3];
726:       sum5 += v[jrow]*x[5*idx[jrow]+4];
727:       jrow++;
728:     }
729:     y[5*i]   = sum1;
730:     y[5*i+1] = sum2;
731:     y[5*i+2] = sum3;
732:     y[5*i+3] = sum4;
733:     y[5*i+4] = sum5;
734:   }

736:   PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);
737:   VecRestoreArrayRead(xx,&x);
738:   VecRestoreArray(yy,&y);
739:   return(0);
740: }

744: PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
745: {
746:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
747:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
748:   const PetscScalar *x,*v;
749:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
750:   PetscErrorCode    ierr;
751:   PetscInt          n,i;
752:   const PetscInt    m = b->AIJ->rmap->n,*idx;

755:   VecSet(yy,0.0);
756:   VecGetArrayRead(xx,&x);
757:   VecGetArray(yy,&y);

759:   for (i=0; i<m; i++) {
760:     idx    = a->j + a->i[i];
761:     v      = a->a + a->i[i];
762:     n      = a->i[i+1] - a->i[i];
763:     alpha1 = x[5*i];
764:     alpha2 = x[5*i+1];
765:     alpha3 = x[5*i+2];
766:     alpha4 = x[5*i+3];
767:     alpha5 = x[5*i+4];
768:     while (n-->0) {
769:       y[5*(*idx)]   += alpha1*(*v);
770:       y[5*(*idx)+1] += alpha2*(*v);
771:       y[5*(*idx)+2] += alpha3*(*v);
772:       y[5*(*idx)+3] += alpha4*(*v);
773:       y[5*(*idx)+4] += alpha5*(*v);
774:       idx++; v++;
775:     }
776:   }
777:   PetscLogFlops(10.0*a->nz);
778:   VecRestoreArrayRead(xx,&x);
779:   VecRestoreArray(yy,&y);
780:   return(0);
781: }

785: PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
786: {
787:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
788:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
789:   const PetscScalar *x,*v;
790:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
791:   PetscErrorCode    ierr;
792:   PetscInt          n,i,jrow,j;
793:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

796:   if (yy != zz) {VecCopy(yy,zz);}
797:   VecGetArrayRead(xx,&x);
798:   VecGetArray(zz,&y);
799:   idx  = a->j;
800:   v    = a->a;
801:   ii   = a->i;

803:   for (i=0; i<m; i++) {
804:     jrow = ii[i];
805:     n    = ii[i+1] - jrow;
806:     sum1 = 0.0;
807:     sum2 = 0.0;
808:     sum3 = 0.0;
809:     sum4 = 0.0;
810:     sum5 = 0.0;
811:     for (j=0; j<n; j++) {
812:       sum1 += v[jrow]*x[5*idx[jrow]];
813:       sum2 += v[jrow]*x[5*idx[jrow]+1];
814:       sum3 += v[jrow]*x[5*idx[jrow]+2];
815:       sum4 += v[jrow]*x[5*idx[jrow]+3];
816:       sum5 += v[jrow]*x[5*idx[jrow]+4];
817:       jrow++;
818:     }
819:     y[5*i]   += sum1;
820:     y[5*i+1] += sum2;
821:     y[5*i+2] += sum3;
822:     y[5*i+3] += sum4;
823:     y[5*i+4] += sum5;
824:   }

826:   PetscLogFlops(10.0*a->nz);
827:   VecRestoreArrayRead(xx,&x);
828:   VecRestoreArray(zz,&y);
829:   return(0);
830: }

834: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
835: {
836:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
837:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
838:   const PetscScalar *x,*v;
839:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
840:   PetscErrorCode    ierr;
841:   PetscInt          n,i;
842:   const PetscInt    m = b->AIJ->rmap->n,*idx;

845:   if (yy != zz) {VecCopy(yy,zz);}
846:   VecGetArrayRead(xx,&x);
847:   VecGetArray(zz,&y);

849:   for (i=0; i<m; i++) {
850:     idx    = a->j + a->i[i];
851:     v      = a->a + a->i[i];
852:     n      = a->i[i+1] - a->i[i];
853:     alpha1 = x[5*i];
854:     alpha2 = x[5*i+1];
855:     alpha3 = x[5*i+2];
856:     alpha4 = x[5*i+3];
857:     alpha5 = x[5*i+4];
858:     while (n-->0) {
859:       y[5*(*idx)]   += alpha1*(*v);
860:       y[5*(*idx)+1] += alpha2*(*v);
861:       y[5*(*idx)+2] += alpha3*(*v);
862:       y[5*(*idx)+3] += alpha4*(*v);
863:       y[5*(*idx)+4] += alpha5*(*v);
864:       idx++; v++;
865:     }
866:   }
867:   PetscLogFlops(10.0*a->nz);
868:   VecRestoreArrayRead(xx,&x);
869:   VecRestoreArray(zz,&y);
870:   return(0);
871: }

873: /* ------------------------------------------------------------------------------*/
876: PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
877: {
878:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
879:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
880:   const PetscScalar *x,*v;
881:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
882:   PetscErrorCode    ierr;
883:   PetscInt          nonzerorow=0,n,i,jrow,j;
884:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;

887:   VecGetArrayRead(xx,&x);
888:   VecGetArray(yy,&y);
889:   idx  = a->j;
890:   v    = a->a;
891:   ii   = a->i;

893:   for (i=0; i<m; i++) {
894:     jrow  = ii[i];
895:     n     = ii[i+1] - jrow;
896:     sum1  = 0.0;
897:     sum2  = 0.0;
898:     sum3  = 0.0;
899:     sum4  = 0.0;
900:     sum5  = 0.0;
901:     sum6  = 0.0;

903:     nonzerorow += (n>0);
904:     for (j=0; j<n; j++) {
905:       sum1 += v[jrow]*x[6*idx[jrow]];
906:       sum2 += v[jrow]*x[6*idx[jrow]+1];
907:       sum3 += v[jrow]*x[6*idx[jrow]+2];
908:       sum4 += v[jrow]*x[6*idx[jrow]+3];
909:       sum5 += v[jrow]*x[6*idx[jrow]+4];
910:       sum6 += v[jrow]*x[6*idx[jrow]+5];
911:       jrow++;
912:     }
913:     y[6*i]   = sum1;
914:     y[6*i+1] = sum2;
915:     y[6*i+2] = sum3;
916:     y[6*i+3] = sum4;
917:     y[6*i+4] = sum5;
918:     y[6*i+5] = sum6;
919:   }

921:   PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);
922:   VecRestoreArrayRead(xx,&x);
923:   VecRestoreArray(yy,&y);
924:   return(0);
925: }

929: PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
930: {
931:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
932:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
933:   const PetscScalar *x,*v;
934:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
935:   PetscErrorCode    ierr;
936:   PetscInt          n,i;
937:   const PetscInt    m = b->AIJ->rmap->n,*idx;

940:   VecSet(yy,0.0);
941:   VecGetArrayRead(xx,&x);
942:   VecGetArray(yy,&y);

944:   for (i=0; i<m; i++) {
945:     idx    = a->j + a->i[i];
946:     v      = a->a + a->i[i];
947:     n      = a->i[i+1] - a->i[i];
948:     alpha1 = x[6*i];
949:     alpha2 = x[6*i+1];
950:     alpha3 = x[6*i+2];
951:     alpha4 = x[6*i+3];
952:     alpha5 = x[6*i+4];
953:     alpha6 = x[6*i+5];
954:     while (n-->0) {
955:       y[6*(*idx)]   += alpha1*(*v);
956:       y[6*(*idx)+1] += alpha2*(*v);
957:       y[6*(*idx)+2] += alpha3*(*v);
958:       y[6*(*idx)+3] += alpha4*(*v);
959:       y[6*(*idx)+4] += alpha5*(*v);
960:       y[6*(*idx)+5] += alpha6*(*v);
961:       idx++; v++;
962:     }
963:   }
964:   PetscLogFlops(12.0*a->nz);
965:   VecRestoreArrayRead(xx,&x);
966:   VecRestoreArray(yy,&y);
967:   return(0);
968: }

972: PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
973: {
974:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
975:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
976:   const PetscScalar *x,*v;
977:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
978:   PetscErrorCode    ierr;
979:   PetscInt          n,i,jrow,j;
980:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

983:   if (yy != zz) {VecCopy(yy,zz);}
984:   VecGetArrayRead(xx,&x);
985:   VecGetArray(zz,&y);
986:   idx  = a->j;
987:   v    = a->a;
988:   ii   = a->i;

990:   for (i=0; i<m; i++) {
991:     jrow = ii[i];
992:     n    = ii[i+1] - jrow;
993:     sum1 = 0.0;
994:     sum2 = 0.0;
995:     sum3 = 0.0;
996:     sum4 = 0.0;
997:     sum5 = 0.0;
998:     sum6 = 0.0;
999:     for (j=0; j<n; j++) {
1000:       sum1 += v[jrow]*x[6*idx[jrow]];
1001:       sum2 += v[jrow]*x[6*idx[jrow]+1];
1002:       sum3 += v[jrow]*x[6*idx[jrow]+2];
1003:       sum4 += v[jrow]*x[6*idx[jrow]+3];
1004:       sum5 += v[jrow]*x[6*idx[jrow]+4];
1005:       sum6 += v[jrow]*x[6*idx[jrow]+5];
1006:       jrow++;
1007:     }
1008:     y[6*i]   += sum1;
1009:     y[6*i+1] += sum2;
1010:     y[6*i+2] += sum3;
1011:     y[6*i+3] += sum4;
1012:     y[6*i+4] += sum5;
1013:     y[6*i+5] += sum6;
1014:   }

1016:   PetscLogFlops(12.0*a->nz);
1017:   VecRestoreArrayRead(xx,&x);
1018:   VecRestoreArray(zz,&y);
1019:   return(0);
1020: }

1024: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
1025: {
1026:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1027:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1028:   const PetscScalar *x,*v;
1029:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
1030:   PetscErrorCode    ierr;
1031:   PetscInt          n,i;
1032:   const PetscInt    m = b->AIJ->rmap->n,*idx;

1035:   if (yy != zz) {VecCopy(yy,zz);}
1036:   VecGetArrayRead(xx,&x);
1037:   VecGetArray(zz,&y);

1039:   for (i=0; i<m; i++) {
1040:     idx    = a->j + a->i[i];
1041:     v      = a->a + a->i[i];
1042:     n      = a->i[i+1] - a->i[i];
1043:     alpha1 = x[6*i];
1044:     alpha2 = x[6*i+1];
1045:     alpha3 = x[6*i+2];
1046:     alpha4 = x[6*i+3];
1047:     alpha5 = x[6*i+4];
1048:     alpha6 = x[6*i+5];
1049:     while (n-->0) {
1050:       y[6*(*idx)]   += alpha1*(*v);
1051:       y[6*(*idx)+1] += alpha2*(*v);
1052:       y[6*(*idx)+2] += alpha3*(*v);
1053:       y[6*(*idx)+3] += alpha4*(*v);
1054:       y[6*(*idx)+4] += alpha5*(*v);
1055:       y[6*(*idx)+5] += alpha6*(*v);
1056:       idx++; v++;
1057:     }
1058:   }
1059:   PetscLogFlops(12.0*a->nz);
1060:   VecRestoreArrayRead(xx,&x);
1061:   VecRestoreArray(zz,&y);
1062:   return(0);
1063: }

1065: /* ------------------------------------------------------------------------------*/
1068: PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1069: {
1070:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1071:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1072:   const PetscScalar *x,*v;
1073:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1074:   PetscErrorCode    ierr;
1075:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1076:   PetscInt          nonzerorow=0,n,i,jrow,j;

1079:   VecGetArrayRead(xx,&x);
1080:   VecGetArray(yy,&y);
1081:   idx  = a->j;
1082:   v    = a->a;
1083:   ii   = a->i;

1085:   for (i=0; i<m; i++) {
1086:     jrow  = ii[i];
1087:     n     = ii[i+1] - jrow;
1088:     sum1  = 0.0;
1089:     sum2  = 0.0;
1090:     sum3  = 0.0;
1091:     sum4  = 0.0;
1092:     sum5  = 0.0;
1093:     sum6  = 0.0;
1094:     sum7  = 0.0;

1096:     nonzerorow += (n>0);
1097:     for (j=0; j<n; j++) {
1098:       sum1 += v[jrow]*x[7*idx[jrow]];
1099:       sum2 += v[jrow]*x[7*idx[jrow]+1];
1100:       sum3 += v[jrow]*x[7*idx[jrow]+2];
1101:       sum4 += v[jrow]*x[7*idx[jrow]+3];
1102:       sum5 += v[jrow]*x[7*idx[jrow]+4];
1103:       sum6 += v[jrow]*x[7*idx[jrow]+5];
1104:       sum7 += v[jrow]*x[7*idx[jrow]+6];
1105:       jrow++;
1106:     }
1107:     y[7*i]   = sum1;
1108:     y[7*i+1] = sum2;
1109:     y[7*i+2] = sum3;
1110:     y[7*i+3] = sum4;
1111:     y[7*i+4] = sum5;
1112:     y[7*i+5] = sum6;
1113:     y[7*i+6] = sum7;
1114:   }

1116:   PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);
1117:   VecRestoreArrayRead(xx,&x);
1118:   VecRestoreArray(yy,&y);
1119:   return(0);
1120: }

1124: PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1125: {
1126:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1127:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1128:   const PetscScalar *x,*v;
1129:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1130:   PetscErrorCode    ierr;
1131:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1132:   PetscInt          n,i;

1135:   VecSet(yy,0.0);
1136:   VecGetArrayRead(xx,&x);
1137:   VecGetArray(yy,&y);

1139:   for (i=0; i<m; i++) {
1140:     idx    = a->j + a->i[i];
1141:     v      = a->a + a->i[i];
1142:     n      = a->i[i+1] - a->i[i];
1143:     alpha1 = x[7*i];
1144:     alpha2 = x[7*i+1];
1145:     alpha3 = x[7*i+2];
1146:     alpha4 = x[7*i+3];
1147:     alpha5 = x[7*i+4];
1148:     alpha6 = x[7*i+5];
1149:     alpha7 = x[7*i+6];
1150:     while (n-->0) {
1151:       y[7*(*idx)]   += alpha1*(*v);
1152:       y[7*(*idx)+1] += alpha2*(*v);
1153:       y[7*(*idx)+2] += alpha3*(*v);
1154:       y[7*(*idx)+3] += alpha4*(*v);
1155:       y[7*(*idx)+4] += alpha5*(*v);
1156:       y[7*(*idx)+5] += alpha6*(*v);
1157:       y[7*(*idx)+6] += alpha7*(*v);
1158:       idx++; v++;
1159:     }
1160:   }
1161:   PetscLogFlops(14.0*a->nz);
1162:   VecRestoreArrayRead(xx,&x);
1163:   VecRestoreArray(yy,&y);
1164:   return(0);
1165: }

1169: PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1170: {
1171:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1172:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1173:   const PetscScalar *x,*v;
1174:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1175:   PetscErrorCode    ierr;
1176:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1177:   PetscInt          n,i,jrow,j;

1180:   if (yy != zz) {VecCopy(yy,zz);}
1181:   VecGetArrayRead(xx,&x);
1182:   VecGetArray(zz,&y);
1183:   idx  = a->j;
1184:   v    = a->a;
1185:   ii   = a->i;

1187:   for (i=0; i<m; i++) {
1188:     jrow = ii[i];
1189:     n    = ii[i+1] - jrow;
1190:     sum1 = 0.0;
1191:     sum2 = 0.0;
1192:     sum3 = 0.0;
1193:     sum4 = 0.0;
1194:     sum5 = 0.0;
1195:     sum6 = 0.0;
1196:     sum7 = 0.0;
1197:     for (j=0; j<n; j++) {
1198:       sum1 += v[jrow]*x[7*idx[jrow]];
1199:       sum2 += v[jrow]*x[7*idx[jrow]+1];
1200:       sum3 += v[jrow]*x[7*idx[jrow]+2];
1201:       sum4 += v[jrow]*x[7*idx[jrow]+3];
1202:       sum5 += v[jrow]*x[7*idx[jrow]+4];
1203:       sum6 += v[jrow]*x[7*idx[jrow]+5];
1204:       sum7 += v[jrow]*x[7*idx[jrow]+6];
1205:       jrow++;
1206:     }
1207:     y[7*i]   += sum1;
1208:     y[7*i+1] += sum2;
1209:     y[7*i+2] += sum3;
1210:     y[7*i+3] += sum4;
1211:     y[7*i+4] += sum5;
1212:     y[7*i+5] += sum6;
1213:     y[7*i+6] += sum7;
1214:   }

1216:   PetscLogFlops(14.0*a->nz);
1217:   VecRestoreArrayRead(xx,&x);
1218:   VecRestoreArray(zz,&y);
1219:   return(0);
1220: }

1224: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1225: {
1226:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1227:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1228:   const PetscScalar *x,*v;
1229:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1230:   PetscErrorCode    ierr;
1231:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1232:   PetscInt          n,i;

1235:   if (yy != zz) {VecCopy(yy,zz);}
1236:   VecGetArrayRead(xx,&x);
1237:   VecGetArray(zz,&y);
1238:   for (i=0; i<m; i++) {
1239:     idx    = a->j + a->i[i];
1240:     v      = a->a + a->i[i];
1241:     n      = a->i[i+1] - a->i[i];
1242:     alpha1 = x[7*i];
1243:     alpha2 = x[7*i+1];
1244:     alpha3 = x[7*i+2];
1245:     alpha4 = x[7*i+3];
1246:     alpha5 = x[7*i+4];
1247:     alpha6 = x[7*i+5];
1248:     alpha7 = x[7*i+6];
1249:     while (n-->0) {
1250:       y[7*(*idx)]   += alpha1*(*v);
1251:       y[7*(*idx)+1] += alpha2*(*v);
1252:       y[7*(*idx)+2] += alpha3*(*v);
1253:       y[7*(*idx)+3] += alpha4*(*v);
1254:       y[7*(*idx)+4] += alpha5*(*v);
1255:       y[7*(*idx)+5] += alpha6*(*v);
1256:       y[7*(*idx)+6] += alpha7*(*v);
1257:       idx++; v++;
1258:     }
1259:   }
1260:   PetscLogFlops(14.0*a->nz);
1261:   VecRestoreArrayRead(xx,&x);
1262:   VecRestoreArray(zz,&y);
1263:   return(0);
1264: }

1268: PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1269: {
1270:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1271:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1272:   const PetscScalar *x,*v;
1273:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1274:   PetscErrorCode    ierr;
1275:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1276:   PetscInt          nonzerorow=0,n,i,jrow,j;

1279:   VecGetArrayRead(xx,&x);
1280:   VecGetArray(yy,&y);
1281:   idx  = a->j;
1282:   v    = a->a;
1283:   ii   = a->i;

1285:   for (i=0; i<m; i++) {
1286:     jrow  = ii[i];
1287:     n     = ii[i+1] - jrow;
1288:     sum1  = 0.0;
1289:     sum2  = 0.0;
1290:     sum3  = 0.0;
1291:     sum4  = 0.0;
1292:     sum5  = 0.0;
1293:     sum6  = 0.0;
1294:     sum7  = 0.0;
1295:     sum8  = 0.0;

1297:     nonzerorow += (n>0);
1298:     for (j=0; j<n; j++) {
1299:       sum1 += v[jrow]*x[8*idx[jrow]];
1300:       sum2 += v[jrow]*x[8*idx[jrow]+1];
1301:       sum3 += v[jrow]*x[8*idx[jrow]+2];
1302:       sum4 += v[jrow]*x[8*idx[jrow]+3];
1303:       sum5 += v[jrow]*x[8*idx[jrow]+4];
1304:       sum6 += v[jrow]*x[8*idx[jrow]+5];
1305:       sum7 += v[jrow]*x[8*idx[jrow]+6];
1306:       sum8 += v[jrow]*x[8*idx[jrow]+7];
1307:       jrow++;
1308:     }
1309:     y[8*i]   = sum1;
1310:     y[8*i+1] = sum2;
1311:     y[8*i+2] = sum3;
1312:     y[8*i+3] = sum4;
1313:     y[8*i+4] = sum5;
1314:     y[8*i+5] = sum6;
1315:     y[8*i+6] = sum7;
1316:     y[8*i+7] = sum8;
1317:   }

1319:   PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);
1320:   VecRestoreArrayRead(xx,&x);
1321:   VecRestoreArray(yy,&y);
1322:   return(0);
1323: }

1327: PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1328: {
1329:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1330:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1331:   const PetscScalar *x,*v;
1332:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1333:   PetscErrorCode    ierr;
1334:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1335:   PetscInt          n,i;

1338:   VecSet(yy,0.0);
1339:   VecGetArrayRead(xx,&x);
1340:   VecGetArray(yy,&y);

1342:   for (i=0; i<m; i++) {
1343:     idx    = a->j + a->i[i];
1344:     v      = a->a + a->i[i];
1345:     n      = a->i[i+1] - a->i[i];
1346:     alpha1 = x[8*i];
1347:     alpha2 = x[8*i+1];
1348:     alpha3 = x[8*i+2];
1349:     alpha4 = x[8*i+3];
1350:     alpha5 = x[8*i+4];
1351:     alpha6 = x[8*i+5];
1352:     alpha7 = x[8*i+6];
1353:     alpha8 = x[8*i+7];
1354:     while (n-->0) {
1355:       y[8*(*idx)]   += alpha1*(*v);
1356:       y[8*(*idx)+1] += alpha2*(*v);
1357:       y[8*(*idx)+2] += alpha3*(*v);
1358:       y[8*(*idx)+3] += alpha4*(*v);
1359:       y[8*(*idx)+4] += alpha5*(*v);
1360:       y[8*(*idx)+5] += alpha6*(*v);
1361:       y[8*(*idx)+6] += alpha7*(*v);
1362:       y[8*(*idx)+7] += alpha8*(*v);
1363:       idx++; v++;
1364:     }
1365:   }
1366:   PetscLogFlops(16.0*a->nz);
1367:   VecRestoreArrayRead(xx,&x);
1368:   VecRestoreArray(yy,&y);
1369:   return(0);
1370: }

1374: PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1375: {
1376:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1377:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1378:   const PetscScalar *x,*v;
1379:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1380:   PetscErrorCode    ierr;
1381:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1382:   PetscInt          n,i,jrow,j;

1385:   if (yy != zz) {VecCopy(yy,zz);}
1386:   VecGetArrayRead(xx,&x);
1387:   VecGetArray(zz,&y);
1388:   idx  = a->j;
1389:   v    = a->a;
1390:   ii   = a->i;

1392:   for (i=0; i<m; i++) {
1393:     jrow = ii[i];
1394:     n    = ii[i+1] - jrow;
1395:     sum1 = 0.0;
1396:     sum2 = 0.0;
1397:     sum3 = 0.0;
1398:     sum4 = 0.0;
1399:     sum5 = 0.0;
1400:     sum6 = 0.0;
1401:     sum7 = 0.0;
1402:     sum8 = 0.0;
1403:     for (j=0; j<n; j++) {
1404:       sum1 += v[jrow]*x[8*idx[jrow]];
1405:       sum2 += v[jrow]*x[8*idx[jrow]+1];
1406:       sum3 += v[jrow]*x[8*idx[jrow]+2];
1407:       sum4 += v[jrow]*x[8*idx[jrow]+3];
1408:       sum5 += v[jrow]*x[8*idx[jrow]+4];
1409:       sum6 += v[jrow]*x[8*idx[jrow]+5];
1410:       sum7 += v[jrow]*x[8*idx[jrow]+6];
1411:       sum8 += v[jrow]*x[8*idx[jrow]+7];
1412:       jrow++;
1413:     }
1414:     y[8*i]   += sum1;
1415:     y[8*i+1] += sum2;
1416:     y[8*i+2] += sum3;
1417:     y[8*i+3] += sum4;
1418:     y[8*i+4] += sum5;
1419:     y[8*i+5] += sum6;
1420:     y[8*i+6] += sum7;
1421:     y[8*i+7] += sum8;
1422:   }

1424:   PetscLogFlops(16.0*a->nz);
1425:   VecRestoreArrayRead(xx,&x);
1426:   VecRestoreArray(zz,&y);
1427:   return(0);
1428: }

1432: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1433: {
1434:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1435:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1436:   const PetscScalar *x,*v;
1437:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1438:   PetscErrorCode    ierr;
1439:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1440:   PetscInt          n,i;

1443:   if (yy != zz) {VecCopy(yy,zz);}
1444:   VecGetArrayRead(xx,&x);
1445:   VecGetArray(zz,&y);
1446:   for (i=0; i<m; i++) {
1447:     idx    = a->j + a->i[i];
1448:     v      = a->a + a->i[i];
1449:     n      = a->i[i+1] - a->i[i];
1450:     alpha1 = x[8*i];
1451:     alpha2 = x[8*i+1];
1452:     alpha3 = x[8*i+2];
1453:     alpha4 = x[8*i+3];
1454:     alpha5 = x[8*i+4];
1455:     alpha6 = x[8*i+5];
1456:     alpha7 = x[8*i+6];
1457:     alpha8 = x[8*i+7];
1458:     while (n-->0) {
1459:       y[8*(*idx)]   += alpha1*(*v);
1460:       y[8*(*idx)+1] += alpha2*(*v);
1461:       y[8*(*idx)+2] += alpha3*(*v);
1462:       y[8*(*idx)+3] += alpha4*(*v);
1463:       y[8*(*idx)+4] += alpha5*(*v);
1464:       y[8*(*idx)+5] += alpha6*(*v);
1465:       y[8*(*idx)+6] += alpha7*(*v);
1466:       y[8*(*idx)+7] += alpha8*(*v);
1467:       idx++; v++;
1468:     }
1469:   }
1470:   PetscLogFlops(16.0*a->nz);
1471:   VecRestoreArrayRead(xx,&x);
1472:   VecRestoreArray(zz,&y);
1473:   return(0);
1474: }

1476: /* ------------------------------------------------------------------------------*/
1479: PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1480: {
1481:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1482:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1483:   const PetscScalar *x,*v;
1484:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1485:   PetscErrorCode    ierr;
1486:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1487:   PetscInt          nonzerorow=0,n,i,jrow,j;

1490:   VecGetArrayRead(xx,&x);
1491:   VecGetArray(yy,&y);
1492:   idx  = a->j;
1493:   v    = a->a;
1494:   ii   = a->i;

1496:   for (i=0; i<m; i++) {
1497:     jrow  = ii[i];
1498:     n     = ii[i+1] - jrow;
1499:     sum1  = 0.0;
1500:     sum2  = 0.0;
1501:     sum3  = 0.0;
1502:     sum4  = 0.0;
1503:     sum5  = 0.0;
1504:     sum6  = 0.0;
1505:     sum7  = 0.0;
1506:     sum8  = 0.0;
1507:     sum9  = 0.0;

1509:     nonzerorow += (n>0);
1510:     for (j=0; j<n; j++) {
1511:       sum1 += v[jrow]*x[9*idx[jrow]];
1512:       sum2 += v[jrow]*x[9*idx[jrow]+1];
1513:       sum3 += v[jrow]*x[9*idx[jrow]+2];
1514:       sum4 += v[jrow]*x[9*idx[jrow]+3];
1515:       sum5 += v[jrow]*x[9*idx[jrow]+4];
1516:       sum6 += v[jrow]*x[9*idx[jrow]+5];
1517:       sum7 += v[jrow]*x[9*idx[jrow]+6];
1518:       sum8 += v[jrow]*x[9*idx[jrow]+7];
1519:       sum9 += v[jrow]*x[9*idx[jrow]+8];
1520:       jrow++;
1521:     }
1522:     y[9*i]   = sum1;
1523:     y[9*i+1] = sum2;
1524:     y[9*i+2] = sum3;
1525:     y[9*i+3] = sum4;
1526:     y[9*i+4] = sum5;
1527:     y[9*i+5] = sum6;
1528:     y[9*i+6] = sum7;
1529:     y[9*i+7] = sum8;
1530:     y[9*i+8] = sum9;
1531:   }

1533:   PetscLogFlops(18.0*a->nz - 9*nonzerorow);
1534:   VecRestoreArrayRead(xx,&x);
1535:   VecRestoreArray(yy,&y);
1536:   return(0);
1537: }

1539: /* ------------------------------------------------------------------------------*/

1543: PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1544: {
1545:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1546:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1547:   const PetscScalar *x,*v;
1548:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1549:   PetscErrorCode    ierr;
1550:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1551:   PetscInt          n,i;

1554:   VecSet(yy,0.0);
1555:   VecGetArrayRead(xx,&x);
1556:   VecGetArray(yy,&y);

1558:   for (i=0; i<m; i++) {
1559:     idx    = a->j + a->i[i];
1560:     v      = a->a + a->i[i];
1561:     n      = a->i[i+1] - a->i[i];
1562:     alpha1 = x[9*i];
1563:     alpha2 = x[9*i+1];
1564:     alpha3 = x[9*i+2];
1565:     alpha4 = x[9*i+3];
1566:     alpha5 = x[9*i+4];
1567:     alpha6 = x[9*i+5];
1568:     alpha7 = x[9*i+6];
1569:     alpha8 = x[9*i+7];
1570:     alpha9 = x[9*i+8];
1571:     while (n-->0) {
1572:       y[9*(*idx)]   += alpha1*(*v);
1573:       y[9*(*idx)+1] += alpha2*(*v);
1574:       y[9*(*idx)+2] += alpha3*(*v);
1575:       y[9*(*idx)+3] += alpha4*(*v);
1576:       y[9*(*idx)+4] += alpha5*(*v);
1577:       y[9*(*idx)+5] += alpha6*(*v);
1578:       y[9*(*idx)+6] += alpha7*(*v);
1579:       y[9*(*idx)+7] += alpha8*(*v);
1580:       y[9*(*idx)+8] += alpha9*(*v);
1581:       idx++; v++;
1582:     }
1583:   }
1584:   PetscLogFlops(18.0*a->nz);
1585:   VecRestoreArrayRead(xx,&x);
1586:   VecRestoreArray(yy,&y);
1587:   return(0);
1588: }

1592: PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1593: {
1594:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1595:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1596:   const PetscScalar *x,*v;
1597:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1598:   PetscErrorCode    ierr;
1599:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1600:   PetscInt          n,i,jrow,j;

1603:   if (yy != zz) {VecCopy(yy,zz);}
1604:   VecGetArrayRead(xx,&x);
1605:   VecGetArray(zz,&y);
1606:   idx  = a->j;
1607:   v    = a->a;
1608:   ii   = a->i;

1610:   for (i=0; i<m; i++) {
1611:     jrow = ii[i];
1612:     n    = ii[i+1] - jrow;
1613:     sum1 = 0.0;
1614:     sum2 = 0.0;
1615:     sum3 = 0.0;
1616:     sum4 = 0.0;
1617:     sum5 = 0.0;
1618:     sum6 = 0.0;
1619:     sum7 = 0.0;
1620:     sum8 = 0.0;
1621:     sum9 = 0.0;
1622:     for (j=0; j<n; j++) {
1623:       sum1 += v[jrow]*x[9*idx[jrow]];
1624:       sum2 += v[jrow]*x[9*idx[jrow]+1];
1625:       sum3 += v[jrow]*x[9*idx[jrow]+2];
1626:       sum4 += v[jrow]*x[9*idx[jrow]+3];
1627:       sum5 += v[jrow]*x[9*idx[jrow]+4];
1628:       sum6 += v[jrow]*x[9*idx[jrow]+5];
1629:       sum7 += v[jrow]*x[9*idx[jrow]+6];
1630:       sum8 += v[jrow]*x[9*idx[jrow]+7];
1631:       sum9 += v[jrow]*x[9*idx[jrow]+8];
1632:       jrow++;
1633:     }
1634:     y[9*i]   += sum1;
1635:     y[9*i+1] += sum2;
1636:     y[9*i+2] += sum3;
1637:     y[9*i+3] += sum4;
1638:     y[9*i+4] += sum5;
1639:     y[9*i+5] += sum6;
1640:     y[9*i+6] += sum7;
1641:     y[9*i+7] += sum8;
1642:     y[9*i+8] += sum9;
1643:   }

1645:   PetscLogFlops(18*a->nz);
1646:   VecRestoreArrayRead(xx,&x);
1647:   VecRestoreArray(zz,&y);
1648:   return(0);
1649: }

1653: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1654: {
1655:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1656:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1657:   const PetscScalar *x,*v;
1658:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1659:   PetscErrorCode    ierr;
1660:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1661:   PetscInt          n,i;

1664:   if (yy != zz) {VecCopy(yy,zz);}
1665:   VecGetArrayRead(xx,&x);
1666:   VecGetArray(zz,&y);
1667:   for (i=0; i<m; i++) {
1668:     idx    = a->j + a->i[i];
1669:     v      = a->a + a->i[i];
1670:     n      = a->i[i+1] - a->i[i];
1671:     alpha1 = x[9*i];
1672:     alpha2 = x[9*i+1];
1673:     alpha3 = x[9*i+2];
1674:     alpha4 = x[9*i+3];
1675:     alpha5 = x[9*i+4];
1676:     alpha6 = x[9*i+5];
1677:     alpha7 = x[9*i+6];
1678:     alpha8 = x[9*i+7];
1679:     alpha9 = x[9*i+8];
1680:     while (n-->0) {
1681:       y[9*(*idx)]   += alpha1*(*v);
1682:       y[9*(*idx)+1] += alpha2*(*v);
1683:       y[9*(*idx)+2] += alpha3*(*v);
1684:       y[9*(*idx)+3] += alpha4*(*v);
1685:       y[9*(*idx)+4] += alpha5*(*v);
1686:       y[9*(*idx)+5] += alpha6*(*v);
1687:       y[9*(*idx)+6] += alpha7*(*v);
1688:       y[9*(*idx)+7] += alpha8*(*v);
1689:       y[9*(*idx)+8] += alpha9*(*v);
1690:       idx++; v++;
1691:     }
1692:   }
1693:   PetscLogFlops(18.0*a->nz);
1694:   VecRestoreArrayRead(xx,&x);
1695:   VecRestoreArray(zz,&y);
1696:   return(0);
1697: }
1700: PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1701: {
1702:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1703:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1704:   const PetscScalar *x,*v;
1705:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1706:   PetscErrorCode    ierr;
1707:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1708:   PetscInt          nonzerorow=0,n,i,jrow,j;

1711:   VecGetArrayRead(xx,&x);
1712:   VecGetArray(yy,&y);
1713:   idx  = a->j;
1714:   v    = a->a;
1715:   ii   = a->i;

1717:   for (i=0; i<m; i++) {
1718:     jrow  = ii[i];
1719:     n     = ii[i+1] - jrow;
1720:     sum1  = 0.0;
1721:     sum2  = 0.0;
1722:     sum3  = 0.0;
1723:     sum4  = 0.0;
1724:     sum5  = 0.0;
1725:     sum6  = 0.0;
1726:     sum7  = 0.0;
1727:     sum8  = 0.0;
1728:     sum9  = 0.0;
1729:     sum10 = 0.0;

1731:     nonzerorow += (n>0);
1732:     for (j=0; j<n; j++) {
1733:       sum1  += v[jrow]*x[10*idx[jrow]];
1734:       sum2  += v[jrow]*x[10*idx[jrow]+1];
1735:       sum3  += v[jrow]*x[10*idx[jrow]+2];
1736:       sum4  += v[jrow]*x[10*idx[jrow]+3];
1737:       sum5  += v[jrow]*x[10*idx[jrow]+4];
1738:       sum6  += v[jrow]*x[10*idx[jrow]+5];
1739:       sum7  += v[jrow]*x[10*idx[jrow]+6];
1740:       sum8  += v[jrow]*x[10*idx[jrow]+7];
1741:       sum9  += v[jrow]*x[10*idx[jrow]+8];
1742:       sum10 += v[jrow]*x[10*idx[jrow]+9];
1743:       jrow++;
1744:     }
1745:     y[10*i]   = sum1;
1746:     y[10*i+1] = sum2;
1747:     y[10*i+2] = sum3;
1748:     y[10*i+3] = sum4;
1749:     y[10*i+4] = sum5;
1750:     y[10*i+5] = sum6;
1751:     y[10*i+6] = sum7;
1752:     y[10*i+7] = sum8;
1753:     y[10*i+8] = sum9;
1754:     y[10*i+9] = sum10;
1755:   }

1757:   PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);
1758:   VecRestoreArrayRead(xx,&x);
1759:   VecRestoreArray(yy,&y);
1760:   return(0);
1761: }

1765: PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1766: {
1767:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1768:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1769:   const PetscScalar *x,*v;
1770:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1771:   PetscErrorCode    ierr;
1772:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1773:   PetscInt          n,i,jrow,j;

1776:   if (yy != zz) {VecCopy(yy,zz);}
1777:   VecGetArrayRead(xx,&x);
1778:   VecGetArray(zz,&y);
1779:   idx  = a->j;
1780:   v    = a->a;
1781:   ii   = a->i;

1783:   for (i=0; i<m; i++) {
1784:     jrow  = ii[i];
1785:     n     = ii[i+1] - jrow;
1786:     sum1  = 0.0;
1787:     sum2  = 0.0;
1788:     sum3  = 0.0;
1789:     sum4  = 0.0;
1790:     sum5  = 0.0;
1791:     sum6  = 0.0;
1792:     sum7  = 0.0;
1793:     sum8  = 0.0;
1794:     sum9  = 0.0;
1795:     sum10 = 0.0;
1796:     for (j=0; j<n; j++) {
1797:       sum1  += v[jrow]*x[10*idx[jrow]];
1798:       sum2  += v[jrow]*x[10*idx[jrow]+1];
1799:       sum3  += v[jrow]*x[10*idx[jrow]+2];
1800:       sum4  += v[jrow]*x[10*idx[jrow]+3];
1801:       sum5  += v[jrow]*x[10*idx[jrow]+4];
1802:       sum6  += v[jrow]*x[10*idx[jrow]+5];
1803:       sum7  += v[jrow]*x[10*idx[jrow]+6];
1804:       sum8  += v[jrow]*x[10*idx[jrow]+7];
1805:       sum9  += v[jrow]*x[10*idx[jrow]+8];
1806:       sum10 += v[jrow]*x[10*idx[jrow]+9];
1807:       jrow++;
1808:     }
1809:     y[10*i]   += sum1;
1810:     y[10*i+1] += sum2;
1811:     y[10*i+2] += sum3;
1812:     y[10*i+3] += sum4;
1813:     y[10*i+4] += sum5;
1814:     y[10*i+5] += sum6;
1815:     y[10*i+6] += sum7;
1816:     y[10*i+7] += sum8;
1817:     y[10*i+8] += sum9;
1818:     y[10*i+9] += sum10;
1819:   }

1821:   PetscLogFlops(20.0*a->nz);
1822:   VecRestoreArrayRead(xx,&x);
1823:   VecRestoreArray(yy,&y);
1824:   return(0);
1825: }

1829: PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1830: {
1831:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1832:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1833:   const PetscScalar *x,*v;
1834:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1835:   PetscErrorCode    ierr;
1836:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1837:   PetscInt          n,i;

1840:   VecSet(yy,0.0);
1841:   VecGetArrayRead(xx,&x);
1842:   VecGetArray(yy,&y);

1844:   for (i=0; i<m; i++) {
1845:     idx     = a->j + a->i[i];
1846:     v       = a->a + a->i[i];
1847:     n       = a->i[i+1] - a->i[i];
1848:     alpha1  = x[10*i];
1849:     alpha2  = x[10*i+1];
1850:     alpha3  = x[10*i+2];
1851:     alpha4  = x[10*i+3];
1852:     alpha5  = x[10*i+4];
1853:     alpha6  = x[10*i+5];
1854:     alpha7  = x[10*i+6];
1855:     alpha8  = x[10*i+7];
1856:     alpha9  = x[10*i+8];
1857:     alpha10 = x[10*i+9];
1858:     while (n-->0) {
1859:       y[10*(*idx)]   += alpha1*(*v);
1860:       y[10*(*idx)+1] += alpha2*(*v);
1861:       y[10*(*idx)+2] += alpha3*(*v);
1862:       y[10*(*idx)+3] += alpha4*(*v);
1863:       y[10*(*idx)+4] += alpha5*(*v);
1864:       y[10*(*idx)+5] += alpha6*(*v);
1865:       y[10*(*idx)+6] += alpha7*(*v);
1866:       y[10*(*idx)+7] += alpha8*(*v);
1867:       y[10*(*idx)+8] += alpha9*(*v);
1868:       y[10*(*idx)+9] += alpha10*(*v);
1869:       idx++; v++;
1870:     }
1871:   }
1872:   PetscLogFlops(20.0*a->nz);
1873:   VecRestoreArrayRead(xx,&x);
1874:   VecRestoreArray(yy,&y);
1875:   return(0);
1876: }

1880: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1881: {
1882:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1883:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1884:   const PetscScalar *x,*v;
1885:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1886:   PetscErrorCode    ierr;
1887:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1888:   PetscInt          n,i;

1891:   if (yy != zz) {VecCopy(yy,zz);}
1892:   VecGetArrayRead(xx,&x);
1893:   VecGetArray(zz,&y);
1894:   for (i=0; i<m; i++) {
1895:     idx     = a->j + a->i[i];
1896:     v       = a->a + a->i[i];
1897:     n       = a->i[i+1] - a->i[i];
1898:     alpha1  = x[10*i];
1899:     alpha2  = x[10*i+1];
1900:     alpha3  = x[10*i+2];
1901:     alpha4  = x[10*i+3];
1902:     alpha5  = x[10*i+4];
1903:     alpha6  = x[10*i+5];
1904:     alpha7  = x[10*i+6];
1905:     alpha8  = x[10*i+7];
1906:     alpha9  = x[10*i+8];
1907:     alpha10 = x[10*i+9];
1908:     while (n-->0) {
1909:       y[10*(*idx)]   += alpha1*(*v);
1910:       y[10*(*idx)+1] += alpha2*(*v);
1911:       y[10*(*idx)+2] += alpha3*(*v);
1912:       y[10*(*idx)+3] += alpha4*(*v);
1913:       y[10*(*idx)+4] += alpha5*(*v);
1914:       y[10*(*idx)+5] += alpha6*(*v);
1915:       y[10*(*idx)+6] += alpha7*(*v);
1916:       y[10*(*idx)+7] += alpha8*(*v);
1917:       y[10*(*idx)+8] += alpha9*(*v);
1918:       y[10*(*idx)+9] += alpha10*(*v);
1919:       idx++; v++;
1920:     }
1921:   }
1922:   PetscLogFlops(20.0*a->nz);
1923:   VecRestoreArrayRead(xx,&x);
1924:   VecRestoreArray(zz,&y);
1925:   return(0);
1926: }


1929: /*--------------------------------------------------------------------------------------------*/
1932: PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1933: {
1934:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1935:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1936:   const PetscScalar *x,*v;
1937:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1938:   PetscErrorCode    ierr;
1939:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1940:   PetscInt          nonzerorow=0,n,i,jrow,j;

1943:   VecGetArrayRead(xx,&x);
1944:   VecGetArray(yy,&y);
1945:   idx  = a->j;
1946:   v    = a->a;
1947:   ii   = a->i;

1949:   for (i=0; i<m; i++) {
1950:     jrow  = ii[i];
1951:     n     = ii[i+1] - jrow;
1952:     sum1  = 0.0;
1953:     sum2  = 0.0;
1954:     sum3  = 0.0;
1955:     sum4  = 0.0;
1956:     sum5  = 0.0;
1957:     sum6  = 0.0;
1958:     sum7  = 0.0;
1959:     sum8  = 0.0;
1960:     sum9  = 0.0;
1961:     sum10 = 0.0;
1962:     sum11 = 0.0;

1964:     nonzerorow += (n>0);
1965:     for (j=0; j<n; j++) {
1966:       sum1  += v[jrow]*x[11*idx[jrow]];
1967:       sum2  += v[jrow]*x[11*idx[jrow]+1];
1968:       sum3  += v[jrow]*x[11*idx[jrow]+2];
1969:       sum4  += v[jrow]*x[11*idx[jrow]+3];
1970:       sum5  += v[jrow]*x[11*idx[jrow]+4];
1971:       sum6  += v[jrow]*x[11*idx[jrow]+5];
1972:       sum7  += v[jrow]*x[11*idx[jrow]+6];
1973:       sum8  += v[jrow]*x[11*idx[jrow]+7];
1974:       sum9  += v[jrow]*x[11*idx[jrow]+8];
1975:       sum10 += v[jrow]*x[11*idx[jrow]+9];
1976:       sum11 += v[jrow]*x[11*idx[jrow]+10];
1977:       jrow++;
1978:     }
1979:     y[11*i]    = sum1;
1980:     y[11*i+1]  = sum2;
1981:     y[11*i+2]  = sum3;
1982:     y[11*i+3]  = sum4;
1983:     y[11*i+4]  = sum5;
1984:     y[11*i+5]  = sum6;
1985:     y[11*i+6]  = sum7;
1986:     y[11*i+7]  = sum8;
1987:     y[11*i+8]  = sum9;
1988:     y[11*i+9]  = sum10;
1989:     y[11*i+10] = sum11;
1990:   }

1992:   PetscLogFlops(22*a->nz - 11*nonzerorow);
1993:   VecRestoreArrayRead(xx,&x);
1994:   VecRestoreArray(yy,&y);
1995:   return(0);
1996: }

2000: PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2001: {
2002:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2003:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2004:   const PetscScalar *x,*v;
2005:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
2006:   PetscErrorCode    ierr;
2007:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2008:   PetscInt          n,i,jrow,j;

2011:   if (yy != zz) {VecCopy(yy,zz);}
2012:   VecGetArrayRead(xx,&x);
2013:   VecGetArray(zz,&y);
2014:   idx  = a->j;
2015:   v    = a->a;
2016:   ii   = a->i;

2018:   for (i=0; i<m; i++) {
2019:     jrow  = ii[i];
2020:     n     = ii[i+1] - jrow;
2021:     sum1  = 0.0;
2022:     sum2  = 0.0;
2023:     sum3  = 0.0;
2024:     sum4  = 0.0;
2025:     sum5  = 0.0;
2026:     sum6  = 0.0;
2027:     sum7  = 0.0;
2028:     sum8  = 0.0;
2029:     sum9  = 0.0;
2030:     sum10 = 0.0;
2031:     sum11 = 0.0;
2032:     for (j=0; j<n; j++) {
2033:       sum1  += v[jrow]*x[11*idx[jrow]];
2034:       sum2  += v[jrow]*x[11*idx[jrow]+1];
2035:       sum3  += v[jrow]*x[11*idx[jrow]+2];
2036:       sum4  += v[jrow]*x[11*idx[jrow]+3];
2037:       sum5  += v[jrow]*x[11*idx[jrow]+4];
2038:       sum6  += v[jrow]*x[11*idx[jrow]+5];
2039:       sum7  += v[jrow]*x[11*idx[jrow]+6];
2040:       sum8  += v[jrow]*x[11*idx[jrow]+7];
2041:       sum9  += v[jrow]*x[11*idx[jrow]+8];
2042:       sum10 += v[jrow]*x[11*idx[jrow]+9];
2043:       sum11 += v[jrow]*x[11*idx[jrow]+10];
2044:       jrow++;
2045:     }
2046:     y[11*i]    += sum1;
2047:     y[11*i+1]  += sum2;
2048:     y[11*i+2]  += sum3;
2049:     y[11*i+3]  += sum4;
2050:     y[11*i+4]  += sum5;
2051:     y[11*i+5]  += sum6;
2052:     y[11*i+6]  += sum7;
2053:     y[11*i+7]  += sum8;
2054:     y[11*i+8]  += sum9;
2055:     y[11*i+9]  += sum10;
2056:     y[11*i+10] += sum11;
2057:   }

2059:   PetscLogFlops(22*a->nz);
2060:   VecRestoreArrayRead(xx,&x);
2061:   VecRestoreArray(yy,&y);
2062:   return(0);
2063: }

2067: PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
2068: {
2069:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2070:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2071:   const PetscScalar *x,*v;
2072:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2073:   PetscErrorCode    ierr;
2074:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2075:   PetscInt          n,i;

2078:   VecSet(yy,0.0);
2079:   VecGetArrayRead(xx,&x);
2080:   VecGetArray(yy,&y);

2082:   for (i=0; i<m; i++) {
2083:     idx     = a->j + a->i[i];
2084:     v       = a->a + a->i[i];
2085:     n       = a->i[i+1] - a->i[i];
2086:     alpha1  = x[11*i];
2087:     alpha2  = x[11*i+1];
2088:     alpha3  = x[11*i+2];
2089:     alpha4  = x[11*i+3];
2090:     alpha5  = x[11*i+4];
2091:     alpha6  = x[11*i+5];
2092:     alpha7  = x[11*i+6];
2093:     alpha8  = x[11*i+7];
2094:     alpha9  = x[11*i+8];
2095:     alpha10 = x[11*i+9];
2096:     alpha11 = x[11*i+10];
2097:     while (n-->0) {
2098:       y[11*(*idx)]    += alpha1*(*v);
2099:       y[11*(*idx)+1]  += alpha2*(*v);
2100:       y[11*(*idx)+2]  += alpha3*(*v);
2101:       y[11*(*idx)+3]  += alpha4*(*v);
2102:       y[11*(*idx)+4]  += alpha5*(*v);
2103:       y[11*(*idx)+5]  += alpha6*(*v);
2104:       y[11*(*idx)+6]  += alpha7*(*v);
2105:       y[11*(*idx)+7]  += alpha8*(*v);
2106:       y[11*(*idx)+8]  += alpha9*(*v);
2107:       y[11*(*idx)+9]  += alpha10*(*v);
2108:       y[11*(*idx)+10] += alpha11*(*v);
2109:       idx++; v++;
2110:     }
2111:   }
2112:   PetscLogFlops(22*a->nz);
2113:   VecRestoreArrayRead(xx,&x);
2114:   VecRestoreArray(yy,&y);
2115:   return(0);
2116: }

2120: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2121: {
2122:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2123:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2124:   const PetscScalar *x,*v;
2125:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2126:   PetscErrorCode    ierr;
2127:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2128:   PetscInt          n,i;

2131:   if (yy != zz) {VecCopy(yy,zz);}
2132:   VecGetArrayRead(xx,&x);
2133:   VecGetArray(zz,&y);
2134:   for (i=0; i<m; i++) {
2135:     idx     = a->j + a->i[i];
2136:     v       = a->a + a->i[i];
2137:     n       = a->i[i+1] - a->i[i];
2138:     alpha1  = x[11*i];
2139:     alpha2  = x[11*i+1];
2140:     alpha3  = x[11*i+2];
2141:     alpha4  = x[11*i+3];
2142:     alpha5  = x[11*i+4];
2143:     alpha6  = x[11*i+5];
2144:     alpha7  = x[11*i+6];
2145:     alpha8  = x[11*i+7];
2146:     alpha9  = x[11*i+8];
2147:     alpha10 = x[11*i+9];
2148:     alpha11 = x[11*i+10];
2149:     while (n-->0) {
2150:       y[11*(*idx)]    += alpha1*(*v);
2151:       y[11*(*idx)+1]  += alpha2*(*v);
2152:       y[11*(*idx)+2]  += alpha3*(*v);
2153:       y[11*(*idx)+3]  += alpha4*(*v);
2154:       y[11*(*idx)+4]  += alpha5*(*v);
2155:       y[11*(*idx)+5]  += alpha6*(*v);
2156:       y[11*(*idx)+6]  += alpha7*(*v);
2157:       y[11*(*idx)+7]  += alpha8*(*v);
2158:       y[11*(*idx)+8]  += alpha9*(*v);
2159:       y[11*(*idx)+9]  += alpha10*(*v);
2160:       y[11*(*idx)+10] += alpha11*(*v);
2161:       idx++; v++;
2162:     }
2163:   }
2164:   PetscLogFlops(22*a->nz);
2165:   VecRestoreArrayRead(xx,&x);
2166:   VecRestoreArray(zz,&y);
2167:   return(0);
2168: }


2171: /*--------------------------------------------------------------------------------------------*/
2174: PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2175: {
2176:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2177:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2178:   const PetscScalar *x,*v;
2179:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2180:   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2181:   PetscErrorCode    ierr;
2182:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2183:   PetscInt          nonzerorow=0,n,i,jrow,j;

2186:   VecGetArrayRead(xx,&x);
2187:   VecGetArray(yy,&y);
2188:   idx  = a->j;
2189:   v    = a->a;
2190:   ii   = a->i;

2192:   for (i=0; i<m; i++) {
2193:     jrow  = ii[i];
2194:     n     = ii[i+1] - jrow;
2195:     sum1  = 0.0;
2196:     sum2  = 0.0;
2197:     sum3  = 0.0;
2198:     sum4  = 0.0;
2199:     sum5  = 0.0;
2200:     sum6  = 0.0;
2201:     sum7  = 0.0;
2202:     sum8  = 0.0;
2203:     sum9  = 0.0;
2204:     sum10 = 0.0;
2205:     sum11 = 0.0;
2206:     sum12 = 0.0;
2207:     sum13 = 0.0;
2208:     sum14 = 0.0;
2209:     sum15 = 0.0;
2210:     sum16 = 0.0;

2212:     nonzerorow += (n>0);
2213:     for (j=0; j<n; j++) {
2214:       sum1  += v[jrow]*x[16*idx[jrow]];
2215:       sum2  += v[jrow]*x[16*idx[jrow]+1];
2216:       sum3  += v[jrow]*x[16*idx[jrow]+2];
2217:       sum4  += v[jrow]*x[16*idx[jrow]+3];
2218:       sum5  += v[jrow]*x[16*idx[jrow]+4];
2219:       sum6  += v[jrow]*x[16*idx[jrow]+5];
2220:       sum7  += v[jrow]*x[16*idx[jrow]+6];
2221:       sum8  += v[jrow]*x[16*idx[jrow]+7];
2222:       sum9  += v[jrow]*x[16*idx[jrow]+8];
2223:       sum10 += v[jrow]*x[16*idx[jrow]+9];
2224:       sum11 += v[jrow]*x[16*idx[jrow]+10];
2225:       sum12 += v[jrow]*x[16*idx[jrow]+11];
2226:       sum13 += v[jrow]*x[16*idx[jrow]+12];
2227:       sum14 += v[jrow]*x[16*idx[jrow]+13];
2228:       sum15 += v[jrow]*x[16*idx[jrow]+14];
2229:       sum16 += v[jrow]*x[16*idx[jrow]+15];
2230:       jrow++;
2231:     }
2232:     y[16*i]    = sum1;
2233:     y[16*i+1]  = sum2;
2234:     y[16*i+2]  = sum3;
2235:     y[16*i+3]  = sum4;
2236:     y[16*i+4]  = sum5;
2237:     y[16*i+5]  = sum6;
2238:     y[16*i+6]  = sum7;
2239:     y[16*i+7]  = sum8;
2240:     y[16*i+8]  = sum9;
2241:     y[16*i+9]  = sum10;
2242:     y[16*i+10] = sum11;
2243:     y[16*i+11] = sum12;
2244:     y[16*i+12] = sum13;
2245:     y[16*i+13] = sum14;
2246:     y[16*i+14] = sum15;
2247:     y[16*i+15] = sum16;
2248:   }

2250:   PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);
2251:   VecRestoreArrayRead(xx,&x);
2252:   VecRestoreArray(yy,&y);
2253:   return(0);
2254: }

2258: PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2259: {
2260:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2261:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2262:   const PetscScalar *x,*v;
2263:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2264:   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2265:   PetscErrorCode    ierr;
2266:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2267:   PetscInt          n,i;

2270:   VecSet(yy,0.0);
2271:   VecGetArrayRead(xx,&x);
2272:   VecGetArray(yy,&y);

2274:   for (i=0; i<m; i++) {
2275:     idx     = a->j + a->i[i];
2276:     v       = a->a + a->i[i];
2277:     n       = a->i[i+1] - a->i[i];
2278:     alpha1  = x[16*i];
2279:     alpha2  = x[16*i+1];
2280:     alpha3  = x[16*i+2];
2281:     alpha4  = x[16*i+3];
2282:     alpha5  = x[16*i+4];
2283:     alpha6  = x[16*i+5];
2284:     alpha7  = x[16*i+6];
2285:     alpha8  = x[16*i+7];
2286:     alpha9  = x[16*i+8];
2287:     alpha10 = x[16*i+9];
2288:     alpha11 = x[16*i+10];
2289:     alpha12 = x[16*i+11];
2290:     alpha13 = x[16*i+12];
2291:     alpha14 = x[16*i+13];
2292:     alpha15 = x[16*i+14];
2293:     alpha16 = x[16*i+15];
2294:     while (n-->0) {
2295:       y[16*(*idx)]    += alpha1*(*v);
2296:       y[16*(*idx)+1]  += alpha2*(*v);
2297:       y[16*(*idx)+2]  += alpha3*(*v);
2298:       y[16*(*idx)+3]  += alpha4*(*v);
2299:       y[16*(*idx)+4]  += alpha5*(*v);
2300:       y[16*(*idx)+5]  += alpha6*(*v);
2301:       y[16*(*idx)+6]  += alpha7*(*v);
2302:       y[16*(*idx)+7]  += alpha8*(*v);
2303:       y[16*(*idx)+8]  += alpha9*(*v);
2304:       y[16*(*idx)+9]  += alpha10*(*v);
2305:       y[16*(*idx)+10] += alpha11*(*v);
2306:       y[16*(*idx)+11] += alpha12*(*v);
2307:       y[16*(*idx)+12] += alpha13*(*v);
2308:       y[16*(*idx)+13] += alpha14*(*v);
2309:       y[16*(*idx)+14] += alpha15*(*v);
2310:       y[16*(*idx)+15] += alpha16*(*v);
2311:       idx++; v++;
2312:     }
2313:   }
2314:   PetscLogFlops(32.0*a->nz);
2315:   VecRestoreArrayRead(xx,&x);
2316:   VecRestoreArray(yy,&y);
2317:   return(0);
2318: }

2322: PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2323: {
2324:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2325:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2326:   const PetscScalar *x,*v;
2327:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2328:   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2329:   PetscErrorCode    ierr;
2330:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2331:   PetscInt          n,i,jrow,j;

2334:   if (yy != zz) {VecCopy(yy,zz);}
2335:   VecGetArrayRead(xx,&x);
2336:   VecGetArray(zz,&y);
2337:   idx  = a->j;
2338:   v    = a->a;
2339:   ii   = a->i;

2341:   for (i=0; i<m; i++) {
2342:     jrow  = ii[i];
2343:     n     = ii[i+1] - jrow;
2344:     sum1  = 0.0;
2345:     sum2  = 0.0;
2346:     sum3  = 0.0;
2347:     sum4  = 0.0;
2348:     sum5  = 0.0;
2349:     sum6  = 0.0;
2350:     sum7  = 0.0;
2351:     sum8  = 0.0;
2352:     sum9  = 0.0;
2353:     sum10 = 0.0;
2354:     sum11 = 0.0;
2355:     sum12 = 0.0;
2356:     sum13 = 0.0;
2357:     sum14 = 0.0;
2358:     sum15 = 0.0;
2359:     sum16 = 0.0;
2360:     for (j=0; j<n; j++) {
2361:       sum1  += v[jrow]*x[16*idx[jrow]];
2362:       sum2  += v[jrow]*x[16*idx[jrow]+1];
2363:       sum3  += v[jrow]*x[16*idx[jrow]+2];
2364:       sum4  += v[jrow]*x[16*idx[jrow]+3];
2365:       sum5  += v[jrow]*x[16*idx[jrow]+4];
2366:       sum6  += v[jrow]*x[16*idx[jrow]+5];
2367:       sum7  += v[jrow]*x[16*idx[jrow]+6];
2368:       sum8  += v[jrow]*x[16*idx[jrow]+7];
2369:       sum9  += v[jrow]*x[16*idx[jrow]+8];
2370:       sum10 += v[jrow]*x[16*idx[jrow]+9];
2371:       sum11 += v[jrow]*x[16*idx[jrow]+10];
2372:       sum12 += v[jrow]*x[16*idx[jrow]+11];
2373:       sum13 += v[jrow]*x[16*idx[jrow]+12];
2374:       sum14 += v[jrow]*x[16*idx[jrow]+13];
2375:       sum15 += v[jrow]*x[16*idx[jrow]+14];
2376:       sum16 += v[jrow]*x[16*idx[jrow]+15];
2377:       jrow++;
2378:     }
2379:     y[16*i]    += sum1;
2380:     y[16*i+1]  += sum2;
2381:     y[16*i+2]  += sum3;
2382:     y[16*i+3]  += sum4;
2383:     y[16*i+4]  += sum5;
2384:     y[16*i+5]  += sum6;
2385:     y[16*i+6]  += sum7;
2386:     y[16*i+7]  += sum8;
2387:     y[16*i+8]  += sum9;
2388:     y[16*i+9]  += sum10;
2389:     y[16*i+10] += sum11;
2390:     y[16*i+11] += sum12;
2391:     y[16*i+12] += sum13;
2392:     y[16*i+13] += sum14;
2393:     y[16*i+14] += sum15;
2394:     y[16*i+15] += sum16;
2395:   }

2397:   PetscLogFlops(32.0*a->nz);
2398:   VecRestoreArrayRead(xx,&x);
2399:   VecRestoreArray(zz,&y);
2400:   return(0);
2401: }

2405: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2406: {
2407:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2408:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2409:   const PetscScalar *x,*v;
2410:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2411:   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2412:   PetscErrorCode    ierr;
2413:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2414:   PetscInt          n,i;

2417:   if (yy != zz) {VecCopy(yy,zz);}
2418:   VecGetArrayRead(xx,&x);
2419:   VecGetArray(zz,&y);
2420:   for (i=0; i<m; i++) {
2421:     idx     = a->j + a->i[i];
2422:     v       = a->a + a->i[i];
2423:     n       = a->i[i+1] - a->i[i];
2424:     alpha1  = x[16*i];
2425:     alpha2  = x[16*i+1];
2426:     alpha3  = x[16*i+2];
2427:     alpha4  = x[16*i+3];
2428:     alpha5  = x[16*i+4];
2429:     alpha6  = x[16*i+5];
2430:     alpha7  = x[16*i+6];
2431:     alpha8  = x[16*i+7];
2432:     alpha9  = x[16*i+8];
2433:     alpha10 = x[16*i+9];
2434:     alpha11 = x[16*i+10];
2435:     alpha12 = x[16*i+11];
2436:     alpha13 = x[16*i+12];
2437:     alpha14 = x[16*i+13];
2438:     alpha15 = x[16*i+14];
2439:     alpha16 = x[16*i+15];
2440:     while (n-->0) {
2441:       y[16*(*idx)]    += alpha1*(*v);
2442:       y[16*(*idx)+1]  += alpha2*(*v);
2443:       y[16*(*idx)+2]  += alpha3*(*v);
2444:       y[16*(*idx)+3]  += alpha4*(*v);
2445:       y[16*(*idx)+4]  += alpha5*(*v);
2446:       y[16*(*idx)+5]  += alpha6*(*v);
2447:       y[16*(*idx)+6]  += alpha7*(*v);
2448:       y[16*(*idx)+7]  += alpha8*(*v);
2449:       y[16*(*idx)+8]  += alpha9*(*v);
2450:       y[16*(*idx)+9]  += alpha10*(*v);
2451:       y[16*(*idx)+10] += alpha11*(*v);
2452:       y[16*(*idx)+11] += alpha12*(*v);
2453:       y[16*(*idx)+12] += alpha13*(*v);
2454:       y[16*(*idx)+13] += alpha14*(*v);
2455:       y[16*(*idx)+14] += alpha15*(*v);
2456:       y[16*(*idx)+15] += alpha16*(*v);
2457:       idx++; v++;
2458:     }
2459:   }
2460:   PetscLogFlops(32.0*a->nz);
2461:   VecRestoreArrayRead(xx,&x);
2462:   VecRestoreArray(zz,&y);
2463:   return(0);
2464: }

2466: /*--------------------------------------------------------------------------------------------*/
2469: PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2470: {
2471:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2472:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2473:   const PetscScalar *x,*v;
2474:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2475:   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2476:   PetscErrorCode    ierr;
2477:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2478:   PetscInt          nonzerorow=0,n,i,jrow,j;

2481:   VecGetArrayRead(xx,&x);
2482:   VecGetArray(yy,&y);
2483:   idx  = a->j;
2484:   v    = a->a;
2485:   ii   = a->i;

2487:   for (i=0; i<m; i++) {
2488:     jrow  = ii[i];
2489:     n     = ii[i+1] - jrow;
2490:     sum1  = 0.0;
2491:     sum2  = 0.0;
2492:     sum3  = 0.0;
2493:     sum4  = 0.0;
2494:     sum5  = 0.0;
2495:     sum6  = 0.0;
2496:     sum7  = 0.0;
2497:     sum8  = 0.0;
2498:     sum9  = 0.0;
2499:     sum10 = 0.0;
2500:     sum11 = 0.0;
2501:     sum12 = 0.0;
2502:     sum13 = 0.0;
2503:     sum14 = 0.0;
2504:     sum15 = 0.0;
2505:     sum16 = 0.0;
2506:     sum17 = 0.0;
2507:     sum18 = 0.0;

2509:     nonzerorow += (n>0);
2510:     for (j=0; j<n; j++) {
2511:       sum1  += v[jrow]*x[18*idx[jrow]];
2512:       sum2  += v[jrow]*x[18*idx[jrow]+1];
2513:       sum3  += v[jrow]*x[18*idx[jrow]+2];
2514:       sum4  += v[jrow]*x[18*idx[jrow]+3];
2515:       sum5  += v[jrow]*x[18*idx[jrow]+4];
2516:       sum6  += v[jrow]*x[18*idx[jrow]+5];
2517:       sum7  += v[jrow]*x[18*idx[jrow]+6];
2518:       sum8  += v[jrow]*x[18*idx[jrow]+7];
2519:       sum9  += v[jrow]*x[18*idx[jrow]+8];
2520:       sum10 += v[jrow]*x[18*idx[jrow]+9];
2521:       sum11 += v[jrow]*x[18*idx[jrow]+10];
2522:       sum12 += v[jrow]*x[18*idx[jrow]+11];
2523:       sum13 += v[jrow]*x[18*idx[jrow]+12];
2524:       sum14 += v[jrow]*x[18*idx[jrow]+13];
2525:       sum15 += v[jrow]*x[18*idx[jrow]+14];
2526:       sum16 += v[jrow]*x[18*idx[jrow]+15];
2527:       sum17 += v[jrow]*x[18*idx[jrow]+16];
2528:       sum18 += v[jrow]*x[18*idx[jrow]+17];
2529:       jrow++;
2530:     }
2531:     y[18*i]    = sum1;
2532:     y[18*i+1]  = sum2;
2533:     y[18*i+2]  = sum3;
2534:     y[18*i+3]  = sum4;
2535:     y[18*i+4]  = sum5;
2536:     y[18*i+5]  = sum6;
2537:     y[18*i+6]  = sum7;
2538:     y[18*i+7]  = sum8;
2539:     y[18*i+8]  = sum9;
2540:     y[18*i+9]  = sum10;
2541:     y[18*i+10] = sum11;
2542:     y[18*i+11] = sum12;
2543:     y[18*i+12] = sum13;
2544:     y[18*i+13] = sum14;
2545:     y[18*i+14] = sum15;
2546:     y[18*i+15] = sum16;
2547:     y[18*i+16] = sum17;
2548:     y[18*i+17] = sum18;
2549:   }

2551:   PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);
2552:   VecRestoreArrayRead(xx,&x);
2553:   VecRestoreArray(yy,&y);
2554:   return(0);
2555: }

2559: PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2560: {
2561:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2562:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2563:   const PetscScalar *x,*v;
2564:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2565:   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2566:   PetscErrorCode    ierr;
2567:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2568:   PetscInt          n,i;

2571:   VecSet(yy,0.0);
2572:   VecGetArrayRead(xx,&x);
2573:   VecGetArray(yy,&y);

2575:   for (i=0; i<m; i++) {
2576:     idx     = a->j + a->i[i];
2577:     v       = a->a + a->i[i];
2578:     n       = a->i[i+1] - a->i[i];
2579:     alpha1  = x[18*i];
2580:     alpha2  = x[18*i+1];
2581:     alpha3  = x[18*i+2];
2582:     alpha4  = x[18*i+3];
2583:     alpha5  = x[18*i+4];
2584:     alpha6  = x[18*i+5];
2585:     alpha7  = x[18*i+6];
2586:     alpha8  = x[18*i+7];
2587:     alpha9  = x[18*i+8];
2588:     alpha10 = x[18*i+9];
2589:     alpha11 = x[18*i+10];
2590:     alpha12 = x[18*i+11];
2591:     alpha13 = x[18*i+12];
2592:     alpha14 = x[18*i+13];
2593:     alpha15 = x[18*i+14];
2594:     alpha16 = x[18*i+15];
2595:     alpha17 = x[18*i+16];
2596:     alpha18 = x[18*i+17];
2597:     while (n-->0) {
2598:       y[18*(*idx)]    += alpha1*(*v);
2599:       y[18*(*idx)+1]  += alpha2*(*v);
2600:       y[18*(*idx)+2]  += alpha3*(*v);
2601:       y[18*(*idx)+3]  += alpha4*(*v);
2602:       y[18*(*idx)+4]  += alpha5*(*v);
2603:       y[18*(*idx)+5]  += alpha6*(*v);
2604:       y[18*(*idx)+6]  += alpha7*(*v);
2605:       y[18*(*idx)+7]  += alpha8*(*v);
2606:       y[18*(*idx)+8]  += alpha9*(*v);
2607:       y[18*(*idx)+9]  += alpha10*(*v);
2608:       y[18*(*idx)+10] += alpha11*(*v);
2609:       y[18*(*idx)+11] += alpha12*(*v);
2610:       y[18*(*idx)+12] += alpha13*(*v);
2611:       y[18*(*idx)+13] += alpha14*(*v);
2612:       y[18*(*idx)+14] += alpha15*(*v);
2613:       y[18*(*idx)+15] += alpha16*(*v);
2614:       y[18*(*idx)+16] += alpha17*(*v);
2615:       y[18*(*idx)+17] += alpha18*(*v);
2616:       idx++; v++;
2617:     }
2618:   }
2619:   PetscLogFlops(36.0*a->nz);
2620:   VecRestoreArrayRead(xx,&x);
2621:   VecRestoreArray(yy,&y);
2622:   return(0);
2623: }

2627: PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2628: {
2629:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2630:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2631:   const PetscScalar *x,*v;
2632:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2633:   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2634:   PetscErrorCode    ierr;
2635:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2636:   PetscInt          n,i,jrow,j;

2639:   if (yy != zz) {VecCopy(yy,zz);}
2640:   VecGetArrayRead(xx,&x);
2641:   VecGetArray(zz,&y);
2642:   idx  = a->j;
2643:   v    = a->a;
2644:   ii   = a->i;

2646:   for (i=0; i<m; i++) {
2647:     jrow  = ii[i];
2648:     n     = ii[i+1] - jrow;
2649:     sum1  = 0.0;
2650:     sum2  = 0.0;
2651:     sum3  = 0.0;
2652:     sum4  = 0.0;
2653:     sum5  = 0.0;
2654:     sum6  = 0.0;
2655:     sum7  = 0.0;
2656:     sum8  = 0.0;
2657:     sum9  = 0.0;
2658:     sum10 = 0.0;
2659:     sum11 = 0.0;
2660:     sum12 = 0.0;
2661:     sum13 = 0.0;
2662:     sum14 = 0.0;
2663:     sum15 = 0.0;
2664:     sum16 = 0.0;
2665:     sum17 = 0.0;
2666:     sum18 = 0.0;
2667:     for (j=0; j<n; j++) {
2668:       sum1  += v[jrow]*x[18*idx[jrow]];
2669:       sum2  += v[jrow]*x[18*idx[jrow]+1];
2670:       sum3  += v[jrow]*x[18*idx[jrow]+2];
2671:       sum4  += v[jrow]*x[18*idx[jrow]+3];
2672:       sum5  += v[jrow]*x[18*idx[jrow]+4];
2673:       sum6  += v[jrow]*x[18*idx[jrow]+5];
2674:       sum7  += v[jrow]*x[18*idx[jrow]+6];
2675:       sum8  += v[jrow]*x[18*idx[jrow]+7];
2676:       sum9  += v[jrow]*x[18*idx[jrow]+8];
2677:       sum10 += v[jrow]*x[18*idx[jrow]+9];
2678:       sum11 += v[jrow]*x[18*idx[jrow]+10];
2679:       sum12 += v[jrow]*x[18*idx[jrow]+11];
2680:       sum13 += v[jrow]*x[18*idx[jrow]+12];
2681:       sum14 += v[jrow]*x[18*idx[jrow]+13];
2682:       sum15 += v[jrow]*x[18*idx[jrow]+14];
2683:       sum16 += v[jrow]*x[18*idx[jrow]+15];
2684:       sum17 += v[jrow]*x[18*idx[jrow]+16];
2685:       sum18 += v[jrow]*x[18*idx[jrow]+17];
2686:       jrow++;
2687:     }
2688:     y[18*i]    += sum1;
2689:     y[18*i+1]  += sum2;
2690:     y[18*i+2]  += sum3;
2691:     y[18*i+3]  += sum4;
2692:     y[18*i+4]  += sum5;
2693:     y[18*i+5]  += sum6;
2694:     y[18*i+6]  += sum7;
2695:     y[18*i+7]  += sum8;
2696:     y[18*i+8]  += sum9;
2697:     y[18*i+9]  += sum10;
2698:     y[18*i+10] += sum11;
2699:     y[18*i+11] += sum12;
2700:     y[18*i+12] += sum13;
2701:     y[18*i+13] += sum14;
2702:     y[18*i+14] += sum15;
2703:     y[18*i+15] += sum16;
2704:     y[18*i+16] += sum17;
2705:     y[18*i+17] += sum18;
2706:   }

2708:   PetscLogFlops(36.0*a->nz);
2709:   VecRestoreArrayRead(xx,&x);
2710:   VecRestoreArray(zz,&y);
2711:   return(0);
2712: }

2716: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2717: {
2718:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2719:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2720:   const PetscScalar *x,*v;
2721:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2722:   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2723:   PetscErrorCode    ierr;
2724:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2725:   PetscInt          n,i;

2728:   if (yy != zz) {VecCopy(yy,zz);}
2729:   VecGetArrayRead(xx,&x);
2730:   VecGetArray(zz,&y);
2731:   for (i=0; i<m; i++) {
2732:     idx     = a->j + a->i[i];
2733:     v       = a->a + a->i[i];
2734:     n       = a->i[i+1] - a->i[i];
2735:     alpha1  = x[18*i];
2736:     alpha2  = x[18*i+1];
2737:     alpha3  = x[18*i+2];
2738:     alpha4  = x[18*i+3];
2739:     alpha5  = x[18*i+4];
2740:     alpha6  = x[18*i+5];
2741:     alpha7  = x[18*i+6];
2742:     alpha8  = x[18*i+7];
2743:     alpha9  = x[18*i+8];
2744:     alpha10 = x[18*i+9];
2745:     alpha11 = x[18*i+10];
2746:     alpha12 = x[18*i+11];
2747:     alpha13 = x[18*i+12];
2748:     alpha14 = x[18*i+13];
2749:     alpha15 = x[18*i+14];
2750:     alpha16 = x[18*i+15];
2751:     alpha17 = x[18*i+16];
2752:     alpha18 = x[18*i+17];
2753:     while (n-->0) {
2754:       y[18*(*idx)]    += alpha1*(*v);
2755:       y[18*(*idx)+1]  += alpha2*(*v);
2756:       y[18*(*idx)+2]  += alpha3*(*v);
2757:       y[18*(*idx)+3]  += alpha4*(*v);
2758:       y[18*(*idx)+4]  += alpha5*(*v);
2759:       y[18*(*idx)+5]  += alpha6*(*v);
2760:       y[18*(*idx)+6]  += alpha7*(*v);
2761:       y[18*(*idx)+7]  += alpha8*(*v);
2762:       y[18*(*idx)+8]  += alpha9*(*v);
2763:       y[18*(*idx)+9]  += alpha10*(*v);
2764:       y[18*(*idx)+10] += alpha11*(*v);
2765:       y[18*(*idx)+11] += alpha12*(*v);
2766:       y[18*(*idx)+12] += alpha13*(*v);
2767:       y[18*(*idx)+13] += alpha14*(*v);
2768:       y[18*(*idx)+14] += alpha15*(*v);
2769:       y[18*(*idx)+15] += alpha16*(*v);
2770:       y[18*(*idx)+16] += alpha17*(*v);
2771:       y[18*(*idx)+17] += alpha18*(*v);
2772:       idx++; v++;
2773:     }
2774:   }
2775:   PetscLogFlops(36.0*a->nz);
2776:   VecRestoreArrayRead(xx,&x);
2777:   VecRestoreArray(zz,&y);
2778:   return(0);
2779: }

2783: PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2784: {
2785:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2786:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2787:   const PetscScalar *x,*v;
2788:   PetscScalar       *y,*sums;
2789:   PetscErrorCode    ierr;
2790:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2791:   PetscInt          n,i,jrow,j,dof = b->dof,k;

2794:   VecGetArrayRead(xx,&x);
2795:   VecSet(yy,0.0);
2796:   VecGetArray(yy,&y);
2797:   idx  = a->j;
2798:   v    = a->a;
2799:   ii   = a->i;

2801:   for (i=0; i<m; i++) {
2802:     jrow = ii[i];
2803:     n    = ii[i+1] - jrow;
2804:     sums = y + dof*i;
2805:     for (j=0; j<n; j++) {
2806:       for (k=0; k<dof; k++) {
2807:         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2808:       }
2809:       jrow++;
2810:     }
2811:   }

2813:   PetscLogFlops(2.0*dof*a->nz);
2814:   VecRestoreArrayRead(xx,&x);
2815:   VecRestoreArray(yy,&y);
2816:   return(0);
2817: }

2821: PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2822: {
2823:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2824:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2825:   const PetscScalar *x,*v;
2826:   PetscScalar       *y,*sums;
2827:   PetscErrorCode    ierr;
2828:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2829:   PetscInt          n,i,jrow,j,dof = b->dof,k;

2832:   if (yy != zz) {VecCopy(yy,zz);}
2833:   VecGetArrayRead(xx,&x);
2834:   VecGetArray(zz,&y);
2835:   idx  = a->j;
2836:   v    = a->a;
2837:   ii   = a->i;

2839:   for (i=0; i<m; i++) {
2840:     jrow = ii[i];
2841:     n    = ii[i+1] - jrow;
2842:     sums = y + dof*i;
2843:     for (j=0; j<n; j++) {
2844:       for (k=0; k<dof; k++) {
2845:         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2846:       }
2847:       jrow++;
2848:     }
2849:   }

2851:   PetscLogFlops(2.0*dof*a->nz);
2852:   VecRestoreArrayRead(xx,&x);
2853:   VecRestoreArray(zz,&y);
2854:   return(0);
2855: }

2859: PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2860: {
2861:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2862:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2863:   const PetscScalar *x,*v,*alpha;
2864:   PetscScalar       *y;
2865:   PetscErrorCode    ierr;
2866:   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2867:   PetscInt          n,i,k;

2870:   VecGetArrayRead(xx,&x);
2871:   VecSet(yy,0.0);
2872:   VecGetArray(yy,&y);
2873:   for (i=0; i<m; i++) {
2874:     idx   = a->j + a->i[i];
2875:     v     = a->a + a->i[i];
2876:     n     = a->i[i+1] - a->i[i];
2877:     alpha = x + dof*i;
2878:     while (n-->0) {
2879:       for (k=0; k<dof; k++) {
2880:         y[dof*(*idx)+k] += alpha[k]*(*v);
2881:       }
2882:       idx++; v++;
2883:     }
2884:   }
2885:   PetscLogFlops(2.0*dof*a->nz);
2886:   VecRestoreArrayRead(xx,&x);
2887:   VecRestoreArray(yy,&y);
2888:   return(0);
2889: }

2893: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2894: {
2895:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2896:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2897:   const PetscScalar *x,*v,*alpha;
2898:   PetscScalar       *y;
2899:   PetscErrorCode    ierr;
2900:   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2901:   PetscInt          n,i,k;

2904:   if (yy != zz) {VecCopy(yy,zz);}
2905:   VecGetArrayRead(xx,&x);
2906:   VecGetArray(zz,&y);
2907:   for (i=0; i<m; i++) {
2908:     idx   = a->j + a->i[i];
2909:     v     = a->a + a->i[i];
2910:     n     = a->i[i+1] - a->i[i];
2911:     alpha = x + dof*i;
2912:     while (n-->0) {
2913:       for (k=0; k<dof; k++) {
2914:         y[dof*(*idx)+k] += alpha[k]*(*v);
2915:       }
2916:       idx++; v++;
2917:     }
2918:   }
2919:   PetscLogFlops(2.0*dof*a->nz);
2920:   VecRestoreArrayRead(xx,&x);
2921:   VecRestoreArray(zz,&y);
2922:   return(0);
2923: }

2925: /*===================================================================================*/
2928: PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2929: {
2930:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2934:   /* start the scatter */
2935:   VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2936:   (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
2937:   VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2938:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
2939:   return(0);
2940: }

2944: PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2945: {
2946:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2950:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2951:   (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
2952:   VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2953:   VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2954:   return(0);
2955: }

2959: PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2960: {
2961:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2965:   /* start the scatter */
2966:   VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2967:   (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
2968:   VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2969:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);
2970:   return(0);
2971: }

2975: PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2976: {
2977:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2981:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2982:   VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2983:   (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
2984:   VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2985:   return(0);
2986: }

2988: /* ----------------------------------------------------------------*/
2991: PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2992: {
2993:   PetscErrorCode     ierr;
2994:   PetscFreeSpaceList free_space=NULL,current_space=NULL;
2995:   Mat_SeqMAIJ        *pp       =(Mat_SeqMAIJ*)PP->data;
2996:   Mat                P         =pp->AIJ;
2997:   Mat_SeqAIJ         *a        =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2998:   PetscInt           *pti,*ptj,*ptJ;
2999:   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
3000:   const PetscInt     an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
3001:   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
3002:   MatScalar          *ca;
3003:   const PetscInt     *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;

3006:   /* Get ij structure of P^T */
3007:   MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);

3009:   cn = pn*ppdof;
3010:   /* Allocate ci array, arrays for fill computation and */
3011:   /* free space for accumulating nonzero column info */
3012:   PetscMalloc((cn+1)*sizeof(PetscInt),&ci);
3013:   ci[0] = 0;

3015:   /* Work arrays for rows of P^T*A */
3016:   PetscMalloc4(an,PetscInt,&ptadenserow,an,PetscInt,&ptasparserow,cn,PetscInt,&denserow,cn,PetscInt,&sparserow);
3017:   PetscMemzero(ptadenserow,an*sizeof(PetscInt));
3018:   PetscMemzero(denserow,cn*sizeof(PetscInt));

3020:   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
3021:   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
3022:   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
3023:   PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);
3024:   current_space = free_space;

3026:   /* Determine symbolic info for each row of C: */
3027:   for (i=0; i<pn; i++) {
3028:     ptnzi = pti[i+1] - pti[i];
3029:     ptJ   = ptj + pti[i];
3030:     for (dof=0; dof<ppdof; dof++) {
3031:       ptanzi = 0;
3032:       /* Determine symbolic row of PtA: */
3033:       for (j=0; j<ptnzi; j++) {
3034:         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
3035:         arow = ptJ[j]*ppdof + dof;
3036:         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
3037:         anzj = ai[arow+1] - ai[arow];
3038:         ajj  = aj + ai[arow];
3039:         for (k=0; k<anzj; k++) {
3040:           if (!ptadenserow[ajj[k]]) {
3041:             ptadenserow[ajj[k]]    = -1;
3042:             ptasparserow[ptanzi++] = ajj[k];
3043:           }
3044:         }
3045:       }
3046:       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
3047:       ptaj = ptasparserow;
3048:       cnzi = 0;
3049:       for (j=0; j<ptanzi; j++) {
3050:         /* Get offset within block of P */
3051:         pshift = *ptaj%ppdof;
3052:         /* Get block row of P */
3053:         prow = (*ptaj++)/ppdof; /* integer division */
3054:         /* P has same number of nonzeros per row as the compressed form */
3055:         pnzj = pi[prow+1] - pi[prow];
3056:         pjj  = pj + pi[prow];
3057:         for (k=0;k<pnzj;k++) {
3058:           /* Locations in C are shifted by the offset within the block */
3059:           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
3060:           if (!denserow[pjj[k]*ppdof+pshift]) {
3061:             denserow[pjj[k]*ppdof+pshift] = -1;
3062:             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
3063:           }
3064:         }
3065:       }

3067:       /* sort sparserow */
3068:       PetscSortInt(cnzi,sparserow);

3070:       /* If free space is not available, make more free space */
3071:       /* Double the amount of total space in the list */
3072:       if (current_space->local_remaining<cnzi) {
3073:         PetscFreeSpaceGet(cnzi+current_space->total_array_size,&current_space);
3074:       }

3076:       /* Copy data into free space, and zero out denserows */
3077:       PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));

3079:       current_space->array           += cnzi;
3080:       current_space->local_used      += cnzi;
3081:       current_space->local_remaining -= cnzi;

3083:       for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
3084:       for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0;

3086:       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
3087:       /*        For now, we will recompute what is needed. */
3088:       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
3089:     }
3090:   }
3091:   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
3092:   /* Allocate space for cj, initialize cj, and */
3093:   /* destroy list of free space and other temporary array(s) */
3094:   PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);
3095:   PetscFreeSpaceContiguous(&free_space,cj);
3096:   PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);

3098:   /* Allocate space for ca */
3099:   PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);
3100:   PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));

3102:   /* put together the new matrix */
3103:   MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,C);

3105:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3106:   /* Since these are PETSc arrays, change flags to free them as necessary. */
3107:   c          = (Mat_SeqAIJ*)((*C)->data);
3108:   c->free_a  = PETSC_TRUE;
3109:   c->free_ij = PETSC_TRUE;
3110:   c->nonew   = 0;

3112:   (*C)->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ;

3114:   /* Clean up. */
3115:   MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
3116:   return(0);
3117: }

3121: PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
3122: {
3123:   /* This routine requires testing -- first draft only */
3124:   PetscErrorCode  ierr;
3125:   Mat_SeqMAIJ     *pp=(Mat_SeqMAIJ*)PP->data;
3126:   Mat             P  =pp->AIJ;
3127:   Mat_SeqAIJ      *a = (Mat_SeqAIJ*) A->data;
3128:   Mat_SeqAIJ      *p = (Mat_SeqAIJ*) P->data;
3129:   Mat_SeqAIJ      *c = (Mat_SeqAIJ*) C->data;
3130:   const PetscInt  *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj;
3131:   const PetscInt  *ci=c->i,*cj=c->j,*cjj;
3132:   const PetscInt  am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3133:   PetscInt        i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3134:   const MatScalar *aa=a->a,*pa=p->a,*pA,*paj;
3135:   MatScalar       *ca=c->a,*caj,*apa;

3138:   /* Allocate temporary array for storage of one row of A*P */
3139:   PetscMalloc3(cn,MatScalar,&apa,cn,PetscInt,&apj,cn,PetscInt,&apjdense);
3140:   PetscMemzero(apa,cn*sizeof(MatScalar));
3141:   PetscMemzero(apj,cn*sizeof(PetscInt));
3142:   PetscMemzero(apjdense,cn*sizeof(PetscInt));

3144:   /* Clear old values in C */
3145:   PetscMemzero(ca,ci[cm]*sizeof(MatScalar));

3147:   for (i=0; i<am; i++) {
3148:     /* Form sparse row of A*P */
3149:     anzi  = ai[i+1] - ai[i];
3150:     apnzj = 0;
3151:     for (j=0; j<anzi; j++) {
3152:       /* Get offset within block of P */
3153:       pshift = *aj%ppdof;
3154:       /* Get block row of P */
3155:       prow = *aj++/ppdof;   /* integer division */
3156:       pnzj = pi[prow+1] - pi[prow];
3157:       pjj  = pj + pi[prow];
3158:       paj  = pa + pi[prow];
3159:       for (k=0; k<pnzj; k++) {
3160:         poffset = pjj[k]*ppdof+pshift;
3161:         if (!apjdense[poffset]) {
3162:           apjdense[poffset] = -1;
3163:           apj[apnzj++]      = poffset;
3164:         }
3165:         apa[poffset] += (*aa)*paj[k];
3166:       }
3167:       PetscLogFlops(2.0*pnzj);
3168:       aa++;
3169:     }

3171:     /* Sort the j index array for quick sparse axpy. */
3172:     /* Note: a array does not need sorting as it is in dense storage locations. */
3173:     PetscSortInt(apnzj,apj);

3175:     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
3176:     prow    = i/ppdof; /* integer division */
3177:     pshift  = i%ppdof;
3178:     poffset = pi[prow];
3179:     pnzi    = pi[prow+1] - poffset;
3180:     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
3181:     pJ = pj+poffset;
3182:     pA = pa+poffset;
3183:     for (j=0; j<pnzi; j++) {
3184:       crow = (*pJ)*ppdof+pshift;
3185:       cjj  = cj + ci[crow];
3186:       caj  = ca + ci[crow];
3187:       pJ++;
3188:       /* Perform sparse axpy operation.  Note cjj includes apj. */
3189:       for (k=0,nextap=0; nextap<apnzj; k++) {
3190:         if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]];
3191:       }
3192:       PetscLogFlops(2.0*apnzj);
3193:       pA++;
3194:     }

3196:     /* Zero the current row info for A*P */
3197:     for (j=0; j<apnzj; j++) {
3198:       apa[apj[j]]      = 0.;
3199:       apjdense[apj[j]] = 0;
3200:     }
3201:   }

3203:   /* Assemble the final matrix and clean up */
3204:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
3205:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
3206:   PetscFree3(apa,apj,apjdense);
3207:   return(0);
3208: }

3212: PetscErrorCode MatPtAP_SeqAIJ_SeqMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
3213: {

3217:   if (scall == MAT_INITIAL_MATRIX) {
3218:     PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);
3219:     MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,fill,C);
3220:     PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);
3221:   }
3222:   PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);
3223:   MatPtAPNumeric_SeqAIJ_SeqMAIJ(A,P,*C);
3224:   PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);
3225:   return(0);
3226: }

3230: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
3231: {

3235:   /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */
3236:   MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);
3237:   ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);
3238:   return(0);
3239: }

3243: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
3244: {
3246:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
3247:   return(0);
3248: }

3252: PetscErrorCode MatPtAP_MPIAIJ_MPIMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
3253: {

3257:   if (scall == MAT_INITIAL_MATRIX) {
3258:     PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);
3259:     MatPtAPSymbolic_MPIAIJ_MPIMAIJ(A,P,fill,C);
3260:     PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);
3261:   }
3262:   PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);
3263:   ((*C)->ops->ptapnumeric)(A,P,*C);
3264:   PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);
3265:   return(0);
3266: }

3270: PETSC_EXTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3271: {
3272:   Mat_SeqMAIJ    *b   = (Mat_SeqMAIJ*)A->data;
3273:   Mat            a    = b->AIJ,B;
3274:   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)a->data;
3276:   PetscInt       m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3277:   PetscInt       *cols;
3278:   PetscScalar    *vals;

3281:   MatGetSize(a,&m,&n);
3282:   PetscMalloc(dof*m*sizeof(PetscInt),&ilen);
3283:   for (i=0; i<m; i++) {
3284:     nmax = PetscMax(nmax,aij->ilen[i]);
3285:     for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i];
3286:   }
3287:   MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);
3288:   PetscFree(ilen);
3289:   PetscMalloc(nmax*sizeof(PetscInt),&icols);
3290:   ii   = 0;
3291:   for (i=0; i<m; i++) {
3292:     MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3293:     for (j=0; j<dof; j++) {
3294:       for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j;
3295:       MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3296:       ii++;
3297:     }
3298:     MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3299:   }
3300:   PetscFree(icols);
3301:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3302:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

3304:   if (reuse == MAT_REUSE_MATRIX) {
3305:     MatHeaderReplace(A,B);
3306:   } else {
3307:     *newmat = B;
3308:   }
3309:   return(0);
3310: }

3312: #include <../src/mat/impls/aij/mpi/mpiaij.h>

3316: PETSC_EXTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3317: {
3318:   Mat_MPIMAIJ    *maij   = (Mat_MPIMAIJ*)A->data;
3319:   Mat            MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
3320:   Mat            MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
3321:   Mat_SeqAIJ     *AIJ    = (Mat_SeqAIJ*) MatAIJ->data;
3322:   Mat_SeqAIJ     *OAIJ   =(Mat_SeqAIJ*) MatOAIJ->data;
3323:   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*) maij->A->data;
3324:   PetscInt       dof     = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0;
3325:   PetscInt       *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL;
3326:   PetscInt       rstart,cstart,*garray,ii,k;
3328:   PetscScalar    *vals,*ovals;

3331:   PetscMalloc2(A->rmap->n,PetscInt,&dnz,A->rmap->n,PetscInt,&onz);
3332:   for (i=0; i<A->rmap->n/dof; i++) {
3333:     nmax  = PetscMax(nmax,AIJ->ilen[i]);
3334:     onmax = PetscMax(onmax,OAIJ->ilen[i]);
3335:     for (j=0; j<dof; j++) {
3336:       dnz[dof*i+j] = AIJ->ilen[i];
3337:       onz[dof*i+j] = OAIJ->ilen[i];
3338:     }
3339:   }
3340:   MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);
3341:   PetscFree2(dnz,onz);

3343:   PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);
3344:   rstart = dof*maij->A->rmap->rstart;
3345:   cstart = dof*maij->A->cmap->rstart;
3346:   garray = mpiaij->garray;

3348:   ii = rstart;
3349:   for (i=0; i<A->rmap->n/dof; i++) {
3350:     MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3351:     MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3352:     for (j=0; j<dof; j++) {
3353:       for (k=0; k<ncols; k++) {
3354:         icols[k] = cstart + dof*cols[k]+j;
3355:       }
3356:       for (k=0; k<oncols; k++) {
3357:         oicols[k] = dof*garray[ocols[k]]+j;
3358:       }
3359:       MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3360:       MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);
3361:       ii++;
3362:     }
3363:     MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3364:     MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3365:   }
3366:   PetscFree2(icols,oicols);

3368:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3369:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

3371:   if (reuse == MAT_REUSE_MATRIX) {
3372:     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
3373:     ((PetscObject)A)->refct = 1;

3375:     MatHeaderReplace(A,B);

3377:     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3378:   } else {
3379:     *newmat = B;
3380:   }
3381:   return(0);
3382: }

3386: PetscErrorCode  MatGetSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
3387: {
3389:   Mat            A;

3392:   MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);
3393:   MatGetSubMatrix(A,isrow,iscol,cll,newmat);
3394:   MatDestroy(&A);
3395:   return(0);
3396: }

3398: /* ---------------------------------------------------------------------------------- */
3401: /*@C
3402:   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
3403:   operations for multicomponent problems.  It interpolates each component the same
3404:   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
3405:   and MATMPIAIJ for distributed matrices.

3407:   Collective

3409:   Input Parameters:
3410: + A - the AIJ matrix describing the action on blocks
3411: - dof - the block size (number of components per node)

3413:   Output Parameter:
3414: . maij - the new MAIJ matrix

3416:   Operations provided:
3417: + MatMult
3418: . MatMultTranspose
3419: . MatMultAdd
3420: . MatMultTransposeAdd
3421: - MatView

3423:   Level: advanced

3425: .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ
3426: @*/
3427: PetscErrorCode  MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
3428: {
3430:   PetscMPIInt    size;
3431:   PetscInt       n;
3432:   Mat            B;

3435:   PetscObjectReference((PetscObject)A);

3437:   if (dof == 1) *maij = A;
3438:   else {
3439:     MatCreate(PetscObjectComm((PetscObject)A),&B);
3440:     MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);
3441:     PetscLayoutSetBlockSize(B->rmap,dof);
3442:     PetscLayoutSetBlockSize(B->cmap,dof);
3443:     PetscLayoutSetUp(B->rmap);
3444:     PetscLayoutSetUp(B->cmap);

3446:     B->assembled = PETSC_TRUE;

3448:     MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
3449:     if (size == 1) {
3450:       Mat_SeqMAIJ *b;

3452:       MatSetType(B,MATSEQMAIJ);

3454:       B->ops->setup   = NULL;
3455:       B->ops->destroy = MatDestroy_SeqMAIJ;
3456:       B->ops->view    = MatView_SeqMAIJ;
3457:       b               = (Mat_SeqMAIJ*)B->data;
3458:       b->dof          = dof;
3459:       b->AIJ          = A;

3461:       if (dof == 2) {
3462:         B->ops->mult             = MatMult_SeqMAIJ_2;
3463:         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3464:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3465:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3466:       } else if (dof == 3) {
3467:         B->ops->mult             = MatMult_SeqMAIJ_3;
3468:         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3469:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3470:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3471:       } else if (dof == 4) {
3472:         B->ops->mult             = MatMult_SeqMAIJ_4;
3473:         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3474:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3475:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3476:       } else if (dof == 5) {
3477:         B->ops->mult             = MatMult_SeqMAIJ_5;
3478:         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3479:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3480:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
3481:       } else if (dof == 6) {
3482:         B->ops->mult             = MatMult_SeqMAIJ_6;
3483:         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
3484:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
3485:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3486:       } else if (dof == 7) {
3487:         B->ops->mult             = MatMult_SeqMAIJ_7;
3488:         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3489:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3490:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
3491:       } else if (dof == 8) {
3492:         B->ops->mult             = MatMult_SeqMAIJ_8;
3493:         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
3494:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
3495:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
3496:       } else if (dof == 9) {
3497:         B->ops->mult             = MatMult_SeqMAIJ_9;
3498:         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
3499:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
3500:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3501:       } else if (dof == 10) {
3502:         B->ops->mult             = MatMult_SeqMAIJ_10;
3503:         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3504:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3505:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3506:       } else if (dof == 11) {
3507:         B->ops->mult             = MatMult_SeqMAIJ_11;
3508:         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3509:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3510:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
3511:       } else if (dof == 16) {
3512:         B->ops->mult             = MatMult_SeqMAIJ_16;
3513:         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
3514:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
3515:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3516:       } else if (dof == 18) {
3517:         B->ops->mult             = MatMult_SeqMAIJ_18;
3518:         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3519:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3520:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
3521:       } else {
3522:         B->ops->mult             = MatMult_SeqMAIJ_N;
3523:         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
3524:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
3525:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
3526:       }
3527:       PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ);
3528:       PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaij_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);
3529:     } else {
3530:       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ*)A->data;
3531:       Mat_MPIMAIJ *b;
3532:       IS          from,to;
3533:       Vec         gvec;

3535:       MatSetType(B,MATMPIMAIJ);

3537:       B->ops->setup   = NULL;
3538:       B->ops->destroy = MatDestroy_MPIMAIJ;
3539:       B->ops->view    = MatView_MPIMAIJ;

3541:       b      = (Mat_MPIMAIJ*)B->data;
3542:       b->dof = dof;
3543:       b->A   = A;

3545:       MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);
3546:       MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);

3548:       VecGetSize(mpiaij->lvec,&n);
3549:       VecCreate(PETSC_COMM_SELF,&b->w);
3550:       VecSetSizes(b->w,n*dof,n*dof);
3551:       VecSetBlockSize(b->w,dof);
3552:       VecSetType(b->w,VECSEQ);

3554:       /* create two temporary Index sets for build scatter gather */
3555:       ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);
3556:       ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);

3558:       /* create temporary global vector to generate scatter context */
3559:       VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);

3561:       /* generate the scatter context */
3562:       VecScatterCreate(gvec,from,b->w,to,&b->ctx);

3564:       ISDestroy(&from);
3565:       ISDestroy(&to);
3566:       VecDestroy(&gvec);

3568:       B->ops->mult             = MatMult_MPIMAIJ_dof;
3569:       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
3570:       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
3571:       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;

3573:       PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ);
3574:       PetscObjectComposeFunction((PetscObject)B,"MatPtAP_mpiaij_mpimaij_C",MatPtAP_MPIAIJ_MPIMAIJ);
3575:     }
3576:     B->ops->getsubmatrix = MatGetSubMatrix_MAIJ;
3577:     MatSetUp(B);
3578:     *maij = B;
3579:     MatViewFromOptions(B,NULL,"-mat_view");
3580:   }
3581:   return(0);
3582: }