Actual source code: maij.c

petsc-3.3-p7 2013-05-11
  2: /*
  3:     Defines the basic matrix operations for the MAIJ  matrix storage format.
  4:   This format is used for restriction and interpolation operations for 
  5:   multicomponent problems. It interpolates each component the same way
  6:   independently.

  8:      We provide:
  9:          MatMult()
 10:          MatMultTranspose()
 11:          MatMultTransposeAdd()
 12:          MatMultAdd()
 13:           and
 14:          MatCreateMAIJ(Mat,dof,Mat*)

 16:      This single directory handles both the sequential and parallel codes
 17: */

 19: #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
 20: #include <../src/mat/utils/freespace.h>
 21: #include <petsc-private/vecimpl.h>

 25: /*@C
 26:    MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix

 28:    Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel

 30:    Input Parameter:
 31: .  A - the MAIJ matrix

 33:    Output Parameter:
 34: .  B - the AIJ matrix

 36:    Level: advanced

 38:    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.

 40: .seealso: MatCreateMAIJ()
 41: @*/
 42: PetscErrorCode  MatMAIJGetAIJ(Mat A,Mat *B)
 43: {
 45:   PetscBool      ismpimaij,isseqmaij;

 48:   PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
 49:   PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
 50:   if (ismpimaij) {
 51:     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;

 53:     *B = b->A;
 54:   } else if (isseqmaij) {
 55:     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;

 57:     *B = b->AIJ;
 58:   } else {
 59:     *B = A;
 60:   }
 61:   return(0);
 62: }

 66: /*@C
 67:    MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size

 69:    Logically Collective

 71:    Input Parameter:
 72: +  A - the MAIJ matrix
 73: -  dof - the block size for the new matrix

 75:    Output Parameter:
 76: .  B - the new MAIJ matrix

 78:    Level: advanced

 80: .seealso: MatCreateMAIJ()
 81: @*/
 82: PetscErrorCode  MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
 83: {
 85:   Mat            Aij = PETSC_NULL;

 89:   MatMAIJGetAIJ(A,&Aij);
 90:   MatCreateMAIJ(Aij,dof,B);
 91:   return(0);
 92: }

 96: PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
 97: {
 99:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;

102:   MatDestroy(&b->AIJ);
103:   PetscFree(A->data);
104:   return(0);
105: }

109: PetscErrorCode MatSetUp_MAIJ(Mat A)
110: {
112:   SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices");
113:   return(0);
114: }

118: PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
119: {
121:   Mat            B;

124:   MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
125:   MatView(B,viewer);
126:   MatDestroy(&B);
127:   return(0);
128: }

132: PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
133: {
135:   Mat            B;

138:   MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
139:   MatView(B,viewer);
140:   MatDestroy(&B);
141:   return(0);
142: }

146: PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
147: {
149:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

152:   MatDestroy(&b->AIJ);
153:   MatDestroy(&b->OAIJ);
154:   MatDestroy(&b->A);
155:   VecScatterDestroy(&b->ctx);
156:   VecDestroy(&b->w);
157:   PetscFree(A->data);
158:   PetscObjectChangeTypeName((PetscObject)A,0);
159:   return(0);
160: }

162: /*MC
163:   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 
164:   multicomponent problems, interpolating or restricting each component the same way independently.
165:   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.

167:   Operations provided:
168: . MatMult
169: . MatMultTranspose
170: . MatMultAdd
171: . MatMultTransposeAdd

173:   Level: advanced

175: .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ()
176: M*/

178: EXTERN_C_BEGIN
181: PetscErrorCode  MatCreate_MAIJ(Mat A)
182: {
184:   Mat_MPIMAIJ    *b;
185:   PetscMPIInt    size;

188:   PetscNewLog(A,Mat_MPIMAIJ,&b);
189:   A->data  = (void*)b;
190:   PetscMemzero(A->ops,sizeof(struct _MatOps));
191:   A->ops->setup = MatSetUp_MAIJ;

193:   b->AIJ  = 0;
194:   b->dof  = 0;
195:   b->OAIJ = 0;
196:   b->ctx  = 0;
197:   b->w    = 0;
198:   MPI_Comm_size(((PetscObject)A)->comm,&size);
199:   if (size == 1){
200:     PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);
201:   } else {
202:     PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);
203:   }
204:   return(0);
205: }
206: EXTERN_C_END

208: /* --------------------------------------------------------------------------------------*/
211: PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
212: {
213:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
214:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
215:   const PetscScalar *x,*v;
216:   PetscScalar       *y, sum1, sum2;
217:   PetscErrorCode    ierr;
218:   PetscInt          nonzerorow=0,n,i,jrow,j;
219:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

222:   VecGetArrayRead(xx,&x);
223:   VecGetArray(yy,&y);
224:   idx  = a->j;
225:   v    = a->a;
226:   ii   = a->i;

228:   for (i=0; i<m; i++) {
229:     jrow = ii[i];
230:     n    = ii[i+1] - jrow;
231:     sum1  = 0.0;
232:     sum2  = 0.0;
233:     nonzerorow += (n>0);
234:     for (j=0; j<n; j++) {
235:       sum1 += v[jrow]*x[2*idx[jrow]];
236:       sum2 += v[jrow]*x[2*idx[jrow]+1];
237:       jrow++;
238:      }
239:     y[2*i]   = sum1;
240:     y[2*i+1] = sum2;
241:   }

243:   PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);
244:   VecRestoreArrayRead(xx,&x);
245:   VecRestoreArray(yy,&y);
246:   return(0);
247: }

251: PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
252: {
253:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
254:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
255:   const PetscScalar *x,*v;
256:   PetscScalar       *y,alpha1,alpha2;
257:   PetscErrorCode    ierr;
258:   PetscInt          n,i;
259:   const PetscInt    m = b->AIJ->rmap->n,*idx;

262:   VecSet(yy,0.0);
263:   VecGetArrayRead(xx,&x);
264:   VecGetArray(yy,&y);
265: 
266:   for (i=0; i<m; i++) {
267:     idx    = a->j + a->i[i] ;
268:     v      = a->a + a->i[i] ;
269:     n      = a->i[i+1] - a->i[i];
270:     alpha1 = x[2*i];
271:     alpha2 = x[2*i+1];
272:     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
273:   }
274:   PetscLogFlops(4.0*a->nz);
275:   VecRestoreArrayRead(xx,&x);
276:   VecRestoreArray(yy,&y);
277:   return(0);
278: }

282: PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
283: {
284:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
285:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
286:   const PetscScalar *x,*v;
287:   PetscScalar       *y,sum1, sum2;
288:   PetscErrorCode    ierr;
289:   PetscInt          n,i,jrow,j;
290:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

293:   if (yy != zz) {VecCopy(yy,zz);}
294:   VecGetArrayRead(xx,&x);
295:   VecGetArray(zz,&y);
296:   idx  = a->j;
297:   v    = a->a;
298:   ii   = a->i;

300:   for (i=0; i<m; i++) {
301:     jrow = ii[i];
302:     n    = ii[i+1] - jrow;
303:     sum1  = 0.0;
304:     sum2  = 0.0;
305:     for (j=0; j<n; j++) {
306:       sum1 += v[jrow]*x[2*idx[jrow]];
307:       sum2 += v[jrow]*x[2*idx[jrow]+1];
308:       jrow++;
309:      }
310:     y[2*i]   += sum1;
311:     y[2*i+1] += sum2;
312:   }

314:   PetscLogFlops(4.0*a->nz);
315:   VecRestoreArrayRead(xx,&x);
316:   VecRestoreArray(zz,&y);
317:   return(0);
318: }
321: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
322: {
323:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
324:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
325:   const PetscScalar *x,*v;
326:   PetscScalar       *y,alpha1,alpha2;
327:   PetscErrorCode    ierr;
328:   PetscInt          n,i;
329:   const PetscInt    m = b->AIJ->rmap->n,*idx;

332:   if (yy != zz) {VecCopy(yy,zz);}
333:   VecGetArrayRead(xx,&x);
334:   VecGetArray(zz,&y);
335: 
336:   for (i=0; i<m; i++) {
337:     idx   = a->j + a->i[i] ;
338:     v     = a->a + a->i[i] ;
339:     n     = a->i[i+1] - a->i[i];
340:     alpha1 = x[2*i];
341:     alpha2 = x[2*i+1];
342:     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
343:   }
344:   PetscLogFlops(4.0*a->nz);
345:   VecRestoreArrayRead(xx,&x);
346:   VecRestoreArray(zz,&y);
347:   return(0);
348: }
349: /* --------------------------------------------------------------------------------------*/
352: PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
353: {
354:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
355:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
356:   const PetscScalar *x,*v;
357:   PetscScalar       *y,sum1, sum2, sum3;
358:   PetscErrorCode    ierr;
359:   PetscInt          nonzerorow=0,n,i,jrow,j;
360:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

363:   VecGetArrayRead(xx,&x);
364:   VecGetArray(yy,&y);
365:   idx  = a->j;
366:   v    = a->a;
367:   ii   = a->i;

369:   for (i=0; i<m; i++) {
370:     jrow = ii[i];
371:     n    = ii[i+1] - jrow;
372:     sum1  = 0.0;
373:     sum2  = 0.0;
374:     sum3  = 0.0;
375:     nonzerorow += (n>0);
376:     for (j=0; j<n; j++) {
377:       sum1 += v[jrow]*x[3*idx[jrow]];
378:       sum2 += v[jrow]*x[3*idx[jrow]+1];
379:       sum3 += v[jrow]*x[3*idx[jrow]+2];
380:       jrow++;
381:      }
382:     y[3*i]   = sum1;
383:     y[3*i+1] = sum2;
384:     y[3*i+2] = sum3;
385:   }

387:   PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);
388:   VecRestoreArrayRead(xx,&x);
389:   VecRestoreArray(yy,&y);
390:   return(0);
391: }

395: PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
396: {
397:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
398:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
399:   const PetscScalar *x,*v;
400:   PetscScalar       *y,alpha1,alpha2,alpha3;
401:   PetscErrorCode    ierr;
402:   PetscInt          n,i;
403:   const PetscInt    m = b->AIJ->rmap->n,*idx;

406:   VecSet(yy,0.0);
407:   VecGetArrayRead(xx,&x);
408:   VecGetArray(yy,&y);
409: 
410:   for (i=0; i<m; i++) {
411:     idx    = a->j + a->i[i];
412:     v      = a->a + a->i[i];
413:     n      = a->i[i+1] - a->i[i];
414:     alpha1 = x[3*i];
415:     alpha2 = x[3*i+1];
416:     alpha3 = x[3*i+2];
417:     while (n-->0) {
418:       y[3*(*idx)]   += alpha1*(*v);
419:       y[3*(*idx)+1] += alpha2*(*v);
420:       y[3*(*idx)+2] += alpha3*(*v);
421:       idx++; v++;
422:     }
423:   }
424:   PetscLogFlops(6.0*a->nz);
425:   VecRestoreArrayRead(xx,&x);
426:   VecRestoreArray(yy,&y);
427:   return(0);
428: }

432: PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
433: {
434:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
435:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
436:   const PetscScalar *x,*v;
437:   PetscScalar       *y,sum1, sum2, sum3;
438:   PetscErrorCode    ierr;
439:   PetscInt          n,i,jrow,j;
440:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

443:   if (yy != zz) {VecCopy(yy,zz);}
444:   VecGetArrayRead(xx,&x);
445:   VecGetArray(zz,&y);
446:   idx  = a->j;
447:   v    = a->a;
448:   ii   = a->i;

450:   for (i=0; i<m; i++) {
451:     jrow = ii[i];
452:     n    = ii[i+1] - jrow;
453:     sum1  = 0.0;
454:     sum2  = 0.0;
455:     sum3  = 0.0;
456:     for (j=0; j<n; j++) {
457:       sum1 += v[jrow]*x[3*idx[jrow]];
458:       sum2 += v[jrow]*x[3*idx[jrow]+1];
459:       sum3 += v[jrow]*x[3*idx[jrow]+2];
460:       jrow++;
461:      }
462:     y[3*i]   += sum1;
463:     y[3*i+1] += sum2;
464:     y[3*i+2] += sum3;
465:   }

467:   PetscLogFlops(6.0*a->nz);
468:   VecRestoreArrayRead(xx,&x);
469:   VecRestoreArray(zz,&y);
470:   return(0);
471: }
474: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
475: {
476:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
477:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
478:   const PetscScalar *x,*v;
479:   PetscScalar       *y,alpha1,alpha2,alpha3;
480:   PetscErrorCode    ierr;
481:   PetscInt          n,i;
482:   const PetscInt    m = b->AIJ->rmap->n,*idx;

485:   if (yy != zz) {VecCopy(yy,zz);}
486:   VecGetArrayRead(xx,&x);
487:   VecGetArray(zz,&y);
488:   for (i=0; i<m; i++) {
489:     idx    = a->j + a->i[i] ;
490:     v      = a->a + a->i[i] ;
491:     n      = a->i[i+1] - a->i[i];
492:     alpha1 = x[3*i];
493:     alpha2 = x[3*i+1];
494:     alpha3 = x[3*i+2];
495:     while (n-->0) {
496:       y[3*(*idx)]   += alpha1*(*v);
497:       y[3*(*idx)+1] += alpha2*(*v);
498:       y[3*(*idx)+2] += alpha3*(*v);
499:       idx++; v++;
500:     }
501:   }
502:   PetscLogFlops(6.0*a->nz);
503:   VecRestoreArrayRead(xx,&x);
504:   VecRestoreArray(zz,&y);
505:   return(0);
506: }

508: /* ------------------------------------------------------------------------------*/
511: PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
512: {
513:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
514:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
515:   const PetscScalar *x,*v;
516:   PetscScalar       *y,sum1, sum2, sum3, sum4;
517:   PetscErrorCode    ierr;
518:   PetscInt          nonzerorow=0,n,i,jrow,j;
519:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

522:   VecGetArrayRead(xx,&x);
523:   VecGetArray(yy,&y);
524:   idx  = a->j;
525:   v    = a->a;
526:   ii   = a->i;

528:   for (i=0; i<m; i++) {
529:     jrow = ii[i];
530:     n    = ii[i+1] - jrow;
531:     sum1  = 0.0;
532:     sum2  = 0.0;
533:     sum3  = 0.0;
534:     sum4  = 0.0;
535:     nonzerorow += (n>0);
536:     for (j=0; j<n; j++) {
537:       sum1 += v[jrow]*x[4*idx[jrow]];
538:       sum2 += v[jrow]*x[4*idx[jrow]+1];
539:       sum3 += v[jrow]*x[4*idx[jrow]+2];
540:       sum4 += v[jrow]*x[4*idx[jrow]+3];
541:       jrow++;
542:      }
543:     y[4*i]   = sum1;
544:     y[4*i+1] = sum2;
545:     y[4*i+2] = sum3;
546:     y[4*i+3] = sum4;
547:   }

549:   PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);
550:   VecRestoreArrayRead(xx,&x);
551:   VecRestoreArray(yy,&y);
552:   return(0);
553: }

557: PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
558: {
559:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
560:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
561:   const PetscScalar *x,*v;
562:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
563:   PetscErrorCode    ierr;
564:   PetscInt          n,i;
565:   const PetscInt    m = b->AIJ->rmap->n,*idx;

568:   VecSet(yy,0.0);
569:   VecGetArrayRead(xx,&x);
570:   VecGetArray(yy,&y);
571:   for (i=0; i<m; i++) {
572:     idx    = a->j + a->i[i] ;
573:     v      = a->a + a->i[i] ;
574:     n      = a->i[i+1] - a->i[i];
575:     alpha1 = x[4*i];
576:     alpha2 = x[4*i+1];
577:     alpha3 = x[4*i+2];
578:     alpha4 = x[4*i+3];
579:     while (n-->0) {
580:       y[4*(*idx)]   += alpha1*(*v);
581:       y[4*(*idx)+1] += alpha2*(*v);
582:       y[4*(*idx)+2] += alpha3*(*v);
583:       y[4*(*idx)+3] += alpha4*(*v);
584:       idx++; v++;
585:     }
586:   }
587:   PetscLogFlops(8.0*a->nz);
588:   VecRestoreArrayRead(xx,&x);
589:   VecRestoreArray(yy,&y);
590:   return(0);
591: }

595: PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
596: {
597:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
598:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
599:   const PetscScalar *x,*v;
600:   PetscScalar       *y,sum1, sum2, sum3, sum4;
601:   PetscErrorCode    ierr;
602:   PetscInt          n,i,jrow,j;
603:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

606:   if (yy != zz) {VecCopy(yy,zz);}
607:   VecGetArrayRead(xx,&x);
608:   VecGetArray(zz,&y);
609:   idx  = a->j;
610:   v    = a->a;
611:   ii   = a->i;

613:   for (i=0; i<m; i++) {
614:     jrow = ii[i];
615:     n    = ii[i+1] - jrow;
616:     sum1  = 0.0;
617:     sum2  = 0.0;
618:     sum3  = 0.0;
619:     sum4  = 0.0;
620:     for (j=0; j<n; j++) {
621:       sum1 += v[jrow]*x[4*idx[jrow]];
622:       sum2 += v[jrow]*x[4*idx[jrow]+1];
623:       sum3 += v[jrow]*x[4*idx[jrow]+2];
624:       sum4 += v[jrow]*x[4*idx[jrow]+3];
625:       jrow++;
626:      }
627:     y[4*i]   += sum1;
628:     y[4*i+1] += sum2;
629:     y[4*i+2] += sum3;
630:     y[4*i+3] += sum4;
631:   }

633:   PetscLogFlops(8.0*a->nz);
634:   VecRestoreArrayRead(xx,&x);
635:   VecRestoreArray(zz,&y);
636:   return(0);
637: }
640: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
641: {
642:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
643:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
644:   const PetscScalar *x,*v;
645:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
646:   PetscErrorCode    ierr;
647:   PetscInt          n,i;
648:   const PetscInt    m = b->AIJ->rmap->n,*idx;

651:   if (yy != zz) {VecCopy(yy,zz);}
652:   VecGetArrayRead(xx,&x);
653:   VecGetArray(zz,&y);
654: 
655:   for (i=0; i<m; i++) {
656:     idx    = a->j + a->i[i] ;
657:     v      = a->a + a->i[i] ;
658:     n      = a->i[i+1] - a->i[i];
659:     alpha1 = x[4*i];
660:     alpha2 = x[4*i+1];
661:     alpha3 = x[4*i+2];
662:     alpha4 = x[4*i+3];
663:     while (n-->0) {
664:       y[4*(*idx)]   += alpha1*(*v);
665:       y[4*(*idx)+1] += alpha2*(*v);
666:       y[4*(*idx)+2] += alpha3*(*v);
667:       y[4*(*idx)+3] += alpha4*(*v);
668:       idx++; v++;
669:     }
670:   }
671:   PetscLogFlops(8.0*a->nz);
672:   VecRestoreArrayRead(xx,&x);
673:   VecRestoreArray(zz,&y);
674:   return(0);
675: }
676: /* ------------------------------------------------------------------------------*/

680: PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
681: {
682:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
683:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
684:   const PetscScalar *x,*v;
685:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
686:   PetscErrorCode    ierr;
687:   PetscInt          nonzerorow=0,n,i,jrow,j;
688:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

691:   VecGetArrayRead(xx,&x);
692:   VecGetArray(yy,&y);
693:   idx  = a->j;
694:   v    = a->a;
695:   ii   = a->i;

697:   for (i=0; i<m; i++) {
698:     jrow = ii[i];
699:     n    = ii[i+1] - jrow;
700:     sum1  = 0.0;
701:     sum2  = 0.0;
702:     sum3  = 0.0;
703:     sum4  = 0.0;
704:     sum5  = 0.0;
705:     nonzerorow += (n>0);
706:     for (j=0; j<n; j++) {
707:       sum1 += v[jrow]*x[5*idx[jrow]];
708:       sum2 += v[jrow]*x[5*idx[jrow]+1];
709:       sum3 += v[jrow]*x[5*idx[jrow]+2];
710:       sum4 += v[jrow]*x[5*idx[jrow]+3];
711:       sum5 += v[jrow]*x[5*idx[jrow]+4];
712:       jrow++;
713:      }
714:     y[5*i]   = sum1;
715:     y[5*i+1] = sum2;
716:     y[5*i+2] = sum3;
717:     y[5*i+3] = sum4;
718:     y[5*i+4] = sum5;
719:   }

721:   PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);
722:   VecRestoreArrayRead(xx,&x);
723:   VecRestoreArray(yy,&y);
724:   return(0);
725: }

729: PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
730: {
731:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
732:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
733:   const PetscScalar *x,*v;
734:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
735:   PetscErrorCode    ierr;
736:   PetscInt          n,i;
737:   const PetscInt    m = b->AIJ->rmap->n,*idx;

740:   VecSet(yy,0.0);
741:   VecGetArrayRead(xx,&x);
742:   VecGetArray(yy,&y);
743: 
744:   for (i=0; i<m; i++) {
745:     idx    = a->j + a->i[i] ;
746:     v      = a->a + a->i[i] ;
747:     n      = a->i[i+1] - a->i[i];
748:     alpha1 = x[5*i];
749:     alpha2 = x[5*i+1];
750:     alpha3 = x[5*i+2];
751:     alpha4 = x[5*i+3];
752:     alpha5 = x[5*i+4];
753:     while (n-->0) {
754:       y[5*(*idx)]   += alpha1*(*v);
755:       y[5*(*idx)+1] += alpha2*(*v);
756:       y[5*(*idx)+2] += alpha3*(*v);
757:       y[5*(*idx)+3] += alpha4*(*v);
758:       y[5*(*idx)+4] += alpha5*(*v);
759:       idx++; v++;
760:     }
761:   }
762:   PetscLogFlops(10.0*a->nz);
763:   VecRestoreArrayRead(xx,&x);
764:   VecRestoreArray(yy,&y);
765:   return(0);
766: }

770: PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
771: {
772:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
773:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
774:   const PetscScalar *x,*v;
775:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
776:   PetscErrorCode    ierr;
777:   PetscInt          n,i,jrow,j;
778:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

781:   if (yy != zz) {VecCopy(yy,zz);}
782:   VecGetArrayRead(xx,&x);
783:   VecGetArray(zz,&y);
784:   idx  = a->j;
785:   v    = a->a;
786:   ii   = a->i;

788:   for (i=0; i<m; i++) {
789:     jrow = ii[i];
790:     n    = ii[i+1] - jrow;
791:     sum1  = 0.0;
792:     sum2  = 0.0;
793:     sum3  = 0.0;
794:     sum4  = 0.0;
795:     sum5  = 0.0;
796:     for (j=0; j<n; j++) {
797:       sum1 += v[jrow]*x[5*idx[jrow]];
798:       sum2 += v[jrow]*x[5*idx[jrow]+1];
799:       sum3 += v[jrow]*x[5*idx[jrow]+2];
800:       sum4 += v[jrow]*x[5*idx[jrow]+3];
801:       sum5 += v[jrow]*x[5*idx[jrow]+4];
802:       jrow++;
803:      }
804:     y[5*i]   += sum1;
805:     y[5*i+1] += sum2;
806:     y[5*i+2] += sum3;
807:     y[5*i+3] += sum4;
808:     y[5*i+4] += sum5;
809:   }

811:   PetscLogFlops(10.0*a->nz);
812:   VecRestoreArrayRead(xx,&x);
813:   VecRestoreArray(zz,&y);
814:   return(0);
815: }

819: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
820: {
821:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
822:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
823:   const PetscScalar *x,*v;
824:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
825:   PetscErrorCode    ierr;
826:   PetscInt          n,i;
827:   const PetscInt    m = b->AIJ->rmap->n,*idx;

830:   if (yy != zz) {VecCopy(yy,zz);}
831:   VecGetArrayRead(xx,&x);
832:   VecGetArray(zz,&y);
833: 
834:   for (i=0; i<m; i++) {
835:     idx    = a->j + a->i[i] ;
836:     v      = a->a + a->i[i] ;
837:     n      = a->i[i+1] - a->i[i];
838:     alpha1 = x[5*i];
839:     alpha2 = x[5*i+1];
840:     alpha3 = x[5*i+2];
841:     alpha4 = x[5*i+3];
842:     alpha5 = x[5*i+4];
843:     while (n-->0) {
844:       y[5*(*idx)]   += alpha1*(*v);
845:       y[5*(*idx)+1] += alpha2*(*v);
846:       y[5*(*idx)+2] += alpha3*(*v);
847:       y[5*(*idx)+3] += alpha4*(*v);
848:       y[5*(*idx)+4] += alpha5*(*v);
849:       idx++; v++;
850:     }
851:   }
852:   PetscLogFlops(10.0*a->nz);
853:   VecRestoreArrayRead(xx,&x);
854:   VecRestoreArray(zz,&y);
855:   return(0);
856: }

858: /* ------------------------------------------------------------------------------*/
861: PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
862: {
863:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
864:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
865:   const PetscScalar *x,*v;
866:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
867:   PetscErrorCode    ierr;
868:   PetscInt          nonzerorow=0,n,i,jrow,j;
869:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

872:   VecGetArrayRead(xx,&x);
873:   VecGetArray(yy,&y);
874:   idx  = a->j;
875:   v    = a->a;
876:   ii   = a->i;

878:   for (i=0; i<m; i++) {
879:     jrow = ii[i];
880:     n    = ii[i+1] - jrow;
881:     sum1  = 0.0;
882:     sum2  = 0.0;
883:     sum3  = 0.0;
884:     sum4  = 0.0;
885:     sum5  = 0.0;
886:     sum6  = 0.0;
887:     nonzerorow += (n>0);
888:     for (j=0; j<n; j++) {
889:       sum1 += v[jrow]*x[6*idx[jrow]];
890:       sum2 += v[jrow]*x[6*idx[jrow]+1];
891:       sum3 += v[jrow]*x[6*idx[jrow]+2];
892:       sum4 += v[jrow]*x[6*idx[jrow]+3];
893:       sum5 += v[jrow]*x[6*idx[jrow]+4];
894:       sum6 += v[jrow]*x[6*idx[jrow]+5];
895:       jrow++;
896:      }
897:     y[6*i]   = sum1;
898:     y[6*i+1] = sum2;
899:     y[6*i+2] = sum3;
900:     y[6*i+3] = sum4;
901:     y[6*i+4] = sum5;
902:     y[6*i+5] = sum6;
903:   }

905:   PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);
906:   VecRestoreArrayRead(xx,&x);
907:   VecRestoreArray(yy,&y);
908:   return(0);
909: }

913: PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
914: {
915:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
916:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
917:   const PetscScalar *x,*v;
918:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
919:   PetscErrorCode    ierr;
920:   PetscInt          n,i;
921:   const PetscInt    m = b->AIJ->rmap->n,*idx;

924:   VecSet(yy,0.0);
925:   VecGetArrayRead(xx,&x);
926:   VecGetArray(yy,&y);

928:   for (i=0; i<m; i++) {
929:     idx    = a->j + a->i[i] ;
930:     v      = a->a + a->i[i] ;
931:     n      = a->i[i+1] - a->i[i];
932:     alpha1 = x[6*i];
933:     alpha2 = x[6*i+1];
934:     alpha3 = x[6*i+2];
935:     alpha4 = x[6*i+3];
936:     alpha5 = x[6*i+4];
937:     alpha6 = x[6*i+5];
938:     while (n-->0) {
939:       y[6*(*idx)]   += alpha1*(*v);
940:       y[6*(*idx)+1] += alpha2*(*v);
941:       y[6*(*idx)+2] += alpha3*(*v);
942:       y[6*(*idx)+3] += alpha4*(*v);
943:       y[6*(*idx)+4] += alpha5*(*v);
944:       y[6*(*idx)+5] += alpha6*(*v);
945:       idx++; v++;
946:     }
947:   }
948:   PetscLogFlops(12.0*a->nz);
949:   VecRestoreArrayRead(xx,&x);
950:   VecRestoreArray(yy,&y);
951:   return(0);
952: }

956: PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
957: {
958:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
959:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
960:   const PetscScalar *x,*v;
961:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
962:   PetscErrorCode    ierr;
963:   PetscInt          n,i,jrow,j;
964:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

967:   if (yy != zz) {VecCopy(yy,zz);}
968:   VecGetArrayRead(xx,&x);
969:   VecGetArray(zz,&y);
970:   idx  = a->j;
971:   v    = a->a;
972:   ii   = a->i;

974:   for (i=0; i<m; i++) {
975:     jrow = ii[i];
976:     n    = ii[i+1] - jrow;
977:     sum1  = 0.0;
978:     sum2  = 0.0;
979:     sum3  = 0.0;
980:     sum4  = 0.0;
981:     sum5  = 0.0;
982:     sum6  = 0.0;
983:     for (j=0; j<n; j++) {
984:       sum1 += v[jrow]*x[6*idx[jrow]];
985:       sum2 += v[jrow]*x[6*idx[jrow]+1];
986:       sum3 += v[jrow]*x[6*idx[jrow]+2];
987:       sum4 += v[jrow]*x[6*idx[jrow]+3];
988:       sum5 += v[jrow]*x[6*idx[jrow]+4];
989:       sum6 += v[jrow]*x[6*idx[jrow]+5];
990:       jrow++;
991:      }
992:     y[6*i]   += sum1;
993:     y[6*i+1] += sum2;
994:     y[6*i+2] += sum3;
995:     y[6*i+3] += sum4;
996:     y[6*i+4] += sum5;
997:     y[6*i+5] += sum6;
998:   }

1000:   PetscLogFlops(12.0*a->nz);
1001:   VecRestoreArrayRead(xx,&x);
1002:   VecRestoreArray(zz,&y);
1003:   return(0);
1004: }

1008: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
1009: {
1010:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1011:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1012:   const PetscScalar *x,*v;
1013:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
1014:   PetscErrorCode    ierr;
1015:   PetscInt          n,i;
1016:   const PetscInt    m = b->AIJ->rmap->n,*idx;

1019:   if (yy != zz) {VecCopy(yy,zz);}
1020:   VecGetArrayRead(xx,&x);
1021:   VecGetArray(zz,&y);
1022: 
1023:   for (i=0; i<m; i++) {
1024:     idx    = a->j + a->i[i] ;
1025:     v      = a->a + a->i[i] ;
1026:     n      = a->i[i+1] - a->i[i];
1027:     alpha1 = x[6*i];
1028:     alpha2 = x[6*i+1];
1029:     alpha3 = x[6*i+2];
1030:     alpha4 = x[6*i+3];
1031:     alpha5 = x[6*i+4];
1032:     alpha6 = x[6*i+5];
1033:     while (n-->0) {
1034:       y[6*(*idx)]   += alpha1*(*v);
1035:       y[6*(*idx)+1] += alpha2*(*v);
1036:       y[6*(*idx)+2] += alpha3*(*v);
1037:       y[6*(*idx)+3] += alpha4*(*v);
1038:       y[6*(*idx)+4] += alpha5*(*v);
1039:       y[6*(*idx)+5] += alpha6*(*v);
1040:       idx++; v++;
1041:     }
1042:   }
1043:   PetscLogFlops(12.0*a->nz);
1044:   VecRestoreArrayRead(xx,&x);
1045:   VecRestoreArray(zz,&y);
1046:   return(0);
1047: }

1049: /* ------------------------------------------------------------------------------*/
1052: PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1053: {
1054:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1055:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1056:   const PetscScalar *x,*v;
1057:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1058:   PetscErrorCode    ierr;
1059:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1060:   PetscInt          nonzerorow=0,n,i,jrow,j;

1063:   VecGetArrayRead(xx,&x);
1064:   VecGetArray(yy,&y);
1065:   idx  = a->j;
1066:   v    = a->a;
1067:   ii   = a->i;

1069:   for (i=0; i<m; i++) {
1070:     jrow = ii[i];
1071:     n    = ii[i+1] - jrow;
1072:     sum1  = 0.0;
1073:     sum2  = 0.0;
1074:     sum3  = 0.0;
1075:     sum4  = 0.0;
1076:     sum5  = 0.0;
1077:     sum6  = 0.0;
1078:     sum7  = 0.0;
1079:     nonzerorow += (n>0);
1080:     for (j=0; j<n; j++) {
1081:       sum1 += v[jrow]*x[7*idx[jrow]];
1082:       sum2 += v[jrow]*x[7*idx[jrow]+1];
1083:       sum3 += v[jrow]*x[7*idx[jrow]+2];
1084:       sum4 += v[jrow]*x[7*idx[jrow]+3];
1085:       sum5 += v[jrow]*x[7*idx[jrow]+4];
1086:       sum6 += v[jrow]*x[7*idx[jrow]+5];
1087:       sum7 += v[jrow]*x[7*idx[jrow]+6];
1088:       jrow++;
1089:      }
1090:     y[7*i]   = sum1;
1091:     y[7*i+1] = sum2;
1092:     y[7*i+2] = sum3;
1093:     y[7*i+3] = sum4;
1094:     y[7*i+4] = sum5;
1095:     y[7*i+5] = sum6;
1096:     y[7*i+6] = sum7;
1097:   }

1099:   PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);
1100:   VecRestoreArrayRead(xx,&x);
1101:   VecRestoreArray(yy,&y);
1102:   return(0);
1103: }

1107: PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1108: {
1109:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1110:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1111:   const PetscScalar *x,*v;
1112:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1113:   PetscErrorCode    ierr;
1114:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1115:   PetscInt          n,i;

1118:   VecSet(yy,0.0);
1119:   VecGetArrayRead(xx,&x);
1120:   VecGetArray(yy,&y);

1122:   for (i=0; i<m; i++) {
1123:     idx    = a->j + a->i[i] ;
1124:     v      = a->a + a->i[i] ;
1125:     n      = a->i[i+1] - a->i[i];
1126:     alpha1 = x[7*i];
1127:     alpha2 = x[7*i+1];
1128:     alpha3 = x[7*i+2];
1129:     alpha4 = x[7*i+3];
1130:     alpha5 = x[7*i+4];
1131:     alpha6 = x[7*i+5];
1132:     alpha7 = x[7*i+6];
1133:     while (n-->0) {
1134:       y[7*(*idx)]   += alpha1*(*v);
1135:       y[7*(*idx)+1] += alpha2*(*v);
1136:       y[7*(*idx)+2] += alpha3*(*v);
1137:       y[7*(*idx)+3] += alpha4*(*v);
1138:       y[7*(*idx)+4] += alpha5*(*v);
1139:       y[7*(*idx)+5] += alpha6*(*v);
1140:       y[7*(*idx)+6] += alpha7*(*v);
1141:       idx++; v++;
1142:     }
1143:   }
1144:   PetscLogFlops(14.0*a->nz);
1145:   VecRestoreArrayRead(xx,&x);
1146:   VecRestoreArray(yy,&y);
1147:   return(0);
1148: }

1152: PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1153: {
1154:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1155:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1156:   const PetscScalar *x,*v;
1157:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1158:   PetscErrorCode    ierr;
1159:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1160:   PetscInt          n,i,jrow,j;

1163:   if (yy != zz) {VecCopy(yy,zz);}
1164:   VecGetArrayRead(xx,&x);
1165:   VecGetArray(zz,&y);
1166:   idx  = a->j;
1167:   v    = a->a;
1168:   ii   = a->i;

1170:   for (i=0; i<m; i++) {
1171:     jrow = ii[i];
1172:     n    = ii[i+1] - jrow;
1173:     sum1  = 0.0;
1174:     sum2  = 0.0;
1175:     sum3  = 0.0;
1176:     sum4  = 0.0;
1177:     sum5  = 0.0;
1178:     sum6  = 0.0;
1179:     sum7  = 0.0;
1180:     for (j=0; j<n; j++) {
1181:       sum1 += v[jrow]*x[7*idx[jrow]];
1182:       sum2 += v[jrow]*x[7*idx[jrow]+1];
1183:       sum3 += v[jrow]*x[7*idx[jrow]+2];
1184:       sum4 += v[jrow]*x[7*idx[jrow]+3];
1185:       sum5 += v[jrow]*x[7*idx[jrow]+4];
1186:       sum6 += v[jrow]*x[7*idx[jrow]+5];
1187:       sum7 += v[jrow]*x[7*idx[jrow]+6];
1188:       jrow++;
1189:      }
1190:     y[7*i]   += sum1;
1191:     y[7*i+1] += sum2;
1192:     y[7*i+2] += sum3;
1193:     y[7*i+3] += sum4;
1194:     y[7*i+4] += sum5;
1195:     y[7*i+5] += sum6;
1196:     y[7*i+6] += sum7;
1197:   }

1199:   PetscLogFlops(14.0*a->nz);
1200:   VecRestoreArrayRead(xx,&x);
1201:   VecRestoreArray(zz,&y);
1202:   return(0);
1203: }

1207: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1208: {
1209:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1210:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1211:   const PetscScalar *x,*v;
1212:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1213:   PetscErrorCode    ierr;
1214:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1215:   PetscInt          n,i;

1218:   if (yy != zz) {VecCopy(yy,zz);}
1219:   VecGetArrayRead(xx,&x);
1220:   VecGetArray(zz,&y);
1221:   for (i=0; i<m; i++) {
1222:     idx    = a->j + a->i[i] ;
1223:     v      = a->a + a->i[i] ;
1224:     n      = a->i[i+1] - a->i[i];
1225:     alpha1 = x[7*i];
1226:     alpha2 = x[7*i+1];
1227:     alpha3 = x[7*i+2];
1228:     alpha4 = x[7*i+3];
1229:     alpha5 = x[7*i+4];
1230:     alpha6 = x[7*i+5];
1231:     alpha7 = x[7*i+6];
1232:     while (n-->0) {
1233:       y[7*(*idx)]   += alpha1*(*v);
1234:       y[7*(*idx)+1] += alpha2*(*v);
1235:       y[7*(*idx)+2] += alpha3*(*v);
1236:       y[7*(*idx)+3] += alpha4*(*v);
1237:       y[7*(*idx)+4] += alpha5*(*v);
1238:       y[7*(*idx)+5] += alpha6*(*v);
1239:       y[7*(*idx)+6] += alpha7*(*v);
1240:       idx++; v++;
1241:     }
1242:   }
1243:   PetscLogFlops(14.0*a->nz);
1244:   VecRestoreArrayRead(xx,&x);
1245:   VecRestoreArray(zz,&y);
1246:   return(0);
1247: }

1251: PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1252: {
1253:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1254:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1255:   const PetscScalar *x,*v;
1256:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1257:   PetscErrorCode    ierr;
1258:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1259:   PetscInt          nonzerorow=0,n,i,jrow,j;

1262:   VecGetArrayRead(xx,&x);
1263:   VecGetArray(yy,&y);
1264:   idx  = a->j;
1265:   v    = a->a;
1266:   ii   = a->i;

1268:   for (i=0; i<m; i++) {
1269:     jrow = ii[i];
1270:     n    = ii[i+1] - jrow;
1271:     sum1  = 0.0;
1272:     sum2  = 0.0;
1273:     sum3  = 0.0;
1274:     sum4  = 0.0;
1275:     sum5  = 0.0;
1276:     sum6  = 0.0;
1277:     sum7  = 0.0;
1278:     sum8  = 0.0;
1279:     nonzerorow += (n>0);
1280:     for (j=0; j<n; j++) {
1281:       sum1 += v[jrow]*x[8*idx[jrow]];
1282:       sum2 += v[jrow]*x[8*idx[jrow]+1];
1283:       sum3 += v[jrow]*x[8*idx[jrow]+2];
1284:       sum4 += v[jrow]*x[8*idx[jrow]+3];
1285:       sum5 += v[jrow]*x[8*idx[jrow]+4];
1286:       sum6 += v[jrow]*x[8*idx[jrow]+5];
1287:       sum7 += v[jrow]*x[8*idx[jrow]+6];
1288:       sum8 += v[jrow]*x[8*idx[jrow]+7];
1289:       jrow++;
1290:      }
1291:     y[8*i]   = sum1;
1292:     y[8*i+1] = sum2;
1293:     y[8*i+2] = sum3;
1294:     y[8*i+3] = sum4;
1295:     y[8*i+4] = sum5;
1296:     y[8*i+5] = sum6;
1297:     y[8*i+6] = sum7;
1298:     y[8*i+7] = sum8;
1299:   }

1301:   PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);
1302:   VecRestoreArrayRead(xx,&x);
1303:   VecRestoreArray(yy,&y);
1304:   return(0);
1305: }

1309: PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1310: {
1311:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1312:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1313:   const PetscScalar *x,*v;
1314:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1315:   PetscErrorCode    ierr;
1316:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1317:   PetscInt          n,i;

1320:   VecSet(yy,0.0);
1321:   VecGetArrayRead(xx,&x);
1322:   VecGetArray(yy,&y);

1324:   for (i=0; i<m; i++) {
1325:     idx    = a->j + a->i[i] ;
1326:     v      = a->a + a->i[i] ;
1327:     n      = a->i[i+1] - a->i[i];
1328:     alpha1 = x[8*i];
1329:     alpha2 = x[8*i+1];
1330:     alpha3 = x[8*i+2];
1331:     alpha4 = x[8*i+3];
1332:     alpha5 = x[8*i+4];
1333:     alpha6 = x[8*i+5];
1334:     alpha7 = x[8*i+6];
1335:     alpha8 = x[8*i+7];
1336:     while (n-->0) {
1337:       y[8*(*idx)]   += alpha1*(*v);
1338:       y[8*(*idx)+1] += alpha2*(*v);
1339:       y[8*(*idx)+2] += alpha3*(*v);
1340:       y[8*(*idx)+3] += alpha4*(*v);
1341:       y[8*(*idx)+4] += alpha5*(*v);
1342:       y[8*(*idx)+5] += alpha6*(*v);
1343:       y[8*(*idx)+6] += alpha7*(*v);
1344:       y[8*(*idx)+7] += alpha8*(*v);
1345:       idx++; v++;
1346:     }
1347:   }
1348:   PetscLogFlops(16.0*a->nz);
1349:   VecRestoreArrayRead(xx,&x);
1350:   VecRestoreArray(yy,&y);
1351:   return(0);
1352: }

1356: PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1357: {
1358:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1359:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1360:   const PetscScalar *x,*v;
1361:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1362:   PetscErrorCode    ierr;
1363:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1364:   PetscInt          n,i,jrow,j;

1367:   if (yy != zz) {VecCopy(yy,zz);}
1368:   VecGetArrayRead(xx,&x);
1369:   VecGetArray(zz,&y);
1370:   idx  = a->j;
1371:   v    = a->a;
1372:   ii   = a->i;

1374:   for (i=0; i<m; i++) {
1375:     jrow = ii[i];
1376:     n    = ii[i+1] - jrow;
1377:     sum1  = 0.0;
1378:     sum2  = 0.0;
1379:     sum3  = 0.0;
1380:     sum4  = 0.0;
1381:     sum5  = 0.0;
1382:     sum6  = 0.0;
1383:     sum7  = 0.0;
1384:     sum8  = 0.0;
1385:     for (j=0; j<n; j++) {
1386:       sum1 += v[jrow]*x[8*idx[jrow]];
1387:       sum2 += v[jrow]*x[8*idx[jrow]+1];
1388:       sum3 += v[jrow]*x[8*idx[jrow]+2];
1389:       sum4 += v[jrow]*x[8*idx[jrow]+3];
1390:       sum5 += v[jrow]*x[8*idx[jrow]+4];
1391:       sum6 += v[jrow]*x[8*idx[jrow]+5];
1392:       sum7 += v[jrow]*x[8*idx[jrow]+6];
1393:       sum8 += v[jrow]*x[8*idx[jrow]+7];
1394:       jrow++;
1395:      }
1396:     y[8*i]   += sum1;
1397:     y[8*i+1] += sum2;
1398:     y[8*i+2] += sum3;
1399:     y[8*i+3] += sum4;
1400:     y[8*i+4] += sum5;
1401:     y[8*i+5] += sum6;
1402:     y[8*i+6] += sum7;
1403:     y[8*i+7] += sum8;
1404:   }

1406:   PetscLogFlops(16.0*a->nz);
1407:   VecRestoreArrayRead(xx,&x);
1408:   VecRestoreArray(zz,&y);
1409:   return(0);
1410: }

1414: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1415: {
1416:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1417:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1418:   const PetscScalar *x,*v;
1419:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1420:   PetscErrorCode    ierr;
1421:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1422:   PetscInt          n,i;

1425:   if (yy != zz) {VecCopy(yy,zz);}
1426:   VecGetArrayRead(xx,&x);
1427:   VecGetArray(zz,&y);
1428:   for (i=0; i<m; i++) {
1429:     idx    = a->j + a->i[i] ;
1430:     v      = a->a + a->i[i] ;
1431:     n      = a->i[i+1] - a->i[i];
1432:     alpha1 = x[8*i];
1433:     alpha2 = x[8*i+1];
1434:     alpha3 = x[8*i+2];
1435:     alpha4 = x[8*i+3];
1436:     alpha5 = x[8*i+4];
1437:     alpha6 = x[8*i+5];
1438:     alpha7 = x[8*i+6];
1439:     alpha8 = x[8*i+7];
1440:     while (n-->0) {
1441:       y[8*(*idx)]   += alpha1*(*v);
1442:       y[8*(*idx)+1] += alpha2*(*v);
1443:       y[8*(*idx)+2] += alpha3*(*v);
1444:       y[8*(*idx)+3] += alpha4*(*v);
1445:       y[8*(*idx)+4] += alpha5*(*v);
1446:       y[8*(*idx)+5] += alpha6*(*v);
1447:       y[8*(*idx)+6] += alpha7*(*v);
1448:       y[8*(*idx)+7] += alpha8*(*v);
1449:       idx++; v++;
1450:     }
1451:   }
1452:   PetscLogFlops(16.0*a->nz);
1453:   VecRestoreArrayRead(xx,&x);
1454:   VecRestoreArray(zz,&y);
1455:   return(0);
1456: }

1458: /* ------------------------------------------------------------------------------*/
1461: PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1462: {
1463:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1464:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1465:   const PetscScalar *x,*v;
1466:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1467:   PetscErrorCode    ierr;
1468:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1469:   PetscInt          nonzerorow=0,n,i,jrow,j;

1472:   VecGetArrayRead(xx,&x);
1473:   VecGetArray(yy,&y);
1474:   idx  = a->j;
1475:   v    = a->a;
1476:   ii   = a->i;

1478:   for (i=0; i<m; i++) {
1479:     jrow = ii[i];
1480:     n    = ii[i+1] - jrow;
1481:     sum1  = 0.0;
1482:     sum2  = 0.0;
1483:     sum3  = 0.0;
1484:     sum4  = 0.0;
1485:     sum5  = 0.0;
1486:     sum6  = 0.0;
1487:     sum7  = 0.0;
1488:     sum8  = 0.0;
1489:     sum9  = 0.0;
1490:     nonzerorow += (n>0);
1491:     for (j=0; j<n; j++) {
1492:       sum1 += v[jrow]*x[9*idx[jrow]];
1493:       sum2 += v[jrow]*x[9*idx[jrow]+1];
1494:       sum3 += v[jrow]*x[9*idx[jrow]+2];
1495:       sum4 += v[jrow]*x[9*idx[jrow]+3];
1496:       sum5 += v[jrow]*x[9*idx[jrow]+4];
1497:       sum6 += v[jrow]*x[9*idx[jrow]+5];
1498:       sum7 += v[jrow]*x[9*idx[jrow]+6];
1499:       sum8 += v[jrow]*x[9*idx[jrow]+7];
1500:       sum9 += v[jrow]*x[9*idx[jrow]+8];
1501:       jrow++;
1502:      }
1503:     y[9*i]   = sum1;
1504:     y[9*i+1] = sum2;
1505:     y[9*i+2] = sum3;
1506:     y[9*i+3] = sum4;
1507:     y[9*i+4] = sum5;
1508:     y[9*i+5] = sum6;
1509:     y[9*i+6] = sum7;
1510:     y[9*i+7] = sum8;
1511:     y[9*i+8] = sum9;
1512:   }

1514:   PetscLogFlops(18.0*a->nz - 9*nonzerorow);
1515:   VecRestoreArrayRead(xx,&x);
1516:   VecRestoreArray(yy,&y);
1517:   return(0);
1518: }

1520: /* ------------------------------------------------------------------------------*/

1524: PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1525: {
1526:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1527:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1528:   const PetscScalar *x,*v;
1529:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1530:   PetscErrorCode    ierr;
1531:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1532:   PetscInt          n,i;

1535:   VecSet(yy,0.0);
1536:   VecGetArrayRead(xx,&x);
1537:   VecGetArray(yy,&y);

1539:   for (i=0; i<m; i++) {
1540:     idx    = a->j + a->i[i] ;
1541:     v      = a->a + a->i[i] ;
1542:     n      = a->i[i+1] - a->i[i];
1543:     alpha1 = x[9*i];
1544:     alpha2 = x[9*i+1];
1545:     alpha3 = x[9*i+2];
1546:     alpha4 = x[9*i+3];
1547:     alpha5 = x[9*i+4];
1548:     alpha6 = x[9*i+5];
1549:     alpha7 = x[9*i+6];
1550:     alpha8 = x[9*i+7];
1551:     alpha9 = x[9*i+8];
1552:     while (n-->0) {
1553:       y[9*(*idx)]   += alpha1*(*v);
1554:       y[9*(*idx)+1] += alpha2*(*v);
1555:       y[9*(*idx)+2] += alpha3*(*v);
1556:       y[9*(*idx)+3] += alpha4*(*v);
1557:       y[9*(*idx)+4] += alpha5*(*v);
1558:       y[9*(*idx)+5] += alpha6*(*v);
1559:       y[9*(*idx)+6] += alpha7*(*v);
1560:       y[9*(*idx)+7] += alpha8*(*v);
1561:       y[9*(*idx)+8] += alpha9*(*v);
1562:       idx++; v++;
1563:     }
1564:   }
1565:   PetscLogFlops(18.0*a->nz);
1566:   VecRestoreArrayRead(xx,&x);
1567:   VecRestoreArray(yy,&y);
1568:   return(0);
1569: }

1573: PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1574: {
1575:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1576:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1577:   const PetscScalar *x,*v;
1578:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1579:   PetscErrorCode    ierr;
1580:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1581:   PetscInt          n,i,jrow,j;

1584:   if (yy != zz) {VecCopy(yy,zz);}
1585:   VecGetArrayRead(xx,&x);
1586:   VecGetArray(zz,&y);
1587:   idx  = a->j;
1588:   v    = a->a;
1589:   ii   = a->i;

1591:   for (i=0; i<m; i++) {
1592:     jrow = ii[i];
1593:     n    = ii[i+1] - jrow;
1594:     sum1  = 0.0;
1595:     sum2  = 0.0;
1596:     sum3  = 0.0;
1597:     sum4  = 0.0;
1598:     sum5  = 0.0;
1599:     sum6  = 0.0;
1600:     sum7  = 0.0;
1601:     sum8  = 0.0;
1602:     sum9  = 0.0;
1603:     for (j=0; j<n; j++) {
1604:       sum1 += v[jrow]*x[9*idx[jrow]];
1605:       sum2 += v[jrow]*x[9*idx[jrow]+1];
1606:       sum3 += v[jrow]*x[9*idx[jrow]+2];
1607:       sum4 += v[jrow]*x[9*idx[jrow]+3];
1608:       sum5 += v[jrow]*x[9*idx[jrow]+4];
1609:       sum6 += v[jrow]*x[9*idx[jrow]+5];
1610:       sum7 += v[jrow]*x[9*idx[jrow]+6];
1611:       sum8 += v[jrow]*x[9*idx[jrow]+7];
1612:       sum9 += v[jrow]*x[9*idx[jrow]+8];
1613:       jrow++;
1614:      }
1615:     y[9*i]   += sum1;
1616:     y[9*i+1] += sum2;
1617:     y[9*i+2] += sum3;
1618:     y[9*i+3] += sum4;
1619:     y[9*i+4] += sum5;
1620:     y[9*i+5] += sum6;
1621:     y[9*i+6] += sum7;
1622:     y[9*i+7] += sum8;
1623:     y[9*i+8] += sum9;
1624:   }

1626:   PetscLogFlops(18*a->nz);
1627:   VecRestoreArrayRead(xx,&x);
1628:   VecRestoreArray(zz,&y);
1629:   return(0);
1630: }

1634: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1635: {
1636:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1637:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1638:   const PetscScalar *x,*v;
1639:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1640:   PetscErrorCode    ierr;
1641:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1642:   PetscInt          n,i;

1645:   if (yy != zz) {VecCopy(yy,zz);}
1646:   VecGetArrayRead(xx,&x);
1647:   VecGetArray(zz,&y);
1648:   for (i=0; i<m; i++) {
1649:     idx    = a->j + a->i[i] ;
1650:     v      = a->a + a->i[i] ;
1651:     n      = a->i[i+1] - a->i[i];
1652:     alpha1 = x[9*i];
1653:     alpha2 = x[9*i+1];
1654:     alpha3 = x[9*i+2];
1655:     alpha4 = x[9*i+3];
1656:     alpha5 = x[9*i+4];
1657:     alpha6 = x[9*i+5];
1658:     alpha7 = x[9*i+6];
1659:     alpha8 = x[9*i+7];
1660:     alpha9 = x[9*i+8];
1661:     while (n-->0) {
1662:       y[9*(*idx)]   += alpha1*(*v);
1663:       y[9*(*idx)+1] += alpha2*(*v);
1664:       y[9*(*idx)+2] += alpha3*(*v);
1665:       y[9*(*idx)+3] += alpha4*(*v);
1666:       y[9*(*idx)+4] += alpha5*(*v);
1667:       y[9*(*idx)+5] += alpha6*(*v);
1668:       y[9*(*idx)+6] += alpha7*(*v);
1669:       y[9*(*idx)+7] += alpha8*(*v);
1670:       y[9*(*idx)+8] += alpha9*(*v);
1671:       idx++; v++;
1672:     }
1673:   }
1674:   PetscLogFlops(18.0*a->nz);
1675:   VecRestoreArrayRead(xx,&x);
1676:   VecRestoreArray(zz,&y);
1677:   return(0);
1678: }
1681: PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1682: {
1683:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1684:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1685:   const PetscScalar *x,*v;
1686:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1687:   PetscErrorCode    ierr;
1688:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1689:   PetscInt          nonzerorow=0,n,i,jrow,j;

1692:   VecGetArrayRead(xx,&x);
1693:   VecGetArray(yy,&y);
1694:   idx  = a->j;
1695:   v    = a->a;
1696:   ii   = a->i;

1698:   for (i=0; i<m; i++) {
1699:     jrow = ii[i];
1700:     n    = ii[i+1] - jrow;
1701:     sum1  = 0.0;
1702:     sum2  = 0.0;
1703:     sum3  = 0.0;
1704:     sum4  = 0.0;
1705:     sum5  = 0.0;
1706:     sum6  = 0.0;
1707:     sum7  = 0.0;
1708:     sum8  = 0.0;
1709:     sum9  = 0.0;
1710:     sum10 = 0.0;
1711:     nonzerorow += (n>0);
1712:     for (j=0; j<n; j++) {
1713:       sum1  += v[jrow]*x[10*idx[jrow]];
1714:       sum2  += v[jrow]*x[10*idx[jrow]+1];
1715:       sum3  += v[jrow]*x[10*idx[jrow]+2];
1716:       sum4  += v[jrow]*x[10*idx[jrow]+3];
1717:       sum5  += v[jrow]*x[10*idx[jrow]+4];
1718:       sum6  += v[jrow]*x[10*idx[jrow]+5];
1719:       sum7  += v[jrow]*x[10*idx[jrow]+6];
1720:       sum8  += v[jrow]*x[10*idx[jrow]+7];
1721:       sum9  += v[jrow]*x[10*idx[jrow]+8];
1722:       sum10 += v[jrow]*x[10*idx[jrow]+9];
1723:       jrow++;
1724:      }
1725:     y[10*i]   = sum1;
1726:     y[10*i+1] = sum2;
1727:     y[10*i+2] = sum3;
1728:     y[10*i+3] = sum4;
1729:     y[10*i+4] = sum5;
1730:     y[10*i+5] = sum6;
1731:     y[10*i+6] = sum7;
1732:     y[10*i+7] = sum8;
1733:     y[10*i+8] = sum9;
1734:     y[10*i+9] = sum10;
1735:   }

1737:   PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);
1738:   VecRestoreArrayRead(xx,&x);
1739:   VecRestoreArray(yy,&y);
1740:   return(0);
1741: }

1745: PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1746: {
1747:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1748:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1749:   const PetscScalar *x,*v;
1750:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1751:   PetscErrorCode    ierr;
1752:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1753:   PetscInt          n,i,jrow,j;

1756:   if (yy != zz) {VecCopy(yy,zz);}
1757:   VecGetArrayRead(xx,&x);
1758:   VecGetArray(zz,&y);
1759:   idx  = a->j;
1760:   v    = a->a;
1761:   ii   = a->i;

1763:   for (i=0; i<m; i++) {
1764:     jrow = ii[i];
1765:     n    = ii[i+1] - jrow;
1766:     sum1  = 0.0;
1767:     sum2  = 0.0;
1768:     sum3  = 0.0;
1769:     sum4  = 0.0;
1770:     sum5  = 0.0;
1771:     sum6  = 0.0;
1772:     sum7  = 0.0;
1773:     sum8  = 0.0;
1774:     sum9  = 0.0;
1775:     sum10 = 0.0;
1776:     for (j=0; j<n; j++) {
1777:       sum1  += v[jrow]*x[10*idx[jrow]];
1778:       sum2  += v[jrow]*x[10*idx[jrow]+1];
1779:       sum3  += v[jrow]*x[10*idx[jrow]+2];
1780:       sum4  += v[jrow]*x[10*idx[jrow]+3];
1781:       sum5  += v[jrow]*x[10*idx[jrow]+4];
1782:       sum6  += v[jrow]*x[10*idx[jrow]+5];
1783:       sum7  += v[jrow]*x[10*idx[jrow]+6];
1784:       sum8  += v[jrow]*x[10*idx[jrow]+7];
1785:       sum9  += v[jrow]*x[10*idx[jrow]+8];
1786:       sum10 += v[jrow]*x[10*idx[jrow]+9];
1787:       jrow++;
1788:      }
1789:     y[10*i]   += sum1;
1790:     y[10*i+1] += sum2;
1791:     y[10*i+2] += sum3;
1792:     y[10*i+3] += sum4;
1793:     y[10*i+4] += sum5;
1794:     y[10*i+5] += sum6;
1795:     y[10*i+6] += sum7;
1796:     y[10*i+7] += sum8;
1797:     y[10*i+8] += sum9;
1798:     y[10*i+9] += sum10;
1799:   }

1801:   PetscLogFlops(20.0*a->nz);
1802:   VecRestoreArrayRead(xx,&x);
1803:   VecRestoreArray(yy,&y);
1804:   return(0);
1805: }

1809: PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1810: {
1811:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1812:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1813:   const PetscScalar *x,*v;
1814:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1815:   PetscErrorCode    ierr;
1816:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1817:   PetscInt          n,i;

1820:   VecSet(yy,0.0);
1821:   VecGetArrayRead(xx,&x);
1822:   VecGetArray(yy,&y);

1824:   for (i=0; i<m; i++) {
1825:     idx    = a->j + a->i[i] ;
1826:     v      = a->a + a->i[i] ;
1827:     n      = a->i[i+1] - a->i[i];
1828:     alpha1 = x[10*i];
1829:     alpha2 = x[10*i+1];
1830:     alpha3 = x[10*i+2];
1831:     alpha4 = x[10*i+3];
1832:     alpha5 = x[10*i+4];
1833:     alpha6 = x[10*i+5];
1834:     alpha7 = x[10*i+6];
1835:     alpha8 = x[10*i+7];
1836:     alpha9 = x[10*i+8];
1837:     alpha10 = x[10*i+9];
1838:     while (n-->0) {
1839:       y[10*(*idx)]   += alpha1*(*v);
1840:       y[10*(*idx)+1] += alpha2*(*v);
1841:       y[10*(*idx)+2] += alpha3*(*v);
1842:       y[10*(*idx)+3] += alpha4*(*v);
1843:       y[10*(*idx)+4] += alpha5*(*v);
1844:       y[10*(*idx)+5] += alpha6*(*v);
1845:       y[10*(*idx)+6] += alpha7*(*v);
1846:       y[10*(*idx)+7] += alpha8*(*v);
1847:       y[10*(*idx)+8] += alpha9*(*v);
1848:       y[10*(*idx)+9] += alpha10*(*v);
1849:       idx++; v++;
1850:     }
1851:   }
1852:   PetscLogFlops(20.0*a->nz);
1853:   VecRestoreArrayRead(xx,&x);
1854:   VecRestoreArray(yy,&y);
1855:   return(0);
1856: }

1860: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1861: {
1862:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1863:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1864:   const PetscScalar *x,*v;
1865:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1866:   PetscErrorCode    ierr;
1867:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1868:   PetscInt          n,i;

1871:   if (yy != zz) {VecCopy(yy,zz);}
1872:   VecGetArrayRead(xx,&x);
1873:   VecGetArray(zz,&y);
1874:   for (i=0; i<m; i++) {
1875:     idx    = a->j + a->i[i] ;
1876:     v      = a->a + a->i[i] ;
1877:     n      = a->i[i+1] - a->i[i];
1878:     alpha1 = x[10*i];
1879:     alpha2 = x[10*i+1];
1880:     alpha3 = x[10*i+2];
1881:     alpha4 = x[10*i+3];
1882:     alpha5 = x[10*i+4];
1883:     alpha6 = x[10*i+5];
1884:     alpha7 = x[10*i+6];
1885:     alpha8 = x[10*i+7];
1886:     alpha9 = x[10*i+8];
1887:     alpha10 = x[10*i+9];
1888:     while (n-->0) {
1889:       y[10*(*idx)]   += alpha1*(*v);
1890:       y[10*(*idx)+1] += alpha2*(*v);
1891:       y[10*(*idx)+2] += alpha3*(*v);
1892:       y[10*(*idx)+3] += alpha4*(*v);
1893:       y[10*(*idx)+4] += alpha5*(*v);
1894:       y[10*(*idx)+5] += alpha6*(*v);
1895:       y[10*(*idx)+6] += alpha7*(*v);
1896:       y[10*(*idx)+7] += alpha8*(*v);
1897:       y[10*(*idx)+8] += alpha9*(*v);
1898:       y[10*(*idx)+9] += alpha10*(*v);
1899:       idx++; v++;
1900:     }
1901:   }
1902:   PetscLogFlops(20.0*a->nz);
1903:   VecRestoreArrayRead(xx,&x);
1904:   VecRestoreArray(zz,&y);
1905:   return(0);
1906: }


1909: /*--------------------------------------------------------------------------------------------*/
1912: PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1913: {
1914:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1915:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1916:   const PetscScalar *x,*v;
1917:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1918:   PetscErrorCode    ierr;
1919:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1920:   PetscInt          nonzerorow=0,n,i,jrow,j;

1923:   VecGetArrayRead(xx,&x);
1924:   VecGetArray(yy,&y);
1925:   idx  = a->j;
1926:   v    = a->a;
1927:   ii   = a->i;

1929:   for (i=0; i<m; i++) {
1930:     jrow = ii[i];
1931:     n    = ii[i+1] - jrow;
1932:     sum1  = 0.0;
1933:     sum2  = 0.0;
1934:     sum3  = 0.0;
1935:     sum4  = 0.0;
1936:     sum5  = 0.0;
1937:     sum6  = 0.0;
1938:     sum7  = 0.0;
1939:     sum8  = 0.0;
1940:     sum9  = 0.0;
1941:     sum10 = 0.0;
1942:     sum11 = 0.0;
1943:     nonzerorow += (n>0);
1944:     for (j=0; j<n; j++) {
1945:       sum1  += v[jrow]*x[11*idx[jrow]];
1946:       sum2  += v[jrow]*x[11*idx[jrow]+1];
1947:       sum3  += v[jrow]*x[11*idx[jrow]+2];
1948:       sum4  += v[jrow]*x[11*idx[jrow]+3];
1949:       sum5  += v[jrow]*x[11*idx[jrow]+4];
1950:       sum6  += v[jrow]*x[11*idx[jrow]+5];
1951:       sum7  += v[jrow]*x[11*idx[jrow]+6];
1952:       sum8  += v[jrow]*x[11*idx[jrow]+7];
1953:       sum9  += v[jrow]*x[11*idx[jrow]+8];
1954:       sum10 += v[jrow]*x[11*idx[jrow]+9];
1955:       sum11 += v[jrow]*x[11*idx[jrow]+10];
1956:       jrow++;
1957:      }
1958:     y[11*i]   = sum1;
1959:     y[11*i+1] = sum2;
1960:     y[11*i+2] = sum3;
1961:     y[11*i+3] = sum4;
1962:     y[11*i+4] = sum5;
1963:     y[11*i+5] = sum6;
1964:     y[11*i+6] = sum7;
1965:     y[11*i+7] = sum8;
1966:     y[11*i+8] = sum9;
1967:     y[11*i+9] = sum10;
1968:     y[11*i+10] = sum11;
1969:   }

1971:   PetscLogFlops(22*a->nz - 11*nonzerorow);
1972:   VecRestoreArrayRead(xx,&x);
1973:   VecRestoreArray(yy,&y);
1974:   return(0);
1975: }

1979: PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1980: {
1981:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1982:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1983:   const PetscScalar *x,*v;
1984:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1985:   PetscErrorCode    ierr;
1986:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1987:   PetscInt          n,i,jrow,j;

1990:   if (yy != zz) {VecCopy(yy,zz);}
1991:   VecGetArrayRead(xx,&x);
1992:   VecGetArray(zz,&y);
1993:   idx  = a->j;
1994:   v    = a->a;
1995:   ii   = a->i;

1997:   for (i=0; i<m; i++) {
1998:     jrow = ii[i];
1999:     n    = ii[i+1] - jrow;
2000:     sum1  = 0.0;
2001:     sum2  = 0.0;
2002:     sum3  = 0.0;
2003:     sum4  = 0.0;
2004:     sum5  = 0.0;
2005:     sum6  = 0.0;
2006:     sum7  = 0.0;
2007:     sum8  = 0.0;
2008:     sum9  = 0.0;
2009:     sum10 = 0.0;
2010:     sum11 = 0.0;
2011:     for (j=0; j<n; j++) {
2012:       sum1  += v[jrow]*x[11*idx[jrow]];
2013:       sum2  += v[jrow]*x[11*idx[jrow]+1];
2014:       sum3  += v[jrow]*x[11*idx[jrow]+2];
2015:       sum4  += v[jrow]*x[11*idx[jrow]+3];
2016:       sum5  += v[jrow]*x[11*idx[jrow]+4];
2017:       sum6  += v[jrow]*x[11*idx[jrow]+5];
2018:       sum7  += v[jrow]*x[11*idx[jrow]+6];
2019:       sum8  += v[jrow]*x[11*idx[jrow]+7];
2020:       sum9  += v[jrow]*x[11*idx[jrow]+8];
2021:       sum10 += v[jrow]*x[11*idx[jrow]+9];
2022:       sum11 += v[jrow]*x[11*idx[jrow]+10];
2023:       jrow++;
2024:      }
2025:     y[11*i]   += sum1;
2026:     y[11*i+1] += sum2;
2027:     y[11*i+2] += sum3;
2028:     y[11*i+3] += sum4;
2029:     y[11*i+4] += sum5;
2030:     y[11*i+5] += sum6;
2031:     y[11*i+6] += sum7;
2032:     y[11*i+7] += sum8;
2033:     y[11*i+8] += sum9;
2034:     y[11*i+9] += sum10;
2035:     y[11*i+10] += sum11;
2036:   }

2038:   PetscLogFlops(22*a->nz);
2039:   VecRestoreArrayRead(xx,&x);
2040:   VecRestoreArray(yy,&y);
2041:   return(0);
2042: }

2046: PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
2047: {
2048:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2049:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2050:   const PetscScalar *x,*v;
2051:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2052:   PetscErrorCode    ierr;
2053:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2054:   PetscInt          n,i;

2057:   VecSet(yy,0.0);
2058:   VecGetArrayRead(xx,&x);
2059:   VecGetArray(yy,&y);

2061:   for (i=0; i<m; i++) {
2062:     idx    = a->j + a->i[i] ;
2063:     v      = a->a + a->i[i] ;
2064:     n      = a->i[i+1] - a->i[i];
2065:     alpha1 = x[11*i];
2066:     alpha2 = x[11*i+1];
2067:     alpha3 = x[11*i+2];
2068:     alpha4 = x[11*i+3];
2069:     alpha5 = x[11*i+4];
2070:     alpha6 = x[11*i+5];
2071:     alpha7 = x[11*i+6];
2072:     alpha8 = x[11*i+7];
2073:     alpha9 = x[11*i+8];
2074:     alpha10 = x[11*i+9];
2075:     alpha11 = x[11*i+10];
2076:     while (n-->0) {
2077:       y[11*(*idx)]   += alpha1*(*v);
2078:       y[11*(*idx)+1] += alpha2*(*v);
2079:       y[11*(*idx)+2] += alpha3*(*v);
2080:       y[11*(*idx)+3] += alpha4*(*v);
2081:       y[11*(*idx)+4] += alpha5*(*v);
2082:       y[11*(*idx)+5] += alpha6*(*v);
2083:       y[11*(*idx)+6] += alpha7*(*v);
2084:       y[11*(*idx)+7] += alpha8*(*v);
2085:       y[11*(*idx)+8] += alpha9*(*v);
2086:       y[11*(*idx)+9] += alpha10*(*v);
2087:       y[11*(*idx)+10] += alpha11*(*v);
2088:       idx++; v++;
2089:     }
2090:   }
2091:   PetscLogFlops(22*a->nz);
2092:   VecRestoreArrayRead(xx,&x);
2093:   VecRestoreArray(yy,&y);
2094:   return(0);
2095: }

2099: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2100: {
2101:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2102:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2103:   const PetscScalar *x,*v;
2104:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2105:   PetscErrorCode    ierr;
2106:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2107:   PetscInt          n,i;

2110:   if (yy != zz) {VecCopy(yy,zz);}
2111:   VecGetArrayRead(xx,&x);
2112:   VecGetArray(zz,&y);
2113:   for (i=0; i<m; i++) {
2114:     idx    = a->j + a->i[i] ;
2115:     v      = a->a + a->i[i] ;
2116:     n      = a->i[i+1] - a->i[i];
2117:     alpha1 = x[11*i];
2118:     alpha2 = x[11*i+1];
2119:     alpha3 = x[11*i+2];
2120:     alpha4 = x[11*i+3];
2121:     alpha5 = x[11*i+4];
2122:     alpha6 = x[11*i+5];
2123:     alpha7 = x[11*i+6];
2124:     alpha8 = x[11*i+7];
2125:     alpha9 = x[11*i+8];
2126:     alpha10 = x[11*i+9];
2127:     alpha11 = x[11*i+10];
2128:     while (n-->0) {
2129:       y[11*(*idx)]   += alpha1*(*v);
2130:       y[11*(*idx)+1] += alpha2*(*v);
2131:       y[11*(*idx)+2] += alpha3*(*v);
2132:       y[11*(*idx)+3] += alpha4*(*v);
2133:       y[11*(*idx)+4] += alpha5*(*v);
2134:       y[11*(*idx)+5] += alpha6*(*v);
2135:       y[11*(*idx)+6] += alpha7*(*v);
2136:       y[11*(*idx)+7] += alpha8*(*v);
2137:       y[11*(*idx)+8] += alpha9*(*v);
2138:       y[11*(*idx)+9] += alpha10*(*v);
2139:       y[11*(*idx)+10] += alpha11*(*v);
2140:       idx++; v++;
2141:     }
2142:   }
2143:   PetscLogFlops(22*a->nz);
2144:   VecRestoreArrayRead(xx,&x);
2145:   VecRestoreArray(zz,&y);
2146:   return(0);
2147: }


2150: /*--------------------------------------------------------------------------------------------*/
2153: PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2154: {
2155:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2156:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2157:   const PetscScalar *x,*v;
2158:   PetscScalar        *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2159:   PetscScalar        sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2160:   PetscErrorCode     ierr;
2161:   const PetscInt     m = b->AIJ->rmap->n,*idx,*ii;
2162:   PetscInt           nonzerorow=0,n,i,jrow,j;

2165:   VecGetArrayRead(xx,&x);
2166:   VecGetArray(yy,&y);
2167:   idx  = a->j;
2168:   v    = a->a;
2169:   ii   = a->i;

2171:   for (i=0; i<m; i++) {
2172:     jrow = ii[i];
2173:     n    = ii[i+1] - jrow;
2174:     sum1  = 0.0;
2175:     sum2  = 0.0;
2176:     sum3  = 0.0;
2177:     sum4  = 0.0;
2178:     sum5  = 0.0;
2179:     sum6  = 0.0;
2180:     sum7  = 0.0;
2181:     sum8  = 0.0;
2182:     sum9  = 0.0;
2183:     sum10 = 0.0;
2184:     sum11 = 0.0;
2185:     sum12 = 0.0;
2186:     sum13 = 0.0;
2187:     sum14 = 0.0;
2188:     sum15 = 0.0;
2189:     sum16 = 0.0;
2190:     nonzerorow += (n>0);
2191:     for (j=0; j<n; j++) {
2192:       sum1  += v[jrow]*x[16*idx[jrow]];
2193:       sum2  += v[jrow]*x[16*idx[jrow]+1];
2194:       sum3  += v[jrow]*x[16*idx[jrow]+2];
2195:       sum4  += v[jrow]*x[16*idx[jrow]+3];
2196:       sum5  += v[jrow]*x[16*idx[jrow]+4];
2197:       sum6  += v[jrow]*x[16*idx[jrow]+5];
2198:       sum7  += v[jrow]*x[16*idx[jrow]+6];
2199:       sum8  += v[jrow]*x[16*idx[jrow]+7];
2200:       sum9  += v[jrow]*x[16*idx[jrow]+8];
2201:       sum10 += v[jrow]*x[16*idx[jrow]+9];
2202:       sum11 += v[jrow]*x[16*idx[jrow]+10];
2203:       sum12 += v[jrow]*x[16*idx[jrow]+11];
2204:       sum13 += v[jrow]*x[16*idx[jrow]+12];
2205:       sum14 += v[jrow]*x[16*idx[jrow]+13];
2206:       sum15 += v[jrow]*x[16*idx[jrow]+14];
2207:       sum16 += v[jrow]*x[16*idx[jrow]+15];
2208:       jrow++;
2209:      }
2210:     y[16*i]    = sum1;
2211:     y[16*i+1]  = sum2;
2212:     y[16*i+2]  = sum3;
2213:     y[16*i+3]  = sum4;
2214:     y[16*i+4]  = sum5;
2215:     y[16*i+5]  = sum6;
2216:     y[16*i+6]  = sum7;
2217:     y[16*i+7]  = sum8;
2218:     y[16*i+8]  = sum9;
2219:     y[16*i+9]  = sum10;
2220:     y[16*i+10] = sum11;
2221:     y[16*i+11] = sum12;
2222:     y[16*i+12] = sum13;
2223:     y[16*i+13] = sum14;
2224:     y[16*i+14] = sum15;
2225:     y[16*i+15] = sum16;
2226:   }

2228:   PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);
2229:   VecRestoreArrayRead(xx,&x);
2230:   VecRestoreArray(yy,&y);
2231:   return(0);
2232: }

2236: PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2237: {
2238:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2239:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2240:   const PetscScalar *x,*v;
2241:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2242:   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2243:   PetscErrorCode    ierr;
2244:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2245:   PetscInt          n,i;

2248:   VecSet(yy,0.0);
2249:   VecGetArrayRead(xx,&x);
2250:   VecGetArray(yy,&y);

2252:   for (i=0; i<m; i++) {
2253:     idx    = a->j + a->i[i] ;
2254:     v      = a->a + a->i[i] ;
2255:     n      = a->i[i+1] - a->i[i];
2256:     alpha1  = x[16*i];
2257:     alpha2  = x[16*i+1];
2258:     alpha3  = x[16*i+2];
2259:     alpha4  = x[16*i+3];
2260:     alpha5  = x[16*i+4];
2261:     alpha6  = x[16*i+5];
2262:     alpha7  = x[16*i+6];
2263:     alpha8  = x[16*i+7];
2264:     alpha9  = x[16*i+8];
2265:     alpha10 = x[16*i+9];
2266:     alpha11 = x[16*i+10];
2267:     alpha12 = x[16*i+11];
2268:     alpha13 = x[16*i+12];
2269:     alpha14 = x[16*i+13];
2270:     alpha15 = x[16*i+14];
2271:     alpha16 = x[16*i+15];
2272:     while (n-->0) {
2273:       y[16*(*idx)]    += alpha1*(*v);
2274:       y[16*(*idx)+1]  += alpha2*(*v);
2275:       y[16*(*idx)+2]  += alpha3*(*v);
2276:       y[16*(*idx)+3]  += alpha4*(*v);
2277:       y[16*(*idx)+4]  += alpha5*(*v);
2278:       y[16*(*idx)+5]  += alpha6*(*v);
2279:       y[16*(*idx)+6]  += alpha7*(*v);
2280:       y[16*(*idx)+7]  += alpha8*(*v);
2281:       y[16*(*idx)+8]  += alpha9*(*v);
2282:       y[16*(*idx)+9]  += alpha10*(*v);
2283:       y[16*(*idx)+10] += alpha11*(*v);
2284:       y[16*(*idx)+11] += alpha12*(*v);
2285:       y[16*(*idx)+12] += alpha13*(*v);
2286:       y[16*(*idx)+13] += alpha14*(*v);
2287:       y[16*(*idx)+14] += alpha15*(*v);
2288:       y[16*(*idx)+15] += alpha16*(*v);
2289:       idx++; v++;
2290:     }
2291:   }
2292:   PetscLogFlops(32.0*a->nz);
2293:   VecRestoreArrayRead(xx,&x);
2294:   VecRestoreArray(yy,&y);
2295:   return(0);
2296: }

2300: PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2301: {
2302:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2303:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2304:   const PetscScalar *x,*v;
2305:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2306:   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2307:   PetscErrorCode    ierr;
2308:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2309:   PetscInt          n,i,jrow,j;

2312:   if (yy != zz) {VecCopy(yy,zz);}
2313:   VecGetArrayRead(xx,&x);
2314:   VecGetArray(zz,&y);
2315:   idx  = a->j;
2316:   v    = a->a;
2317:   ii   = a->i;

2319:   for (i=0; i<m; i++) {
2320:     jrow = ii[i];
2321:     n    = ii[i+1] - jrow;
2322:     sum1  = 0.0;
2323:     sum2  = 0.0;
2324:     sum3  = 0.0;
2325:     sum4  = 0.0;
2326:     sum5  = 0.0;
2327:     sum6  = 0.0;
2328:     sum7  = 0.0;
2329:     sum8  = 0.0;
2330:     sum9  = 0.0;
2331:     sum10 = 0.0;
2332:     sum11 = 0.0;
2333:     sum12 = 0.0;
2334:     sum13 = 0.0;
2335:     sum14 = 0.0;
2336:     sum15 = 0.0;
2337:     sum16 = 0.0;
2338:     for (j=0; j<n; j++) {
2339:       sum1  += v[jrow]*x[16*idx[jrow]];
2340:       sum2  += v[jrow]*x[16*idx[jrow]+1];
2341:       sum3  += v[jrow]*x[16*idx[jrow]+2];
2342:       sum4  += v[jrow]*x[16*idx[jrow]+3];
2343:       sum5  += v[jrow]*x[16*idx[jrow]+4];
2344:       sum6  += v[jrow]*x[16*idx[jrow]+5];
2345:       sum7  += v[jrow]*x[16*idx[jrow]+6];
2346:       sum8  += v[jrow]*x[16*idx[jrow]+7];
2347:       sum9  += v[jrow]*x[16*idx[jrow]+8];
2348:       sum10 += v[jrow]*x[16*idx[jrow]+9];
2349:       sum11 += v[jrow]*x[16*idx[jrow]+10];
2350:       sum12 += v[jrow]*x[16*idx[jrow]+11];
2351:       sum13 += v[jrow]*x[16*idx[jrow]+12];
2352:       sum14 += v[jrow]*x[16*idx[jrow]+13];
2353:       sum15 += v[jrow]*x[16*idx[jrow]+14];
2354:       sum16 += v[jrow]*x[16*idx[jrow]+15];
2355:       jrow++;
2356:      }
2357:     y[16*i]    += sum1;
2358:     y[16*i+1]  += sum2;
2359:     y[16*i+2]  += sum3;
2360:     y[16*i+3]  += sum4;
2361:     y[16*i+4]  += sum5;
2362:     y[16*i+5]  += sum6;
2363:     y[16*i+6]  += sum7;
2364:     y[16*i+7]  += sum8;
2365:     y[16*i+8]  += sum9;
2366:     y[16*i+9]  += sum10;
2367:     y[16*i+10] += sum11;
2368:     y[16*i+11] += sum12;
2369:     y[16*i+12] += sum13;
2370:     y[16*i+13] += sum14;
2371:     y[16*i+14] += sum15;
2372:     y[16*i+15] += sum16;
2373:   }

2375:   PetscLogFlops(32.0*a->nz);
2376:   VecRestoreArrayRead(xx,&x);
2377:   VecRestoreArray(zz,&y);
2378:   return(0);
2379: }

2383: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2384: {
2385:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2386:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2387:   const PetscScalar *x,*v;
2388:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2389:   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2390:   PetscErrorCode    ierr;
2391:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2392:   PetscInt          n,i;

2395:   if (yy != zz) {VecCopy(yy,zz);}
2396:   VecGetArrayRead(xx,&x);
2397:   VecGetArray(zz,&y);
2398:   for (i=0; i<m; i++) {
2399:     idx    = a->j + a->i[i] ;
2400:     v      = a->a + a->i[i] ;
2401:     n      = a->i[i+1] - a->i[i];
2402:     alpha1 = x[16*i];
2403:     alpha2 = x[16*i+1];
2404:     alpha3 = x[16*i+2];
2405:     alpha4 = x[16*i+3];
2406:     alpha5 = x[16*i+4];
2407:     alpha6 = x[16*i+5];
2408:     alpha7 = x[16*i+6];
2409:     alpha8 = x[16*i+7];
2410:     alpha9  = x[16*i+8];
2411:     alpha10 = x[16*i+9];
2412:     alpha11 = x[16*i+10];
2413:     alpha12 = x[16*i+11];
2414:     alpha13 = x[16*i+12];
2415:     alpha14 = x[16*i+13];
2416:     alpha15 = x[16*i+14];
2417:     alpha16 = x[16*i+15];
2418:     while (n-->0) {
2419:       y[16*(*idx)]   += alpha1*(*v);
2420:       y[16*(*idx)+1] += alpha2*(*v);
2421:       y[16*(*idx)+2] += alpha3*(*v);
2422:       y[16*(*idx)+3] += alpha4*(*v);
2423:       y[16*(*idx)+4] += alpha5*(*v);
2424:       y[16*(*idx)+5] += alpha6*(*v);
2425:       y[16*(*idx)+6] += alpha7*(*v);
2426:       y[16*(*idx)+7] += alpha8*(*v);
2427:       y[16*(*idx)+8]  += alpha9*(*v);
2428:       y[16*(*idx)+9]  += alpha10*(*v);
2429:       y[16*(*idx)+10] += alpha11*(*v);
2430:       y[16*(*idx)+11] += alpha12*(*v);
2431:       y[16*(*idx)+12] += alpha13*(*v);
2432:       y[16*(*idx)+13] += alpha14*(*v);
2433:       y[16*(*idx)+14] += alpha15*(*v);
2434:       y[16*(*idx)+15] += alpha16*(*v);
2435:       idx++; v++;
2436:     }
2437:   }
2438:   PetscLogFlops(32.0*a->nz);
2439:   VecRestoreArrayRead(xx,&x);
2440:   VecRestoreArray(zz,&y);
2441:   return(0);
2442: }

2444: /*--------------------------------------------------------------------------------------------*/
2447: PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2448: {
2449:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2450:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2451:   const PetscScalar *x,*v;
2452:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2453:   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2454:   PetscErrorCode    ierr;
2455:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2456:   PetscInt          nonzerorow=0,n,i,jrow,j;

2459:   VecGetArrayRead(xx,&x);
2460:   VecGetArray(yy,&y);
2461:   idx  = a->j;
2462:   v    = a->a;
2463:   ii   = a->i;

2465:   for (i=0; i<m; i++) {
2466:     jrow = ii[i];
2467:     n    = ii[i+1] - jrow;
2468:     sum1  = 0.0;
2469:     sum2  = 0.0;
2470:     sum3  = 0.0;
2471:     sum4  = 0.0;
2472:     sum5  = 0.0;
2473:     sum6  = 0.0;
2474:     sum7  = 0.0;
2475:     sum8  = 0.0;
2476:     sum9  = 0.0;
2477:     sum10 = 0.0;
2478:     sum11 = 0.0;
2479:     sum12 = 0.0;
2480:     sum13 = 0.0;
2481:     sum14 = 0.0;
2482:     sum15 = 0.0;
2483:     sum16 = 0.0;
2484:     sum17 = 0.0;
2485:     sum18 = 0.0;
2486:     nonzerorow += (n>0);
2487:     for (j=0; j<n; j++) {
2488:       sum1  += v[jrow]*x[18*idx[jrow]];
2489:       sum2  += v[jrow]*x[18*idx[jrow]+1];
2490:       sum3  += v[jrow]*x[18*idx[jrow]+2];
2491:       sum4  += v[jrow]*x[18*idx[jrow]+3];
2492:       sum5  += v[jrow]*x[18*idx[jrow]+4];
2493:       sum6  += v[jrow]*x[18*idx[jrow]+5];
2494:       sum7  += v[jrow]*x[18*idx[jrow]+6];
2495:       sum8  += v[jrow]*x[18*idx[jrow]+7];
2496:       sum9  += v[jrow]*x[18*idx[jrow]+8];
2497:       sum10 += v[jrow]*x[18*idx[jrow]+9];
2498:       sum11 += v[jrow]*x[18*idx[jrow]+10];
2499:       sum12 += v[jrow]*x[18*idx[jrow]+11];
2500:       sum13 += v[jrow]*x[18*idx[jrow]+12];
2501:       sum14 += v[jrow]*x[18*idx[jrow]+13];
2502:       sum15 += v[jrow]*x[18*idx[jrow]+14];
2503:       sum16 += v[jrow]*x[18*idx[jrow]+15];
2504:       sum17 += v[jrow]*x[18*idx[jrow]+16];
2505:       sum18 += v[jrow]*x[18*idx[jrow]+17];
2506:       jrow++;
2507:      }
2508:     y[18*i]    = sum1;
2509:     y[18*i+1]  = sum2;
2510:     y[18*i+2]  = sum3;
2511:     y[18*i+3]  = sum4;
2512:     y[18*i+4]  = sum5;
2513:     y[18*i+5]  = sum6;
2514:     y[18*i+6]  = sum7;
2515:     y[18*i+7]  = sum8;
2516:     y[18*i+8]  = sum9;
2517:     y[18*i+9]  = sum10;
2518:     y[18*i+10] = sum11;
2519:     y[18*i+11] = sum12;
2520:     y[18*i+12] = sum13;
2521:     y[18*i+13] = sum14;
2522:     y[18*i+14] = sum15;
2523:     y[18*i+15] = sum16;
2524:     y[18*i+16] = sum17;
2525:     y[18*i+17] = sum18;
2526:   }

2528:   PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);
2529:   VecRestoreArrayRead(xx,&x);
2530:   VecRestoreArray(yy,&y);
2531:   return(0);
2532: }

2536: PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2537: {
2538:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2539:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2540:   const PetscScalar *x,*v;
2541:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2542:   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2543:   PetscErrorCode    ierr;
2544:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2545:   PetscInt          n,i;

2548:   VecSet(yy,0.0);
2549:   VecGetArrayRead(xx,&x);
2550:   VecGetArray(yy,&y);

2552:   for (i=0; i<m; i++) {
2553:     idx    = a->j + a->i[i] ;
2554:     v      = a->a + a->i[i] ;
2555:     n      = a->i[i+1] - a->i[i];
2556:     alpha1  = x[18*i];
2557:     alpha2  = x[18*i+1];
2558:     alpha3  = x[18*i+2];
2559:     alpha4  = x[18*i+3];
2560:     alpha5  = x[18*i+4];
2561:     alpha6  = x[18*i+5];
2562:     alpha7  = x[18*i+6];
2563:     alpha8  = x[18*i+7];
2564:     alpha9  = x[18*i+8];
2565:     alpha10 = x[18*i+9];
2566:     alpha11 = x[18*i+10];
2567:     alpha12 = x[18*i+11];
2568:     alpha13 = x[18*i+12];
2569:     alpha14 = x[18*i+13];
2570:     alpha15 = x[18*i+14];
2571:     alpha16 = x[18*i+15];
2572:     alpha17 = x[18*i+16];
2573:     alpha18 = x[18*i+17];
2574:     while (n-->0) {
2575:       y[18*(*idx)]    += alpha1*(*v);
2576:       y[18*(*idx)+1]  += alpha2*(*v);
2577:       y[18*(*idx)+2]  += alpha3*(*v);
2578:       y[18*(*idx)+3]  += alpha4*(*v);
2579:       y[18*(*idx)+4]  += alpha5*(*v);
2580:       y[18*(*idx)+5]  += alpha6*(*v);
2581:       y[18*(*idx)+6]  += alpha7*(*v);
2582:       y[18*(*idx)+7]  += alpha8*(*v);
2583:       y[18*(*idx)+8]  += alpha9*(*v);
2584:       y[18*(*idx)+9]  += alpha10*(*v);
2585:       y[18*(*idx)+10] += alpha11*(*v);
2586:       y[18*(*idx)+11] += alpha12*(*v);
2587:       y[18*(*idx)+12] += alpha13*(*v);
2588:       y[18*(*idx)+13] += alpha14*(*v);
2589:       y[18*(*idx)+14] += alpha15*(*v);
2590:       y[18*(*idx)+15] += alpha16*(*v);
2591:       y[18*(*idx)+16] += alpha17*(*v);
2592:       y[18*(*idx)+17] += alpha18*(*v);
2593:       idx++; v++;
2594:     }
2595:   }
2596:   PetscLogFlops(36.0*a->nz);
2597:   VecRestoreArrayRead(xx,&x);
2598:   VecRestoreArray(yy,&y);
2599:   return(0);
2600: }

2604: PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2605: {
2606:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2607:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2608:   const PetscScalar *x,*v;
2609:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2610:   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2611:   PetscErrorCode    ierr;
2612:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2613:   PetscInt          n,i,jrow,j;

2616:   if (yy != zz) {VecCopy(yy,zz);}
2617:   VecGetArrayRead(xx,&x);
2618:   VecGetArray(zz,&y);
2619:   idx  = a->j;
2620:   v    = a->a;
2621:   ii   = a->i;

2623:   for (i=0; i<m; i++) {
2624:     jrow = ii[i];
2625:     n    = ii[i+1] - jrow;
2626:     sum1  = 0.0;
2627:     sum2  = 0.0;
2628:     sum3  = 0.0;
2629:     sum4  = 0.0;
2630:     sum5  = 0.0;
2631:     sum6  = 0.0;
2632:     sum7  = 0.0;
2633:     sum8  = 0.0;
2634:     sum9  = 0.0;
2635:     sum10 = 0.0;
2636:     sum11 = 0.0;
2637:     sum12 = 0.0;
2638:     sum13 = 0.0;
2639:     sum14 = 0.0;
2640:     sum15 = 0.0;
2641:     sum16 = 0.0;
2642:     sum17 = 0.0;
2643:     sum18 = 0.0;
2644:     for (j=0; j<n; j++) {
2645:       sum1  += v[jrow]*x[18*idx[jrow]];
2646:       sum2  += v[jrow]*x[18*idx[jrow]+1];
2647:       sum3  += v[jrow]*x[18*idx[jrow]+2];
2648:       sum4  += v[jrow]*x[18*idx[jrow]+3];
2649:       sum5  += v[jrow]*x[18*idx[jrow]+4];
2650:       sum6  += v[jrow]*x[18*idx[jrow]+5];
2651:       sum7  += v[jrow]*x[18*idx[jrow]+6];
2652:       sum8  += v[jrow]*x[18*idx[jrow]+7];
2653:       sum9  += v[jrow]*x[18*idx[jrow]+8];
2654:       sum10 += v[jrow]*x[18*idx[jrow]+9];
2655:       sum11 += v[jrow]*x[18*idx[jrow]+10];
2656:       sum12 += v[jrow]*x[18*idx[jrow]+11];
2657:       sum13 += v[jrow]*x[18*idx[jrow]+12];
2658:       sum14 += v[jrow]*x[18*idx[jrow]+13];
2659:       sum15 += v[jrow]*x[18*idx[jrow]+14];
2660:       sum16 += v[jrow]*x[18*idx[jrow]+15];
2661:       sum17 += v[jrow]*x[18*idx[jrow]+16];
2662:       sum18 += v[jrow]*x[18*idx[jrow]+17];
2663:       jrow++;
2664:      }
2665:     y[18*i]    += sum1;
2666:     y[18*i+1]  += sum2;
2667:     y[18*i+2]  += sum3;
2668:     y[18*i+3]  += sum4;
2669:     y[18*i+4]  += sum5;
2670:     y[18*i+5]  += sum6;
2671:     y[18*i+6]  += sum7;
2672:     y[18*i+7]  += sum8;
2673:     y[18*i+8]  += sum9;
2674:     y[18*i+9]  += sum10;
2675:     y[18*i+10] += sum11;
2676:     y[18*i+11] += sum12;
2677:     y[18*i+12] += sum13;
2678:     y[18*i+13] += sum14;
2679:     y[18*i+14] += sum15;
2680:     y[18*i+15] += sum16;
2681:     y[18*i+16] += sum17;
2682:     y[18*i+17] += sum18;
2683:   }

2685:   PetscLogFlops(36.0*a->nz);
2686:   VecRestoreArrayRead(xx,&x);
2687:   VecRestoreArray(zz,&y);
2688:   return(0);
2689: }

2693: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2694: {
2695:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2696:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2697:   const PetscScalar *x,*v;
2698:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2699:   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2700:   PetscErrorCode    ierr;
2701:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2702:   PetscInt          n,i;

2705:   if (yy != zz) {VecCopy(yy,zz);}
2706:   VecGetArrayRead(xx,&x);
2707:   VecGetArray(zz,&y);
2708:   for (i=0; i<m; i++) {
2709:     idx    = a->j + a->i[i] ;
2710:     v      = a->a + a->i[i] ;
2711:     n      = a->i[i+1] - a->i[i];
2712:     alpha1 = x[18*i];
2713:     alpha2 = x[18*i+1];
2714:     alpha3 = x[18*i+2];
2715:     alpha4 = x[18*i+3];
2716:     alpha5 = x[18*i+4];
2717:     alpha6 = x[18*i+5];
2718:     alpha7 = x[18*i+6];
2719:     alpha8 = x[18*i+7];
2720:     alpha9  = x[18*i+8];
2721:     alpha10 = x[18*i+9];
2722:     alpha11 = x[18*i+10];
2723:     alpha12 = x[18*i+11];
2724:     alpha13 = x[18*i+12];
2725:     alpha14 = x[18*i+13];
2726:     alpha15 = x[18*i+14];
2727:     alpha16 = x[18*i+15];
2728:     alpha17 = x[18*i+16];
2729:     alpha18 = x[18*i+17];
2730:     while (n-->0) {
2731:       y[18*(*idx)]   += alpha1*(*v);
2732:       y[18*(*idx)+1] += alpha2*(*v);
2733:       y[18*(*idx)+2] += alpha3*(*v);
2734:       y[18*(*idx)+3] += alpha4*(*v);
2735:       y[18*(*idx)+4] += alpha5*(*v);
2736:       y[18*(*idx)+5] += alpha6*(*v);
2737:       y[18*(*idx)+6] += alpha7*(*v);
2738:       y[18*(*idx)+7] += alpha8*(*v);
2739:       y[18*(*idx)+8]  += alpha9*(*v);
2740:       y[18*(*idx)+9]  += alpha10*(*v);
2741:       y[18*(*idx)+10] += alpha11*(*v);
2742:       y[18*(*idx)+11] += alpha12*(*v);
2743:       y[18*(*idx)+12] += alpha13*(*v);
2744:       y[18*(*idx)+13] += alpha14*(*v);
2745:       y[18*(*idx)+14] += alpha15*(*v);
2746:       y[18*(*idx)+15] += alpha16*(*v);
2747:       y[18*(*idx)+16] += alpha17*(*v);
2748:       y[18*(*idx)+17] += alpha18*(*v);
2749:       idx++; v++;
2750:     }
2751:   }
2752:   PetscLogFlops(36.0*a->nz);
2753:   VecRestoreArrayRead(xx,&x);
2754:   VecRestoreArray(zz,&y);
2755:   return(0);
2756: }

2760: PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2761: {
2762:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2763:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2764:   const PetscScalar *x,*v;
2765:   PetscScalar       *y,*sums;
2766:   PetscErrorCode    ierr;
2767:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2768:   PetscInt          n,i,jrow,j,dof = b->dof,k;

2771:   VecGetArrayRead(xx,&x);
2772:   VecSet(yy,0.0);
2773:   VecGetArray(yy,&y);
2774:   idx  = a->j;
2775:   v    = a->a;
2776:   ii   = a->i;

2778:   for (i=0; i<m; i++) {
2779:     jrow = ii[i];
2780:     n    = ii[i+1] - jrow;
2781:     sums = y + dof*i;
2782:     for (j=0; j<n; j++) {
2783:       for (k=0; k<dof; k++) {
2784:         sums[k]  += v[jrow]*x[dof*idx[jrow]+k];
2785:       }
2786:       jrow++;
2787:     }
2788:   }

2790:   PetscLogFlops(2.0*dof*a->nz);
2791:   VecRestoreArrayRead(xx,&x);
2792:   VecRestoreArray(yy,&y);
2793:   return(0);
2794: }

2798: PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2799: {
2800:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2801:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2802:   const PetscScalar *x,*v;
2803:   PetscScalar       *y,*sums;
2804:   PetscErrorCode    ierr;
2805:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2806:   PetscInt          n,i,jrow,j,dof = b->dof,k;

2809:   if (yy != zz) {VecCopy(yy,zz);}
2810:   VecGetArrayRead(xx,&x);
2811:   VecGetArray(zz,&y);
2812:   idx  = a->j;
2813:   v    = a->a;
2814:   ii   = a->i;

2816:   for (i=0; i<m; i++) {
2817:     jrow = ii[i];
2818:     n    = ii[i+1] - jrow;
2819:     sums = y + dof*i;
2820:     for (j=0; j<n; j++) {
2821:       for (k=0; k<dof; k++) {
2822:         sums[k]  += v[jrow]*x[dof*idx[jrow]+k];
2823:       }
2824:       jrow++;
2825:     }
2826:   }

2828:   PetscLogFlops(2.0*dof*a->nz);
2829:   VecRestoreArrayRead(xx,&x);
2830:   VecRestoreArray(zz,&y);
2831:   return(0);
2832: }

2836: PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2837: {
2838:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2839:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2840:   const PetscScalar *x,*v,*alpha;
2841:   PetscScalar       *y;
2842:   PetscErrorCode    ierr;
2843:   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2844:   PetscInt          n,i,k;

2847:   VecGetArrayRead(xx,&x);
2848:   VecSet(yy,0.0);
2849:   VecGetArray(yy,&y);
2850:   for (i=0; i<m; i++) {
2851:     idx    = a->j + a->i[i] ;
2852:     v      = a->a + a->i[i] ;
2853:     n      = a->i[i+1] - a->i[i];
2854:     alpha  = x + dof*i;
2855:     while (n-->0) {
2856:       for (k=0; k<dof; k++) {
2857:         y[dof*(*idx)+k] += alpha[k]*(*v);
2858:       }
2859:       idx++; v++;
2860:     }
2861:   }
2862:   PetscLogFlops(2.0*dof*a->nz);
2863:   VecRestoreArrayRead(xx,&x);
2864:   VecRestoreArray(yy,&y);
2865:   return(0);
2866: }

2870: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2871: {
2872:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2873:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2874:   const PetscScalar *x,*v,*alpha;
2875:   PetscScalar       *y;
2876:   PetscErrorCode    ierr;
2877:   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2878:   PetscInt          n,i,k;

2881:   if (yy != zz) {VecCopy(yy,zz);}
2882:   VecGetArrayRead(xx,&x);
2883:   VecGetArray(zz,&y);
2884:   for (i=0; i<m; i++) {
2885:     idx    = a->j + a->i[i] ;
2886:     v      = a->a + a->i[i] ;
2887:     n      = a->i[i+1] - a->i[i];
2888:     alpha  = x + dof*i;
2889:     while (n-->0) {
2890:       for (k=0; k<dof; k++) {
2891:         y[dof*(*idx)+k] += alpha[k]*(*v);
2892:       }
2893:       idx++; v++;
2894:     }
2895:   }
2896:   PetscLogFlops(2.0*dof*a->nz);
2897:   VecRestoreArrayRead(xx,&x);
2898:   VecRestoreArray(zz,&y);
2899:   return(0);
2900: }

2902: /*===================================================================================*/
2905: PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2906: {
2907:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2911:   /* start the scatter */
2912:   VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2913:   (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
2914:   VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2915:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
2916:   return(0);
2917: }

2921: PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2922: {
2923:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2927:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2928:   (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
2929:   VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2930:   VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2931:   return(0);
2932: }

2936: PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2937: {
2938:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2942:   /* start the scatter */
2943:   VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2944:   (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
2945:   VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2946:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);
2947:   return(0);
2948: }

2952: PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2953: {
2954:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2958:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2959:   VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2960:   (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
2961:   VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2962:   return(0);
2963: }

2965: /* ----------------------------------------------------------------*/
2968: PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2969: {
2970:   /* This routine requires testing -- but it's getting better. */
2971:   PetscErrorCode     ierr;
2972:   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2973:   Mat_SeqMAIJ        *pp=(Mat_SeqMAIJ*)PP->data;
2974:   Mat                P=pp->AIJ;
2975:   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2976:   PetscInt           *pti,*ptj,*ptJ;
2977:   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2978:   const PetscInt     an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
2979:   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
2980:   MatScalar          *ca;
2981:   const PetscInt     *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;

2984:   /* Start timer */
2985:   PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);

2987:   /* Get ij structure of P^T */
2988:   MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);

2990:   cn = pn*ppdof;
2991:   /* Allocate ci array, arrays for fill computation and */
2992:   /* free space for accumulating nonzero column info */
2993:   PetscMalloc((cn+1)*sizeof(PetscInt),&ci);
2994:   ci[0] = 0;

2996:   /* Work arrays for rows of P^T*A */
2997:   PetscMalloc4(an,PetscInt,&ptadenserow,an,PetscInt,&ptasparserow,cn,PetscInt,&denserow,cn,PetscInt,&sparserow);
2998:   PetscMemzero(ptadenserow,an*sizeof(PetscInt));
2999:   PetscMemzero(denserow,cn*sizeof(PetscInt));

3001:   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
3002:   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
3003:   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
3004:   PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);
3005:   current_space = free_space;

3007:   /* Determine symbolic info for each row of C: */
3008:   for (i=0;i<pn;i++) {
3009:     ptnzi  = pti[i+1] - pti[i];
3010:     ptJ    = ptj + pti[i];
3011:     for (dof=0;dof<ppdof;dof++) {
3012:       ptanzi = 0;
3013:       /* Determine symbolic row of PtA: */
3014:       for (j=0;j<ptnzi;j++) {
3015:         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
3016:         arow = ptJ[j]*ppdof + dof;
3017:         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
3018:         anzj = ai[arow+1] - ai[arow];
3019:         ajj  = aj + ai[arow];
3020:         for (k=0;k<anzj;k++) {
3021:           if (!ptadenserow[ajj[k]]) {
3022:             ptadenserow[ajj[k]]    = -1;
3023:             ptasparserow[ptanzi++] = ajj[k];
3024:           }
3025:         }
3026:       }
3027:       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
3028:       ptaj = ptasparserow;
3029:       cnzi   = 0;
3030:       for (j=0;j<ptanzi;j++) {
3031:         /* Get offset within block of P */
3032:         pshift = *ptaj%ppdof;
3033:         /* Get block row of P */
3034:         prow = (*ptaj++)/ppdof; /* integer division */
3035:         /* P has same number of nonzeros per row as the compressed form */
3036:         pnzj = pi[prow+1] - pi[prow];
3037:         pjj  = pj + pi[prow];
3038:         for (k=0;k<pnzj;k++) {
3039:           /* Locations in C are shifted by the offset within the block */
3040:           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
3041:           if (!denserow[pjj[k]*ppdof+pshift]) {
3042:             denserow[pjj[k]*ppdof+pshift] = -1;
3043:             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
3044:           }
3045:         }
3046:       }

3048:       /* sort sparserow */
3049:       PetscSortInt(cnzi,sparserow);
3050: 
3051:       /* If free space is not available, make more free space */
3052:       /* Double the amount of total space in the list */
3053:       if (current_space->local_remaining<cnzi) {
3054:         PetscFreeSpaceGet(cnzi+current_space->total_array_size,&current_space);
3055:       }

3057:       /* Copy data into free space, and zero out denserows */
3058:       PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));
3059:       current_space->array           += cnzi;
3060:       current_space->local_used      += cnzi;
3061:       current_space->local_remaining -= cnzi;

3063:       for (j=0;j<ptanzi;j++) {
3064:         ptadenserow[ptasparserow[j]] = 0;
3065:       }
3066:       for (j=0;j<cnzi;j++) {
3067:         denserow[sparserow[j]] = 0;
3068:       }
3069:       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
3070:       /*        For now, we will recompute what is needed. */
3071:       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
3072:     }
3073:   }
3074:   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
3075:   /* Allocate space for cj, initialize cj, and */
3076:   /* destroy list of free space and other temporary array(s) */
3077:   PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);
3078:   PetscFreeSpaceContiguous(&free_space,cj);
3079:   PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);
3080: 
3081:   /* Allocate space for ca */
3082:   PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);
3083:   PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));
3084: 
3085:   /* put together the new matrix */
3086:   MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,cn,cn,ci,cj,ca,C);

3088:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3089:   /* Since these are PETSc arrays, change flags to free them as necessary. */
3090:   c          = (Mat_SeqAIJ *)((*C)->data);
3091:   c->free_a  = PETSC_TRUE;
3092:   c->free_ij = PETSC_TRUE;
3093:   c->nonew   = 0;

3095:   /* Clean up. */
3096:   MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);

3098:   PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);
3099:   return(0);
3100: }

3104: PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
3105: {
3106:   /* This routine requires testing -- first draft only */
3107:   PetscErrorCode  ierr;
3108:   Mat_SeqMAIJ     *pp=(Mat_SeqMAIJ*)PP->data;
3109:   Mat             P=pp->AIJ;
3110:   Mat_SeqAIJ      *a  = (Mat_SeqAIJ *) A->data;
3111:   Mat_SeqAIJ      *p  = (Mat_SeqAIJ *) P->data;
3112:   Mat_SeqAIJ      *c  = (Mat_SeqAIJ *) C->data;
3113:   const PetscInt  *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
3114:   const PetscInt  *ci=c->i,*cj=c->j,*cjj;
3115:   const PetscInt  am=A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3116:   PetscInt        i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3117:   const MatScalar *aa=a->a,*pa=p->a,*pA=p->a,*paj;
3118:   MatScalar       *ca=c->a,*caj,*apa;

3121:   /* Allocate temporary array for storage of one row of A*P */
3122:   PetscMalloc3(cn,MatScalar,&apa,cn,PetscInt,&apj,cn,PetscInt,&apjdense);
3123:   PetscMemzero(apa,cn*sizeof(MatScalar));
3124:   PetscMemzero(apj,cn*sizeof(PetscInt));
3125:   PetscMemzero(apjdense,cn*sizeof(PetscInt));

3127:   /* Clear old values in C */
3128:   PetscMemzero(ca,ci[cm]*sizeof(MatScalar));

3130:   for (i=0;i<am;i++) {
3131:     /* Form sparse row of A*P */
3132:     anzi  = ai[i+1] - ai[i];
3133:     apnzj = 0;
3134:     for (j=0;j<anzi;j++) {
3135:       /* Get offset within block of P */
3136:       pshift = *aj%ppdof;
3137:       /* Get block row of P */
3138:       prow   = *aj++/ppdof; /* integer division */
3139:       pnzj = pi[prow+1] - pi[prow];
3140:       pjj  = pj + pi[prow];
3141:       paj  = pa + pi[prow];
3142:       for (k=0;k<pnzj;k++) {
3143:         poffset = pjj[k]*ppdof+pshift;
3144:         if (!apjdense[poffset]) {
3145:           apjdense[poffset] = -1;
3146:           apj[apnzj++]      = poffset;
3147:         }
3148:         apa[poffset] += (*aa)*paj[k];
3149:       }
3150:       PetscLogFlops(2.0*pnzj);
3151:       aa++;
3152:     }

3154:     /* Sort the j index array for quick sparse axpy. */
3155:     /* Note: a array does not need sorting as it is in dense storage locations. */
3156:     PetscSortInt(apnzj,apj);

3158:     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
3159:     prow    = i/ppdof; /* integer division */
3160:     pshift  = i%ppdof;
3161:     poffset = pi[prow];
3162:     pnzi = pi[prow+1] - poffset;
3163:     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
3164:     pJ   = pj+poffset;
3165:     pA   = pa+poffset;
3166:     for (j=0;j<pnzi;j++) {
3167:       crow   = (*pJ)*ppdof+pshift;
3168:       cjj    = cj + ci[crow];
3169:       caj    = ca + ci[crow];
3170:       pJ++;
3171:       /* Perform sparse axpy operation.  Note cjj includes apj. */
3172:       for (k=0,nextap=0;nextap<apnzj;k++) {
3173:         if (cjj[k]==apj[nextap]) {
3174:           caj[k] += (*pA)*apa[apj[nextap++]];
3175:         }
3176:       }
3177:       PetscLogFlops(2.0*apnzj);
3178:       pA++;
3179:     }

3181:     /* Zero the current row info for A*P */
3182:     for (j=0;j<apnzj;j++) {
3183:       apa[apj[j]]      = 0.;
3184:       apjdense[apj[j]] = 0;
3185:     }
3186:   }

3188:   /* Assemble the final matrix and clean up */
3189:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
3190:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
3191:   PetscFree3(apa,apj,apjdense);
3192:   return(0);
3193: }

3197: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
3198: {
3199:   PetscErrorCode    ierr;

3202:   /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */
3203:   MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);
3204:   ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);
3205:   return(0);
3206: }

3210: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
3211: {
3213:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
3214:   return(0);
3215: }

3217: EXTERN_C_BEGIN
3220: PetscErrorCode  MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3221: {
3222:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
3223:   Mat               a = b->AIJ,B;
3224:   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)a->data;
3225:   PetscErrorCode    ierr;
3226:   PetscInt          m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3227:   PetscInt          *cols;
3228:   PetscScalar       *vals;

3231:   MatGetSize(a,&m,&n);
3232:   PetscMalloc(dof*m*sizeof(PetscInt),&ilen);
3233:   for (i=0; i<m; i++) {
3234:     nmax = PetscMax(nmax,aij->ilen[i]);
3235:     for (j=0; j<dof; j++) {
3236:       ilen[dof*i+j] = aij->ilen[i];
3237:     }
3238:   }
3239:   MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);
3240:   PetscFree(ilen);
3241:   PetscMalloc(nmax*sizeof(PetscInt),&icols);
3242:   ii   = 0;
3243:   for (i=0; i<m; i++) {
3244:     MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3245:     for (j=0; j<dof; j++) {
3246:       for (k=0; k<ncols; k++) {
3247:         icols[k] = dof*cols[k]+j;
3248:       }
3249:       MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3250:       ii++;
3251:     }
3252:     MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3253:   }
3254:   PetscFree(icols);
3255:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3256:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

3258:   if (reuse == MAT_REUSE_MATRIX) {
3259:     MatHeaderReplace(A,B);
3260:   } else {
3261:     *newmat = B;
3262:   }
3263:   return(0);
3264: }
3265: EXTERN_C_END

3267: #include <../src/mat/impls/aij/mpi/mpiaij.h>

3269: EXTERN_C_BEGIN
3272: PetscErrorCode  MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3273: {
3274:   Mat_MPIMAIJ       *maij = (Mat_MPIMAIJ*)A->data;
3275:   Mat               MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
3276:   Mat               MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
3277:   Mat_SeqAIJ        *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
3278:   Mat_SeqAIJ        *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
3279:   Mat_MPIAIJ        *mpiaij = (Mat_MPIAIJ*) maij->A->data;
3280:   PetscInt          dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0;
3281:   PetscInt          *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL;
3282:   PetscInt          rstart,cstart,*garray,ii,k;
3283:   PetscErrorCode    ierr;
3284:   PetscScalar       *vals,*ovals;

3287:   PetscMalloc2(A->rmap->n,PetscInt,&dnz,A->rmap->n,PetscInt,&onz);
3288:   for (i=0; i<A->rmap->n/dof; i++) {
3289:     nmax  = PetscMax(nmax,AIJ->ilen[i]);
3290:     onmax = PetscMax(onmax,OAIJ->ilen[i]);
3291:     for (j=0; j<dof; j++) {
3292:       dnz[dof*i+j] = AIJ->ilen[i];
3293:       onz[dof*i+j] = OAIJ->ilen[i];
3294:     }
3295:   }
3296:   MatCreateAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);
3297:   PetscFree2(dnz,onz);

3299:   PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);
3300:   rstart = dof*maij->A->rmap->rstart;
3301:   cstart = dof*maij->A->cmap->rstart;
3302:   garray = mpiaij->garray;

3304:   ii = rstart;
3305:   for (i=0; i<A->rmap->n/dof; i++) {
3306:     MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3307:     MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3308:     for (j=0; j<dof; j++) {
3309:       for (k=0; k<ncols; k++) {
3310:         icols[k] = cstart + dof*cols[k]+j;
3311:       }
3312:       for (k=0; k<oncols; k++) {
3313:         oicols[k] = dof*garray[ocols[k]]+j;
3314:       }
3315:       MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3316:       MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);
3317:       ii++;
3318:     }
3319:     MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3320:     MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3321:   }
3322:   PetscFree2(icols,oicols);

3324:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3325:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

3327:   if (reuse == MAT_REUSE_MATRIX) {
3328:     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
3329:     ((PetscObject)A)->refct = 1;
3330:     MatHeaderReplace(A,B);
3331:     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3332:   } else {
3333:     *newmat = B;
3334:   }
3335:   return(0);
3336: }
3337: EXTERN_C_END

3341: PetscErrorCode  MatGetSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
3342: {
3344:   Mat            A;

3347:   MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);
3348:   MatGetSubMatrix(A,isrow,iscol,cll,newmat);
3349:   MatDestroy(&A);
3350:   return(0);
3351: }

3353: /* ---------------------------------------------------------------------------------- */
3356: /*@C
3357:   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
3358:   operations for multicomponent problems.  It interpolates each component the same
3359:   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
3360:   and MATMPIAIJ for distributed matrices.

3362:   Collective

3364:   Input Parameters:
3365: + A - the AIJ matrix describing the action on blocks
3366: - dof - the block size (number of components per node)

3368:   Output Parameter:
3369: . maij - the new MAIJ matrix

3371:   Operations provided:
3372: + MatMult
3373: . MatMultTranspose
3374: . MatMultAdd
3375: . MatMultTransposeAdd
3376: - MatView

3378:   Level: advanced

3380: .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ
3381: @*/
3382: PetscErrorCode  MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
3383: {
3385:   PetscMPIInt    size;
3386:   PetscInt       n;
3387:   Mat            B;

3390:   PetscObjectReference((PetscObject)A);

3392:   if (dof == 1) {
3393:     *maij = A;
3394:   } else {
3395:     MatCreate(((PetscObject)A)->comm,&B);
3396:     MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);
3397:     PetscLayoutSetBlockSize(B->rmap,dof);
3398:     PetscLayoutSetBlockSize(B->cmap,dof);
3399:     PetscLayoutSetUp(B->rmap);
3400:     PetscLayoutSetUp(B->cmap);
3401:     B->assembled    = PETSC_TRUE;

3403:     MPI_Comm_size(((PetscObject)A)->comm,&size);
3404:     if (size == 1) {
3405:       Mat_SeqMAIJ    *b;

3407:       MatSetType(B,MATSEQMAIJ);
3408:       B->ops->setup   = PETSC_NULL;
3409:       B->ops->destroy = MatDestroy_SeqMAIJ;
3410:       B->ops->view    = MatView_SeqMAIJ;
3411:       b      = (Mat_SeqMAIJ*)B->data;
3412:       b->dof = dof;
3413:       b->AIJ = A;
3414:       if (dof == 2) {
3415:         B->ops->mult             = MatMult_SeqMAIJ_2;
3416:         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3417:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3418:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3419:       } else if (dof == 3) {
3420:         B->ops->mult             = MatMult_SeqMAIJ_3;
3421:         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3422:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3423:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3424:       } else if (dof == 4) {
3425:         B->ops->mult             = MatMult_SeqMAIJ_4;
3426:         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3427:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3428:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3429:       } else if (dof == 5) {
3430:         B->ops->mult             = MatMult_SeqMAIJ_5;
3431:         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3432:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3433:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
3434:       } else if (dof == 6) {
3435:         B->ops->mult             = MatMult_SeqMAIJ_6;
3436:         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
3437:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
3438:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3439:       } else if (dof == 7) {
3440:         B->ops->mult             = MatMult_SeqMAIJ_7;
3441:         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3442:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3443:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
3444:       } else if (dof == 8) {
3445:         B->ops->mult             = MatMult_SeqMAIJ_8;
3446:         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
3447:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
3448:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
3449:       } else if (dof == 9) {
3450:         B->ops->mult             = MatMult_SeqMAIJ_9;
3451:         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
3452:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
3453:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3454:       } else if (dof == 10) {
3455:         B->ops->mult             = MatMult_SeqMAIJ_10;
3456:         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3457:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3458:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3459:       } else if (dof == 11) {
3460:         B->ops->mult             = MatMult_SeqMAIJ_11;
3461:         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3462:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3463:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
3464:       } else if (dof == 16) {
3465:         B->ops->mult             = MatMult_SeqMAIJ_16;
3466:         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
3467:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
3468:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3469:       } else if (dof == 18) {
3470:         B->ops->mult             = MatMult_SeqMAIJ_18;
3471:         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3472:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3473:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
3474:       } else {
3475:         B->ops->mult             = MatMult_SeqMAIJ_N;
3476:         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
3477:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
3478:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
3479:       }
3480:       B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ;
3481:       B->ops->ptapnumeric_seqaij  = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
3482:       PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);
3483:     } else {
3484:       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ *)A->data;
3485:       Mat_MPIMAIJ *b;
3486:       IS          from,to;
3487:       Vec         gvec;

3489:       MatSetType(B,MATMPIMAIJ);
3490:       B->ops->setup   = PETSC_NULL;
3491:       B->ops->destroy = MatDestroy_MPIMAIJ;
3492:       B->ops->view    = MatView_MPIMAIJ;
3493:       b      = (Mat_MPIMAIJ*)B->data;
3494:       b->dof = dof;
3495:       b->A   = A;
3496:       MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);
3497:       MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);

3499:       VecGetSize(mpiaij->lvec,&n);
3500:       VecCreate(PETSC_COMM_SELF,&b->w);
3501:       VecSetSizes(b->w,n*dof,n*dof);
3502:       VecSetBlockSize(b->w,dof);
3503:       VecSetType(b->w,VECSEQ);

3505:       /* create two temporary Index sets for build scatter gather */
3506:       ISCreateBlock(((PetscObject)A)->comm,dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);
3507:       ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);

3509:       /* create temporary global vector to generate scatter context */
3510:       VecCreateMPIWithArray(((PetscObject)A)->comm,dof,dof*A->cmap->n,dof*A->cmap->N,PETSC_NULL,&gvec);

3512:       /* generate the scatter context */
3513:       VecScatterCreate(gvec,from,b->w,to,&b->ctx);

3515:       ISDestroy(&from);
3516:       ISDestroy(&to);
3517:       VecDestroy(&gvec);

3519:       B->ops->mult                = MatMult_MPIMAIJ_dof;
3520:       B->ops->multtranspose       = MatMultTranspose_MPIMAIJ_dof;
3521:       B->ops->multadd             = MatMultAdd_MPIMAIJ_dof;
3522:       B->ops->multtransposeadd    = MatMultTransposeAdd_MPIMAIJ_dof;
3523:       B->ops->ptapsymbolic_mpiaij = MatPtAPSymbolic_MPIAIJ_MPIMAIJ;
3524:       B->ops->ptapnumeric_mpiaij  = MatPtAPNumeric_MPIAIJ_MPIMAIJ;
3525:       PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);
3526:     }
3527:     B->ops->getsubmatrix        = MatGetSubMatrix_MAIJ;
3528:     MatSetUp(B);
3529:     *maij = B;
3530:     MatView_Private(B);
3531:   }
3532:   return(0);
3533: }