Actual source code: maij.c

petsc-3.8.4 2018-03-24
Report Typos and Errors

  2: /*
  3:     Defines the basic matrix operations for the MAIJ  matrix storage format.
  4:   This format is used for restriction and interpolation operations for
  5:   multicomponent problems. It interpolates each component the same way
  6:   independently.

  8:      We provide:
  9:          MatMult()
 10:          MatMultTranspose()
 11:          MatMultTransposeAdd()
 12:          MatMultAdd()
 13:           and
 14:          MatCreateMAIJ(Mat,dof,Mat*)

 16:      This single directory handles both the sequential and parallel codes
 17: */

 19:  #include <../src/mat/impls/maij/maij.h>
 20:  #include <../src/mat/utils/freespace.h>

 22: /*@
 23:    MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix

 25:    Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel

 27:    Input Parameter:
 28: .  A - the MAIJ matrix

 30:    Output Parameter:
 31: .  B - the AIJ matrix

 33:    Level: advanced

 35:    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.

 37: .seealso: MatCreateMAIJ()
 38: @*/
 39: PetscErrorCode  MatMAIJGetAIJ(Mat A,Mat *B)
 40: {
 42:   PetscBool      ismpimaij,isseqmaij;

 45:   PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
 46:   PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
 47:   if (ismpimaij) {
 48:     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;

 50:     *B = b->A;
 51:   } else if (isseqmaij) {
 52:     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;

 54:     *B = b->AIJ;
 55:   } else {
 56:     *B = A;
 57:   }
 58:   return(0);
 59: }

 61: /*@
 62:    MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size

 64:    Logically Collective

 66:    Input Parameter:
 67: +  A - the MAIJ matrix
 68: -  dof - the block size for the new matrix

 70:    Output Parameter:
 71: .  B - the new MAIJ matrix

 73:    Level: advanced

 75: .seealso: MatCreateMAIJ()
 76: @*/
 77: PetscErrorCode  MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
 78: {
 80:   Mat            Aij = NULL;

 84:   MatMAIJGetAIJ(A,&Aij);
 85:   MatCreateMAIJ(Aij,dof,B);
 86:   return(0);
 87: }

 89: PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
 90: {
 92:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;

 95:   MatDestroy(&b->AIJ);
 96:   PetscFree(A->data);
 97:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL);
 98:   PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaij_seqmaij_C",NULL);
 99:   PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaijperm_seqmaij_C",NULL);
100:   PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaijmkl_seqmaij_C",NULL);
101:   return(0);
102: }

104: PetscErrorCode MatSetUp_MAIJ(Mat A)
105: {
107:   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices");
108:   return(0);
109: }

111: PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
112: {
114:   Mat            B;

117:   MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
118:   MatView(B,viewer);
119:   MatDestroy(&B);
120:   return(0);
121: }

123: PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
124: {
126:   Mat            B;

129:   MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
130:   MatView(B,viewer);
131:   MatDestroy(&B);
132:   return(0);
133: }

135: PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
136: {
138:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

141:   MatDestroy(&b->AIJ);
142:   MatDestroy(&b->OAIJ);
143:   MatDestroy(&b->A);
144:   VecScatterDestroy(&b->ctx);
145:   VecDestroy(&b->w);
146:   PetscFree(A->data);
147:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL);
148:   PetscObjectComposeFunction((PetscObject)A,"MatPtAP_mpiaij_mpimaij_C",NULL);
149:   PetscObjectChangeTypeName((PetscObject)A,0);
150:   return(0);
151: }

153: /*MC
154:   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
155:   multicomponent problems, interpolating or restricting each component the same way independently.
156:   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.

158:   Operations provided:
159: . MatMult
160: . MatMultTranspose
161: . MatMultAdd
162: . MatMultTransposeAdd

164:   Level: advanced

166: .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ()
167: M*/

169: PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
170: {
172:   Mat_MPIMAIJ    *b;
173:   PetscMPIInt    size;

176:   PetscNewLog(A,&b);
177:   A->data  = (void*)b;

179:   PetscMemzero(A->ops,sizeof(struct _MatOps));

181:   A->ops->setup = MatSetUp_MAIJ;

183:   b->AIJ  = 0;
184:   b->dof  = 0;
185:   b->OAIJ = 0;
186:   b->ctx  = 0;
187:   b->w    = 0;
188:   MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
189:   if (size == 1) {
190:     PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);
191:   } else {
192:     PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);
193:   }
194:   A->preallocated  = PETSC_TRUE;
195:   A->assembled     = PETSC_TRUE;
196:   return(0);
197: }

199: /* --------------------------------------------------------------------------------------*/
200: PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
201: {
202:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
203:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
204:   const PetscScalar *x,*v;
205:   PetscScalar       *y, sum1, sum2;
206:   PetscErrorCode    ierr;
207:   PetscInt          nonzerorow=0,n,i,jrow,j;
208:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;

211:   VecGetArrayRead(xx,&x);
212:   VecGetArray(yy,&y);
213:   idx  = a->j;
214:   v    = a->a;
215:   ii   = a->i;

217:   for (i=0; i<m; i++) {
218:     jrow  = ii[i];
219:     n     = ii[i+1] - jrow;
220:     sum1  = 0.0;
221:     sum2  = 0.0;

223:     nonzerorow += (n>0);
224:     for (j=0; j<n; j++) {
225:       sum1 += v[jrow]*x[2*idx[jrow]];
226:       sum2 += v[jrow]*x[2*idx[jrow]+1];
227:       jrow++;
228:     }
229:     y[2*i]   = sum1;
230:     y[2*i+1] = sum2;
231:   }

233:   PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);
234:   VecRestoreArrayRead(xx,&x);
235:   VecRestoreArray(yy,&y);
236:   return(0);
237: }

239: PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
240: {
241:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
242:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
243:   const PetscScalar *x,*v;
244:   PetscScalar       *y,alpha1,alpha2;
245:   PetscErrorCode    ierr;
246:   PetscInt          n,i;
247:   const PetscInt    m = b->AIJ->rmap->n,*idx;

250:   VecSet(yy,0.0);
251:   VecGetArrayRead(xx,&x);
252:   VecGetArray(yy,&y);

254:   for (i=0; i<m; i++) {
255:     idx    = a->j + a->i[i];
256:     v      = a->a + a->i[i];
257:     n      = a->i[i+1] - a->i[i];
258:     alpha1 = x[2*i];
259:     alpha2 = x[2*i+1];
260:     while (n-->0) {
261:       y[2*(*idx)]   += alpha1*(*v);
262:       y[2*(*idx)+1] += alpha2*(*v);
263:       idx++; v++;
264:     }
265:   }
266:   PetscLogFlops(4.0*a->nz);
267:   VecRestoreArrayRead(xx,&x);
268:   VecRestoreArray(yy,&y);
269:   return(0);
270: }

272: PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
273: {
274:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
275:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
276:   const PetscScalar *x,*v;
277:   PetscScalar       *y,sum1, sum2;
278:   PetscErrorCode    ierr;
279:   PetscInt          n,i,jrow,j;
280:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

283:   if (yy != zz) {VecCopy(yy,zz);}
284:   VecGetArrayRead(xx,&x);
285:   VecGetArray(zz,&y);
286:   idx  = a->j;
287:   v    = a->a;
288:   ii   = a->i;

290:   for (i=0; i<m; i++) {
291:     jrow = ii[i];
292:     n    = ii[i+1] - jrow;
293:     sum1 = 0.0;
294:     sum2 = 0.0;
295:     for (j=0; j<n; j++) {
296:       sum1 += v[jrow]*x[2*idx[jrow]];
297:       sum2 += v[jrow]*x[2*idx[jrow]+1];
298:       jrow++;
299:     }
300:     y[2*i]   += sum1;
301:     y[2*i+1] += sum2;
302:   }

304:   PetscLogFlops(4.0*a->nz);
305:   VecRestoreArrayRead(xx,&x);
306:   VecRestoreArray(zz,&y);
307:   return(0);
308: }
309: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
310: {
311:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
312:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
313:   const PetscScalar *x,*v;
314:   PetscScalar       *y,alpha1,alpha2;
315:   PetscErrorCode    ierr;
316:   PetscInt          n,i;
317:   const PetscInt    m = b->AIJ->rmap->n,*idx;

320:   if (yy != zz) {VecCopy(yy,zz);}
321:   VecGetArrayRead(xx,&x);
322:   VecGetArray(zz,&y);

324:   for (i=0; i<m; i++) {
325:     idx    = a->j + a->i[i];
326:     v      = a->a + a->i[i];
327:     n      = a->i[i+1] - a->i[i];
328:     alpha1 = x[2*i];
329:     alpha2 = x[2*i+1];
330:     while (n-->0) {
331:       y[2*(*idx)]   += alpha1*(*v);
332:       y[2*(*idx)+1] += alpha2*(*v);
333:       idx++; v++;
334:     }
335:   }
336:   PetscLogFlops(4.0*a->nz);
337:   VecRestoreArrayRead(xx,&x);
338:   VecRestoreArray(zz,&y);
339:   return(0);
340: }
341: /* --------------------------------------------------------------------------------------*/
342: PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
343: {
344:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
345:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
346:   const PetscScalar *x,*v;
347:   PetscScalar       *y,sum1, sum2, sum3;
348:   PetscErrorCode    ierr;
349:   PetscInt          nonzerorow=0,n,i,jrow,j;
350:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;

353:   VecGetArrayRead(xx,&x);
354:   VecGetArray(yy,&y);
355:   idx  = a->j;
356:   v    = a->a;
357:   ii   = a->i;

359:   for (i=0; i<m; i++) {
360:     jrow  = ii[i];
361:     n     = ii[i+1] - jrow;
362:     sum1  = 0.0;
363:     sum2  = 0.0;
364:     sum3  = 0.0;

366:     nonzerorow += (n>0);
367:     for (j=0; j<n; j++) {
368:       sum1 += v[jrow]*x[3*idx[jrow]];
369:       sum2 += v[jrow]*x[3*idx[jrow]+1];
370:       sum3 += v[jrow]*x[3*idx[jrow]+2];
371:       jrow++;
372:      }
373:     y[3*i]   = sum1;
374:     y[3*i+1] = sum2;
375:     y[3*i+2] = sum3;
376:   }

378:   PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);
379:   VecRestoreArrayRead(xx,&x);
380:   VecRestoreArray(yy,&y);
381:   return(0);
382: }

384: PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
385: {
386:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
387:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
388:   const PetscScalar *x,*v;
389:   PetscScalar       *y,alpha1,alpha2,alpha3;
390:   PetscErrorCode    ierr;
391:   PetscInt          n,i;
392:   const PetscInt    m = b->AIJ->rmap->n,*idx;

395:   VecSet(yy,0.0);
396:   VecGetArrayRead(xx,&x);
397:   VecGetArray(yy,&y);

399:   for (i=0; i<m; i++) {
400:     idx    = a->j + a->i[i];
401:     v      = a->a + a->i[i];
402:     n      = a->i[i+1] - a->i[i];
403:     alpha1 = x[3*i];
404:     alpha2 = x[3*i+1];
405:     alpha3 = x[3*i+2];
406:     while (n-->0) {
407:       y[3*(*idx)]   += alpha1*(*v);
408:       y[3*(*idx)+1] += alpha2*(*v);
409:       y[3*(*idx)+2] += alpha3*(*v);
410:       idx++; v++;
411:     }
412:   }
413:   PetscLogFlops(6.0*a->nz);
414:   VecRestoreArrayRead(xx,&x);
415:   VecRestoreArray(yy,&y);
416:   return(0);
417: }

419: PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
420: {
421:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
422:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
423:   const PetscScalar *x,*v;
424:   PetscScalar       *y,sum1, sum2, sum3;
425:   PetscErrorCode    ierr;
426:   PetscInt          n,i,jrow,j;
427:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

430:   if (yy != zz) {VecCopy(yy,zz);}
431:   VecGetArrayRead(xx,&x);
432:   VecGetArray(zz,&y);
433:   idx  = a->j;
434:   v    = a->a;
435:   ii   = a->i;

437:   for (i=0; i<m; i++) {
438:     jrow = ii[i];
439:     n    = ii[i+1] - jrow;
440:     sum1 = 0.0;
441:     sum2 = 0.0;
442:     sum3 = 0.0;
443:     for (j=0; j<n; j++) {
444:       sum1 += v[jrow]*x[3*idx[jrow]];
445:       sum2 += v[jrow]*x[3*idx[jrow]+1];
446:       sum3 += v[jrow]*x[3*idx[jrow]+2];
447:       jrow++;
448:     }
449:     y[3*i]   += sum1;
450:     y[3*i+1] += sum2;
451:     y[3*i+2] += sum3;
452:   }

454:   PetscLogFlops(6.0*a->nz);
455:   VecRestoreArrayRead(xx,&x);
456:   VecRestoreArray(zz,&y);
457:   return(0);
458: }
459: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
460: {
461:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
462:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
463:   const PetscScalar *x,*v;
464:   PetscScalar       *y,alpha1,alpha2,alpha3;
465:   PetscErrorCode    ierr;
466:   PetscInt          n,i;
467:   const PetscInt    m = b->AIJ->rmap->n,*idx;

470:   if (yy != zz) {VecCopy(yy,zz);}
471:   VecGetArrayRead(xx,&x);
472:   VecGetArray(zz,&y);
473:   for (i=0; i<m; i++) {
474:     idx    = a->j + a->i[i];
475:     v      = a->a + a->i[i];
476:     n      = a->i[i+1] - a->i[i];
477:     alpha1 = x[3*i];
478:     alpha2 = x[3*i+1];
479:     alpha3 = x[3*i+2];
480:     while (n-->0) {
481:       y[3*(*idx)]   += alpha1*(*v);
482:       y[3*(*idx)+1] += alpha2*(*v);
483:       y[3*(*idx)+2] += alpha3*(*v);
484:       idx++; v++;
485:     }
486:   }
487:   PetscLogFlops(6.0*a->nz);
488:   VecRestoreArrayRead(xx,&x);
489:   VecRestoreArray(zz,&y);
490:   return(0);
491: }

493: /* ------------------------------------------------------------------------------*/
494: PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
495: {
496:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
497:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
498:   const PetscScalar *x,*v;
499:   PetscScalar       *y,sum1, sum2, sum3, sum4;
500:   PetscErrorCode    ierr;
501:   PetscInt          nonzerorow=0,n,i,jrow,j;
502:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;

505:   VecGetArrayRead(xx,&x);
506:   VecGetArray(yy,&y);
507:   idx  = a->j;
508:   v    = a->a;
509:   ii   = a->i;

511:   for (i=0; i<m; i++) {
512:     jrow        = ii[i];
513:     n           = ii[i+1] - jrow;
514:     sum1        = 0.0;
515:     sum2        = 0.0;
516:     sum3        = 0.0;
517:     sum4        = 0.0;
518:     nonzerorow += (n>0);
519:     for (j=0; j<n; j++) {
520:       sum1 += v[jrow]*x[4*idx[jrow]];
521:       sum2 += v[jrow]*x[4*idx[jrow]+1];
522:       sum3 += v[jrow]*x[4*idx[jrow]+2];
523:       sum4 += v[jrow]*x[4*idx[jrow]+3];
524:       jrow++;
525:     }
526:     y[4*i]   = sum1;
527:     y[4*i+1] = sum2;
528:     y[4*i+2] = sum3;
529:     y[4*i+3] = sum4;
530:   }

532:   PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);
533:   VecRestoreArrayRead(xx,&x);
534:   VecRestoreArray(yy,&y);
535:   return(0);
536: }

538: PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
539: {
540:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
541:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
542:   const PetscScalar *x,*v;
543:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
544:   PetscErrorCode    ierr;
545:   PetscInt          n,i;
546:   const PetscInt    m = b->AIJ->rmap->n,*idx;

549:   VecSet(yy,0.0);
550:   VecGetArrayRead(xx,&x);
551:   VecGetArray(yy,&y);
552:   for (i=0; i<m; i++) {
553:     idx    = a->j + a->i[i];
554:     v      = a->a + a->i[i];
555:     n      = a->i[i+1] - a->i[i];
556:     alpha1 = x[4*i];
557:     alpha2 = x[4*i+1];
558:     alpha3 = x[4*i+2];
559:     alpha4 = x[4*i+3];
560:     while (n-->0) {
561:       y[4*(*idx)]   += alpha1*(*v);
562:       y[4*(*idx)+1] += alpha2*(*v);
563:       y[4*(*idx)+2] += alpha3*(*v);
564:       y[4*(*idx)+3] += alpha4*(*v);
565:       idx++; v++;
566:     }
567:   }
568:   PetscLogFlops(8.0*a->nz);
569:   VecRestoreArrayRead(xx,&x);
570:   VecRestoreArray(yy,&y);
571:   return(0);
572: }

574: PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
575: {
576:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
577:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
578:   const PetscScalar *x,*v;
579:   PetscScalar       *y,sum1, sum2, sum3, sum4;
580:   PetscErrorCode    ierr;
581:   PetscInt          n,i,jrow,j;
582:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

585:   if (yy != zz) {VecCopy(yy,zz);}
586:   VecGetArrayRead(xx,&x);
587:   VecGetArray(zz,&y);
588:   idx  = a->j;
589:   v    = a->a;
590:   ii   = a->i;

592:   for (i=0; i<m; i++) {
593:     jrow = ii[i];
594:     n    = ii[i+1] - jrow;
595:     sum1 = 0.0;
596:     sum2 = 0.0;
597:     sum3 = 0.0;
598:     sum4 = 0.0;
599:     for (j=0; j<n; j++) {
600:       sum1 += v[jrow]*x[4*idx[jrow]];
601:       sum2 += v[jrow]*x[4*idx[jrow]+1];
602:       sum3 += v[jrow]*x[4*idx[jrow]+2];
603:       sum4 += v[jrow]*x[4*idx[jrow]+3];
604:       jrow++;
605:     }
606:     y[4*i]   += sum1;
607:     y[4*i+1] += sum2;
608:     y[4*i+2] += sum3;
609:     y[4*i+3] += sum4;
610:   }

612:   PetscLogFlops(8.0*a->nz);
613:   VecRestoreArrayRead(xx,&x);
614:   VecRestoreArray(zz,&y);
615:   return(0);
616: }
617: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
618: {
619:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
620:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
621:   const PetscScalar *x,*v;
622:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
623:   PetscErrorCode    ierr;
624:   PetscInt          n,i;
625:   const PetscInt    m = b->AIJ->rmap->n,*idx;

628:   if (yy != zz) {VecCopy(yy,zz);}
629:   VecGetArrayRead(xx,&x);
630:   VecGetArray(zz,&y);

632:   for (i=0; i<m; i++) {
633:     idx    = a->j + a->i[i];
634:     v      = a->a + a->i[i];
635:     n      = a->i[i+1] - a->i[i];
636:     alpha1 = x[4*i];
637:     alpha2 = x[4*i+1];
638:     alpha3 = x[4*i+2];
639:     alpha4 = x[4*i+3];
640:     while (n-->0) {
641:       y[4*(*idx)]   += alpha1*(*v);
642:       y[4*(*idx)+1] += alpha2*(*v);
643:       y[4*(*idx)+2] += alpha3*(*v);
644:       y[4*(*idx)+3] += alpha4*(*v);
645:       idx++; v++;
646:     }
647:   }
648:   PetscLogFlops(8.0*a->nz);
649:   VecRestoreArrayRead(xx,&x);
650:   VecRestoreArray(zz,&y);
651:   return(0);
652: }
653: /* ------------------------------------------------------------------------------*/

655: PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
656: {
657:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
658:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
659:   const PetscScalar *x,*v;
660:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
661:   PetscErrorCode    ierr;
662:   PetscInt          nonzerorow=0,n,i,jrow,j;
663:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;

666:   VecGetArrayRead(xx,&x);
667:   VecGetArray(yy,&y);
668:   idx  = a->j;
669:   v    = a->a;
670:   ii   = a->i;

672:   for (i=0; i<m; i++) {
673:     jrow  = ii[i];
674:     n     = ii[i+1] - jrow;
675:     sum1  = 0.0;
676:     sum2  = 0.0;
677:     sum3  = 0.0;
678:     sum4  = 0.0;
679:     sum5  = 0.0;

681:     nonzerorow += (n>0);
682:     for (j=0; j<n; j++) {
683:       sum1 += v[jrow]*x[5*idx[jrow]];
684:       sum2 += v[jrow]*x[5*idx[jrow]+1];
685:       sum3 += v[jrow]*x[5*idx[jrow]+2];
686:       sum4 += v[jrow]*x[5*idx[jrow]+3];
687:       sum5 += v[jrow]*x[5*idx[jrow]+4];
688:       jrow++;
689:     }
690:     y[5*i]   = sum1;
691:     y[5*i+1] = sum2;
692:     y[5*i+2] = sum3;
693:     y[5*i+3] = sum4;
694:     y[5*i+4] = sum5;
695:   }

697:   PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);
698:   VecRestoreArrayRead(xx,&x);
699:   VecRestoreArray(yy,&y);
700:   return(0);
701: }

703: PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
704: {
705:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
706:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
707:   const PetscScalar *x,*v;
708:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
709:   PetscErrorCode    ierr;
710:   PetscInt          n,i;
711:   const PetscInt    m = b->AIJ->rmap->n,*idx;

714:   VecSet(yy,0.0);
715:   VecGetArrayRead(xx,&x);
716:   VecGetArray(yy,&y);

718:   for (i=0; i<m; i++) {
719:     idx    = a->j + a->i[i];
720:     v      = a->a + a->i[i];
721:     n      = a->i[i+1] - a->i[i];
722:     alpha1 = x[5*i];
723:     alpha2 = x[5*i+1];
724:     alpha3 = x[5*i+2];
725:     alpha4 = x[5*i+3];
726:     alpha5 = x[5*i+4];
727:     while (n-->0) {
728:       y[5*(*idx)]   += alpha1*(*v);
729:       y[5*(*idx)+1] += alpha2*(*v);
730:       y[5*(*idx)+2] += alpha3*(*v);
731:       y[5*(*idx)+3] += alpha4*(*v);
732:       y[5*(*idx)+4] += alpha5*(*v);
733:       idx++; v++;
734:     }
735:   }
736:   PetscLogFlops(10.0*a->nz);
737:   VecRestoreArrayRead(xx,&x);
738:   VecRestoreArray(yy,&y);
739:   return(0);
740: }

742: PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
743: {
744:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
745:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
746:   const PetscScalar *x,*v;
747:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
748:   PetscErrorCode    ierr;
749:   PetscInt          n,i,jrow,j;
750:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

753:   if (yy != zz) {VecCopy(yy,zz);}
754:   VecGetArrayRead(xx,&x);
755:   VecGetArray(zz,&y);
756:   idx  = a->j;
757:   v    = a->a;
758:   ii   = a->i;

760:   for (i=0; i<m; i++) {
761:     jrow = ii[i];
762:     n    = ii[i+1] - jrow;
763:     sum1 = 0.0;
764:     sum2 = 0.0;
765:     sum3 = 0.0;
766:     sum4 = 0.0;
767:     sum5 = 0.0;
768:     for (j=0; j<n; j++) {
769:       sum1 += v[jrow]*x[5*idx[jrow]];
770:       sum2 += v[jrow]*x[5*idx[jrow]+1];
771:       sum3 += v[jrow]*x[5*idx[jrow]+2];
772:       sum4 += v[jrow]*x[5*idx[jrow]+3];
773:       sum5 += v[jrow]*x[5*idx[jrow]+4];
774:       jrow++;
775:     }
776:     y[5*i]   += sum1;
777:     y[5*i+1] += sum2;
778:     y[5*i+2] += sum3;
779:     y[5*i+3] += sum4;
780:     y[5*i+4] += sum5;
781:   }

783:   PetscLogFlops(10.0*a->nz);
784:   VecRestoreArrayRead(xx,&x);
785:   VecRestoreArray(zz,&y);
786:   return(0);
787: }

789: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
790: {
791:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
792:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
793:   const PetscScalar *x,*v;
794:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
795:   PetscErrorCode    ierr;
796:   PetscInt          n,i;
797:   const PetscInt    m = b->AIJ->rmap->n,*idx;

800:   if (yy != zz) {VecCopy(yy,zz);}
801:   VecGetArrayRead(xx,&x);
802:   VecGetArray(zz,&y);

804:   for (i=0; i<m; i++) {
805:     idx    = a->j + a->i[i];
806:     v      = a->a + a->i[i];
807:     n      = a->i[i+1] - a->i[i];
808:     alpha1 = x[5*i];
809:     alpha2 = x[5*i+1];
810:     alpha3 = x[5*i+2];
811:     alpha4 = x[5*i+3];
812:     alpha5 = x[5*i+4];
813:     while (n-->0) {
814:       y[5*(*idx)]   += alpha1*(*v);
815:       y[5*(*idx)+1] += alpha2*(*v);
816:       y[5*(*idx)+2] += alpha3*(*v);
817:       y[5*(*idx)+3] += alpha4*(*v);
818:       y[5*(*idx)+4] += alpha5*(*v);
819:       idx++; v++;
820:     }
821:   }
822:   PetscLogFlops(10.0*a->nz);
823:   VecRestoreArrayRead(xx,&x);
824:   VecRestoreArray(zz,&y);
825:   return(0);
826: }

828: /* ------------------------------------------------------------------------------*/
829: PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
830: {
831:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
832:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
833:   const PetscScalar *x,*v;
834:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
835:   PetscErrorCode    ierr;
836:   PetscInt          nonzerorow=0,n,i,jrow,j;
837:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;

840:   VecGetArrayRead(xx,&x);
841:   VecGetArray(yy,&y);
842:   idx  = a->j;
843:   v    = a->a;
844:   ii   = a->i;

846:   for (i=0; i<m; i++) {
847:     jrow  = ii[i];
848:     n     = ii[i+1] - jrow;
849:     sum1  = 0.0;
850:     sum2  = 0.0;
851:     sum3  = 0.0;
852:     sum4  = 0.0;
853:     sum5  = 0.0;
854:     sum6  = 0.0;

856:     nonzerorow += (n>0);
857:     for (j=0; j<n; j++) {
858:       sum1 += v[jrow]*x[6*idx[jrow]];
859:       sum2 += v[jrow]*x[6*idx[jrow]+1];
860:       sum3 += v[jrow]*x[6*idx[jrow]+2];
861:       sum4 += v[jrow]*x[6*idx[jrow]+3];
862:       sum5 += v[jrow]*x[6*idx[jrow]+4];
863:       sum6 += v[jrow]*x[6*idx[jrow]+5];
864:       jrow++;
865:     }
866:     y[6*i]   = sum1;
867:     y[6*i+1] = sum2;
868:     y[6*i+2] = sum3;
869:     y[6*i+3] = sum4;
870:     y[6*i+4] = sum5;
871:     y[6*i+5] = sum6;
872:   }

874:   PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);
875:   VecRestoreArrayRead(xx,&x);
876:   VecRestoreArray(yy,&y);
877:   return(0);
878: }

880: PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
881: {
882:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
883:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
884:   const PetscScalar *x,*v;
885:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
886:   PetscErrorCode    ierr;
887:   PetscInt          n,i;
888:   const PetscInt    m = b->AIJ->rmap->n,*idx;

891:   VecSet(yy,0.0);
892:   VecGetArrayRead(xx,&x);
893:   VecGetArray(yy,&y);

895:   for (i=0; i<m; i++) {
896:     idx    = a->j + a->i[i];
897:     v      = a->a + a->i[i];
898:     n      = a->i[i+1] - a->i[i];
899:     alpha1 = x[6*i];
900:     alpha2 = x[6*i+1];
901:     alpha3 = x[6*i+2];
902:     alpha4 = x[6*i+3];
903:     alpha5 = x[6*i+4];
904:     alpha6 = x[6*i+5];
905:     while (n-->0) {
906:       y[6*(*idx)]   += alpha1*(*v);
907:       y[6*(*idx)+1] += alpha2*(*v);
908:       y[6*(*idx)+2] += alpha3*(*v);
909:       y[6*(*idx)+3] += alpha4*(*v);
910:       y[6*(*idx)+4] += alpha5*(*v);
911:       y[6*(*idx)+5] += alpha6*(*v);
912:       idx++; v++;
913:     }
914:   }
915:   PetscLogFlops(12.0*a->nz);
916:   VecRestoreArrayRead(xx,&x);
917:   VecRestoreArray(yy,&y);
918:   return(0);
919: }

921: PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
922: {
923:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
924:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
925:   const PetscScalar *x,*v;
926:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
927:   PetscErrorCode    ierr;
928:   PetscInt          n,i,jrow,j;
929:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

932:   if (yy != zz) {VecCopy(yy,zz);}
933:   VecGetArrayRead(xx,&x);
934:   VecGetArray(zz,&y);
935:   idx  = a->j;
936:   v    = a->a;
937:   ii   = a->i;

939:   for (i=0; i<m; i++) {
940:     jrow = ii[i];
941:     n    = ii[i+1] - jrow;
942:     sum1 = 0.0;
943:     sum2 = 0.0;
944:     sum3 = 0.0;
945:     sum4 = 0.0;
946:     sum5 = 0.0;
947:     sum6 = 0.0;
948:     for (j=0; j<n; j++) {
949:       sum1 += v[jrow]*x[6*idx[jrow]];
950:       sum2 += v[jrow]*x[6*idx[jrow]+1];
951:       sum3 += v[jrow]*x[6*idx[jrow]+2];
952:       sum4 += v[jrow]*x[6*idx[jrow]+3];
953:       sum5 += v[jrow]*x[6*idx[jrow]+4];
954:       sum6 += v[jrow]*x[6*idx[jrow]+5];
955:       jrow++;
956:     }
957:     y[6*i]   += sum1;
958:     y[6*i+1] += sum2;
959:     y[6*i+2] += sum3;
960:     y[6*i+3] += sum4;
961:     y[6*i+4] += sum5;
962:     y[6*i+5] += sum6;
963:   }

965:   PetscLogFlops(12.0*a->nz);
966:   VecRestoreArrayRead(xx,&x);
967:   VecRestoreArray(zz,&y);
968:   return(0);
969: }

971: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
972: {
973:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
974:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
975:   const PetscScalar *x,*v;
976:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
977:   PetscErrorCode    ierr;
978:   PetscInt          n,i;
979:   const PetscInt    m = b->AIJ->rmap->n,*idx;

982:   if (yy != zz) {VecCopy(yy,zz);}
983:   VecGetArrayRead(xx,&x);
984:   VecGetArray(zz,&y);

986:   for (i=0; i<m; i++) {
987:     idx    = a->j + a->i[i];
988:     v      = a->a + a->i[i];
989:     n      = a->i[i+1] - a->i[i];
990:     alpha1 = x[6*i];
991:     alpha2 = x[6*i+1];
992:     alpha3 = x[6*i+2];
993:     alpha4 = x[6*i+3];
994:     alpha5 = x[6*i+4];
995:     alpha6 = x[6*i+5];
996:     while (n-->0) {
997:       y[6*(*idx)]   += alpha1*(*v);
998:       y[6*(*idx)+1] += alpha2*(*v);
999:       y[6*(*idx)+2] += alpha3*(*v);
1000:       y[6*(*idx)+3] += alpha4*(*v);
1001:       y[6*(*idx)+4] += alpha5*(*v);
1002:       y[6*(*idx)+5] += alpha6*(*v);
1003:       idx++; v++;
1004:     }
1005:   }
1006:   PetscLogFlops(12.0*a->nz);
1007:   VecRestoreArrayRead(xx,&x);
1008:   VecRestoreArray(zz,&y);
1009:   return(0);
1010: }

1012: /* ------------------------------------------------------------------------------*/
1013: PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1014: {
1015:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1016:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1017:   const PetscScalar *x,*v;
1018:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1019:   PetscErrorCode    ierr;
1020:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1021:   PetscInt          nonzerorow=0,n,i,jrow,j;

1024:   VecGetArrayRead(xx,&x);
1025:   VecGetArray(yy,&y);
1026:   idx  = a->j;
1027:   v    = a->a;
1028:   ii   = a->i;

1030:   for (i=0; i<m; i++) {
1031:     jrow  = ii[i];
1032:     n     = ii[i+1] - jrow;
1033:     sum1  = 0.0;
1034:     sum2  = 0.0;
1035:     sum3  = 0.0;
1036:     sum4  = 0.0;
1037:     sum5  = 0.0;
1038:     sum6  = 0.0;
1039:     sum7  = 0.0;

1041:     nonzerorow += (n>0);
1042:     for (j=0; j<n; j++) {
1043:       sum1 += v[jrow]*x[7*idx[jrow]];
1044:       sum2 += v[jrow]*x[7*idx[jrow]+1];
1045:       sum3 += v[jrow]*x[7*idx[jrow]+2];
1046:       sum4 += v[jrow]*x[7*idx[jrow]+3];
1047:       sum5 += v[jrow]*x[7*idx[jrow]+4];
1048:       sum6 += v[jrow]*x[7*idx[jrow]+5];
1049:       sum7 += v[jrow]*x[7*idx[jrow]+6];
1050:       jrow++;
1051:     }
1052:     y[7*i]   = sum1;
1053:     y[7*i+1] = sum2;
1054:     y[7*i+2] = sum3;
1055:     y[7*i+3] = sum4;
1056:     y[7*i+4] = sum5;
1057:     y[7*i+5] = sum6;
1058:     y[7*i+6] = sum7;
1059:   }

1061:   PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);
1062:   VecRestoreArrayRead(xx,&x);
1063:   VecRestoreArray(yy,&y);
1064:   return(0);
1065: }

1067: PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1068: {
1069:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1070:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1071:   const PetscScalar *x,*v;
1072:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1073:   PetscErrorCode    ierr;
1074:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1075:   PetscInt          n,i;

1078:   VecSet(yy,0.0);
1079:   VecGetArrayRead(xx,&x);
1080:   VecGetArray(yy,&y);

1082:   for (i=0; i<m; i++) {
1083:     idx    = a->j + a->i[i];
1084:     v      = a->a + a->i[i];
1085:     n      = a->i[i+1] - a->i[i];
1086:     alpha1 = x[7*i];
1087:     alpha2 = x[7*i+1];
1088:     alpha3 = x[7*i+2];
1089:     alpha4 = x[7*i+3];
1090:     alpha5 = x[7*i+4];
1091:     alpha6 = x[7*i+5];
1092:     alpha7 = x[7*i+6];
1093:     while (n-->0) {
1094:       y[7*(*idx)]   += alpha1*(*v);
1095:       y[7*(*idx)+1] += alpha2*(*v);
1096:       y[7*(*idx)+2] += alpha3*(*v);
1097:       y[7*(*idx)+3] += alpha4*(*v);
1098:       y[7*(*idx)+4] += alpha5*(*v);
1099:       y[7*(*idx)+5] += alpha6*(*v);
1100:       y[7*(*idx)+6] += alpha7*(*v);
1101:       idx++; v++;
1102:     }
1103:   }
1104:   PetscLogFlops(14.0*a->nz);
1105:   VecRestoreArrayRead(xx,&x);
1106:   VecRestoreArray(yy,&y);
1107:   return(0);
1108: }

1110: PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1111: {
1112:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1113:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1114:   const PetscScalar *x,*v;
1115:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1116:   PetscErrorCode    ierr;
1117:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1118:   PetscInt          n,i,jrow,j;

1121:   if (yy != zz) {VecCopy(yy,zz);}
1122:   VecGetArrayRead(xx,&x);
1123:   VecGetArray(zz,&y);
1124:   idx  = a->j;
1125:   v    = a->a;
1126:   ii   = a->i;

1128:   for (i=0; i<m; i++) {
1129:     jrow = ii[i];
1130:     n    = ii[i+1] - jrow;
1131:     sum1 = 0.0;
1132:     sum2 = 0.0;
1133:     sum3 = 0.0;
1134:     sum4 = 0.0;
1135:     sum5 = 0.0;
1136:     sum6 = 0.0;
1137:     sum7 = 0.0;
1138:     for (j=0; j<n; j++) {
1139:       sum1 += v[jrow]*x[7*idx[jrow]];
1140:       sum2 += v[jrow]*x[7*idx[jrow]+1];
1141:       sum3 += v[jrow]*x[7*idx[jrow]+2];
1142:       sum4 += v[jrow]*x[7*idx[jrow]+3];
1143:       sum5 += v[jrow]*x[7*idx[jrow]+4];
1144:       sum6 += v[jrow]*x[7*idx[jrow]+5];
1145:       sum7 += v[jrow]*x[7*idx[jrow]+6];
1146:       jrow++;
1147:     }
1148:     y[7*i]   += sum1;
1149:     y[7*i+1] += sum2;
1150:     y[7*i+2] += sum3;
1151:     y[7*i+3] += sum4;
1152:     y[7*i+4] += sum5;
1153:     y[7*i+5] += sum6;
1154:     y[7*i+6] += sum7;
1155:   }

1157:   PetscLogFlops(14.0*a->nz);
1158:   VecRestoreArrayRead(xx,&x);
1159:   VecRestoreArray(zz,&y);
1160:   return(0);
1161: }

1163: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1164: {
1165:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1166:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1167:   const PetscScalar *x,*v;
1168:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1169:   PetscErrorCode    ierr;
1170:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1171:   PetscInt          n,i;

1174:   if (yy != zz) {VecCopy(yy,zz);}
1175:   VecGetArrayRead(xx,&x);
1176:   VecGetArray(zz,&y);
1177:   for (i=0; i<m; i++) {
1178:     idx    = a->j + a->i[i];
1179:     v      = a->a + a->i[i];
1180:     n      = a->i[i+1] - a->i[i];
1181:     alpha1 = x[7*i];
1182:     alpha2 = x[7*i+1];
1183:     alpha3 = x[7*i+2];
1184:     alpha4 = x[7*i+3];
1185:     alpha5 = x[7*i+4];
1186:     alpha6 = x[7*i+5];
1187:     alpha7 = x[7*i+6];
1188:     while (n-->0) {
1189:       y[7*(*idx)]   += alpha1*(*v);
1190:       y[7*(*idx)+1] += alpha2*(*v);
1191:       y[7*(*idx)+2] += alpha3*(*v);
1192:       y[7*(*idx)+3] += alpha4*(*v);
1193:       y[7*(*idx)+4] += alpha5*(*v);
1194:       y[7*(*idx)+5] += alpha6*(*v);
1195:       y[7*(*idx)+6] += alpha7*(*v);
1196:       idx++; v++;
1197:     }
1198:   }
1199:   PetscLogFlops(14.0*a->nz);
1200:   VecRestoreArrayRead(xx,&x);
1201:   VecRestoreArray(zz,&y);
1202:   return(0);
1203: }

1205: PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1206: {
1207:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1208:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1209:   const PetscScalar *x,*v;
1210:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1211:   PetscErrorCode    ierr;
1212:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1213:   PetscInt          nonzerorow=0,n,i,jrow,j;

1216:   VecGetArrayRead(xx,&x);
1217:   VecGetArray(yy,&y);
1218:   idx  = a->j;
1219:   v    = a->a;
1220:   ii   = a->i;

1222:   for (i=0; i<m; i++) {
1223:     jrow  = ii[i];
1224:     n     = ii[i+1] - jrow;
1225:     sum1  = 0.0;
1226:     sum2  = 0.0;
1227:     sum3  = 0.0;
1228:     sum4  = 0.0;
1229:     sum5  = 0.0;
1230:     sum6  = 0.0;
1231:     sum7  = 0.0;
1232:     sum8  = 0.0;

1234:     nonzerorow += (n>0);
1235:     for (j=0; j<n; j++) {
1236:       sum1 += v[jrow]*x[8*idx[jrow]];
1237:       sum2 += v[jrow]*x[8*idx[jrow]+1];
1238:       sum3 += v[jrow]*x[8*idx[jrow]+2];
1239:       sum4 += v[jrow]*x[8*idx[jrow]+3];
1240:       sum5 += v[jrow]*x[8*idx[jrow]+4];
1241:       sum6 += v[jrow]*x[8*idx[jrow]+5];
1242:       sum7 += v[jrow]*x[8*idx[jrow]+6];
1243:       sum8 += v[jrow]*x[8*idx[jrow]+7];
1244:       jrow++;
1245:     }
1246:     y[8*i]   = sum1;
1247:     y[8*i+1] = sum2;
1248:     y[8*i+2] = sum3;
1249:     y[8*i+3] = sum4;
1250:     y[8*i+4] = sum5;
1251:     y[8*i+5] = sum6;
1252:     y[8*i+6] = sum7;
1253:     y[8*i+7] = sum8;
1254:   }

1256:   PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);
1257:   VecRestoreArrayRead(xx,&x);
1258:   VecRestoreArray(yy,&y);
1259:   return(0);
1260: }

1262: PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1263: {
1264:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1265:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1266:   const PetscScalar *x,*v;
1267:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1268:   PetscErrorCode    ierr;
1269:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1270:   PetscInt          n,i;

1273:   VecSet(yy,0.0);
1274:   VecGetArrayRead(xx,&x);
1275:   VecGetArray(yy,&y);

1277:   for (i=0; i<m; i++) {
1278:     idx    = a->j + a->i[i];
1279:     v      = a->a + a->i[i];
1280:     n      = a->i[i+1] - a->i[i];
1281:     alpha1 = x[8*i];
1282:     alpha2 = x[8*i+1];
1283:     alpha3 = x[8*i+2];
1284:     alpha4 = x[8*i+3];
1285:     alpha5 = x[8*i+4];
1286:     alpha6 = x[8*i+5];
1287:     alpha7 = x[8*i+6];
1288:     alpha8 = x[8*i+7];
1289:     while (n-->0) {
1290:       y[8*(*idx)]   += alpha1*(*v);
1291:       y[8*(*idx)+1] += alpha2*(*v);
1292:       y[8*(*idx)+2] += alpha3*(*v);
1293:       y[8*(*idx)+3] += alpha4*(*v);
1294:       y[8*(*idx)+4] += alpha5*(*v);
1295:       y[8*(*idx)+5] += alpha6*(*v);
1296:       y[8*(*idx)+6] += alpha7*(*v);
1297:       y[8*(*idx)+7] += alpha8*(*v);
1298:       idx++; v++;
1299:     }
1300:   }
1301:   PetscLogFlops(16.0*a->nz);
1302:   VecRestoreArrayRead(xx,&x);
1303:   VecRestoreArray(yy,&y);
1304:   return(0);
1305: }

1307: PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1308: {
1309:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1310:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1311:   const PetscScalar *x,*v;
1312:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1313:   PetscErrorCode    ierr;
1314:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1315:   PetscInt          n,i,jrow,j;

1318:   if (yy != zz) {VecCopy(yy,zz);}
1319:   VecGetArrayRead(xx,&x);
1320:   VecGetArray(zz,&y);
1321:   idx  = a->j;
1322:   v    = a->a;
1323:   ii   = a->i;

1325:   for (i=0; i<m; i++) {
1326:     jrow = ii[i];
1327:     n    = ii[i+1] - jrow;
1328:     sum1 = 0.0;
1329:     sum2 = 0.0;
1330:     sum3 = 0.0;
1331:     sum4 = 0.0;
1332:     sum5 = 0.0;
1333:     sum6 = 0.0;
1334:     sum7 = 0.0;
1335:     sum8 = 0.0;
1336:     for (j=0; j<n; j++) {
1337:       sum1 += v[jrow]*x[8*idx[jrow]];
1338:       sum2 += v[jrow]*x[8*idx[jrow]+1];
1339:       sum3 += v[jrow]*x[8*idx[jrow]+2];
1340:       sum4 += v[jrow]*x[8*idx[jrow]+3];
1341:       sum5 += v[jrow]*x[8*idx[jrow]+4];
1342:       sum6 += v[jrow]*x[8*idx[jrow]+5];
1343:       sum7 += v[jrow]*x[8*idx[jrow]+6];
1344:       sum8 += v[jrow]*x[8*idx[jrow]+7];
1345:       jrow++;
1346:     }
1347:     y[8*i]   += sum1;
1348:     y[8*i+1] += sum2;
1349:     y[8*i+2] += sum3;
1350:     y[8*i+3] += sum4;
1351:     y[8*i+4] += sum5;
1352:     y[8*i+5] += sum6;
1353:     y[8*i+6] += sum7;
1354:     y[8*i+7] += sum8;
1355:   }

1357:   PetscLogFlops(16.0*a->nz);
1358:   VecRestoreArrayRead(xx,&x);
1359:   VecRestoreArray(zz,&y);
1360:   return(0);
1361: }

1363: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1364: {
1365:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1366:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1367:   const PetscScalar *x,*v;
1368:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1369:   PetscErrorCode    ierr;
1370:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1371:   PetscInt          n,i;

1374:   if (yy != zz) {VecCopy(yy,zz);}
1375:   VecGetArrayRead(xx,&x);
1376:   VecGetArray(zz,&y);
1377:   for (i=0; i<m; i++) {
1378:     idx    = a->j + a->i[i];
1379:     v      = a->a + a->i[i];
1380:     n      = a->i[i+1] - a->i[i];
1381:     alpha1 = x[8*i];
1382:     alpha2 = x[8*i+1];
1383:     alpha3 = x[8*i+2];
1384:     alpha4 = x[8*i+3];
1385:     alpha5 = x[8*i+4];
1386:     alpha6 = x[8*i+5];
1387:     alpha7 = x[8*i+6];
1388:     alpha8 = x[8*i+7];
1389:     while (n-->0) {
1390:       y[8*(*idx)]   += alpha1*(*v);
1391:       y[8*(*idx)+1] += alpha2*(*v);
1392:       y[8*(*idx)+2] += alpha3*(*v);
1393:       y[8*(*idx)+3] += alpha4*(*v);
1394:       y[8*(*idx)+4] += alpha5*(*v);
1395:       y[8*(*idx)+5] += alpha6*(*v);
1396:       y[8*(*idx)+6] += alpha7*(*v);
1397:       y[8*(*idx)+7] += alpha8*(*v);
1398:       idx++; v++;
1399:     }
1400:   }
1401:   PetscLogFlops(16.0*a->nz);
1402:   VecRestoreArrayRead(xx,&x);
1403:   VecRestoreArray(zz,&y);
1404:   return(0);
1405: }

1407: /* ------------------------------------------------------------------------------*/
1408: PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1409: {
1410:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1411:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1412:   const PetscScalar *x,*v;
1413:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1414:   PetscErrorCode    ierr;
1415:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1416:   PetscInt          nonzerorow=0,n,i,jrow,j;

1419:   VecGetArrayRead(xx,&x);
1420:   VecGetArray(yy,&y);
1421:   idx  = a->j;
1422:   v    = a->a;
1423:   ii   = a->i;

1425:   for (i=0; i<m; i++) {
1426:     jrow  = ii[i];
1427:     n     = ii[i+1] - jrow;
1428:     sum1  = 0.0;
1429:     sum2  = 0.0;
1430:     sum3  = 0.0;
1431:     sum4  = 0.0;
1432:     sum5  = 0.0;
1433:     sum6  = 0.0;
1434:     sum7  = 0.0;
1435:     sum8  = 0.0;
1436:     sum9  = 0.0;

1438:     nonzerorow += (n>0);
1439:     for (j=0; j<n; j++) {
1440:       sum1 += v[jrow]*x[9*idx[jrow]];
1441:       sum2 += v[jrow]*x[9*idx[jrow]+1];
1442:       sum3 += v[jrow]*x[9*idx[jrow]+2];
1443:       sum4 += v[jrow]*x[9*idx[jrow]+3];
1444:       sum5 += v[jrow]*x[9*idx[jrow]+4];
1445:       sum6 += v[jrow]*x[9*idx[jrow]+5];
1446:       sum7 += v[jrow]*x[9*idx[jrow]+6];
1447:       sum8 += v[jrow]*x[9*idx[jrow]+7];
1448:       sum9 += v[jrow]*x[9*idx[jrow]+8];
1449:       jrow++;
1450:     }
1451:     y[9*i]   = sum1;
1452:     y[9*i+1] = sum2;
1453:     y[9*i+2] = sum3;
1454:     y[9*i+3] = sum4;
1455:     y[9*i+4] = sum5;
1456:     y[9*i+5] = sum6;
1457:     y[9*i+6] = sum7;
1458:     y[9*i+7] = sum8;
1459:     y[9*i+8] = sum9;
1460:   }

1462:   PetscLogFlops(18.0*a->nz - 9*nonzerorow);
1463:   VecRestoreArrayRead(xx,&x);
1464:   VecRestoreArray(yy,&y);
1465:   return(0);
1466: }

1468: /* ------------------------------------------------------------------------------*/

1470: PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1471: {
1472:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1473:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1474:   const PetscScalar *x,*v;
1475:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1476:   PetscErrorCode    ierr;
1477:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1478:   PetscInt          n,i;

1481:   VecSet(yy,0.0);
1482:   VecGetArrayRead(xx,&x);
1483:   VecGetArray(yy,&y);

1485:   for (i=0; i<m; i++) {
1486:     idx    = a->j + a->i[i];
1487:     v      = a->a + a->i[i];
1488:     n      = a->i[i+1] - a->i[i];
1489:     alpha1 = x[9*i];
1490:     alpha2 = x[9*i+1];
1491:     alpha3 = x[9*i+2];
1492:     alpha4 = x[9*i+3];
1493:     alpha5 = x[9*i+4];
1494:     alpha6 = x[9*i+5];
1495:     alpha7 = x[9*i+6];
1496:     alpha8 = x[9*i+7];
1497:     alpha9 = x[9*i+8];
1498:     while (n-->0) {
1499:       y[9*(*idx)]   += alpha1*(*v);
1500:       y[9*(*idx)+1] += alpha2*(*v);
1501:       y[9*(*idx)+2] += alpha3*(*v);
1502:       y[9*(*idx)+3] += alpha4*(*v);
1503:       y[9*(*idx)+4] += alpha5*(*v);
1504:       y[9*(*idx)+5] += alpha6*(*v);
1505:       y[9*(*idx)+6] += alpha7*(*v);
1506:       y[9*(*idx)+7] += alpha8*(*v);
1507:       y[9*(*idx)+8] += alpha9*(*v);
1508:       idx++; v++;
1509:     }
1510:   }
1511:   PetscLogFlops(18.0*a->nz);
1512:   VecRestoreArrayRead(xx,&x);
1513:   VecRestoreArray(yy,&y);
1514:   return(0);
1515: }

1517: PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1518: {
1519:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1520:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1521:   const PetscScalar *x,*v;
1522:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1523:   PetscErrorCode    ierr;
1524:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1525:   PetscInt          n,i,jrow,j;

1528:   if (yy != zz) {VecCopy(yy,zz);}
1529:   VecGetArrayRead(xx,&x);
1530:   VecGetArray(zz,&y);
1531:   idx  = a->j;
1532:   v    = a->a;
1533:   ii   = a->i;

1535:   for (i=0; i<m; i++) {
1536:     jrow = ii[i];
1537:     n    = ii[i+1] - jrow;
1538:     sum1 = 0.0;
1539:     sum2 = 0.0;
1540:     sum3 = 0.0;
1541:     sum4 = 0.0;
1542:     sum5 = 0.0;
1543:     sum6 = 0.0;
1544:     sum7 = 0.0;
1545:     sum8 = 0.0;
1546:     sum9 = 0.0;
1547:     for (j=0; j<n; j++) {
1548:       sum1 += v[jrow]*x[9*idx[jrow]];
1549:       sum2 += v[jrow]*x[9*idx[jrow]+1];
1550:       sum3 += v[jrow]*x[9*idx[jrow]+2];
1551:       sum4 += v[jrow]*x[9*idx[jrow]+3];
1552:       sum5 += v[jrow]*x[9*idx[jrow]+4];
1553:       sum6 += v[jrow]*x[9*idx[jrow]+5];
1554:       sum7 += v[jrow]*x[9*idx[jrow]+6];
1555:       sum8 += v[jrow]*x[9*idx[jrow]+7];
1556:       sum9 += v[jrow]*x[9*idx[jrow]+8];
1557:       jrow++;
1558:     }
1559:     y[9*i]   += sum1;
1560:     y[9*i+1] += sum2;
1561:     y[9*i+2] += sum3;
1562:     y[9*i+3] += sum4;
1563:     y[9*i+4] += sum5;
1564:     y[9*i+5] += sum6;
1565:     y[9*i+6] += sum7;
1566:     y[9*i+7] += sum8;
1567:     y[9*i+8] += sum9;
1568:   }

1570:   PetscLogFlops(18*a->nz);
1571:   VecRestoreArrayRead(xx,&x);
1572:   VecRestoreArray(zz,&y);
1573:   return(0);
1574: }

1576: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1577: {
1578:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1579:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1580:   const PetscScalar *x,*v;
1581:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1582:   PetscErrorCode    ierr;
1583:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1584:   PetscInt          n,i;

1587:   if (yy != zz) {VecCopy(yy,zz);}
1588:   VecGetArrayRead(xx,&x);
1589:   VecGetArray(zz,&y);
1590:   for (i=0; i<m; i++) {
1591:     idx    = a->j + a->i[i];
1592:     v      = a->a + a->i[i];
1593:     n      = a->i[i+1] - a->i[i];
1594:     alpha1 = x[9*i];
1595:     alpha2 = x[9*i+1];
1596:     alpha3 = x[9*i+2];
1597:     alpha4 = x[9*i+3];
1598:     alpha5 = x[9*i+4];
1599:     alpha6 = x[9*i+5];
1600:     alpha7 = x[9*i+6];
1601:     alpha8 = x[9*i+7];
1602:     alpha9 = x[9*i+8];
1603:     while (n-->0) {
1604:       y[9*(*idx)]   += alpha1*(*v);
1605:       y[9*(*idx)+1] += alpha2*(*v);
1606:       y[9*(*idx)+2] += alpha3*(*v);
1607:       y[9*(*idx)+3] += alpha4*(*v);
1608:       y[9*(*idx)+4] += alpha5*(*v);
1609:       y[9*(*idx)+5] += alpha6*(*v);
1610:       y[9*(*idx)+6] += alpha7*(*v);
1611:       y[9*(*idx)+7] += alpha8*(*v);
1612:       y[9*(*idx)+8] += alpha9*(*v);
1613:       idx++; v++;
1614:     }
1615:   }
1616:   PetscLogFlops(18.0*a->nz);
1617:   VecRestoreArrayRead(xx,&x);
1618:   VecRestoreArray(zz,&y);
1619:   return(0);
1620: }
1621: PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1622: {
1623:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1624:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1625:   const PetscScalar *x,*v;
1626:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1627:   PetscErrorCode    ierr;
1628:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1629:   PetscInt          nonzerorow=0,n,i,jrow,j;

1632:   VecGetArrayRead(xx,&x);
1633:   VecGetArray(yy,&y);
1634:   idx  = a->j;
1635:   v    = a->a;
1636:   ii   = a->i;

1638:   for (i=0; i<m; i++) {
1639:     jrow  = ii[i];
1640:     n     = ii[i+1] - jrow;
1641:     sum1  = 0.0;
1642:     sum2  = 0.0;
1643:     sum3  = 0.0;
1644:     sum4  = 0.0;
1645:     sum5  = 0.0;
1646:     sum6  = 0.0;
1647:     sum7  = 0.0;
1648:     sum8  = 0.0;
1649:     sum9  = 0.0;
1650:     sum10 = 0.0;

1652:     nonzerorow += (n>0);
1653:     for (j=0; j<n; j++) {
1654:       sum1  += v[jrow]*x[10*idx[jrow]];
1655:       sum2  += v[jrow]*x[10*idx[jrow]+1];
1656:       sum3  += v[jrow]*x[10*idx[jrow]+2];
1657:       sum4  += v[jrow]*x[10*idx[jrow]+3];
1658:       sum5  += v[jrow]*x[10*idx[jrow]+4];
1659:       sum6  += v[jrow]*x[10*idx[jrow]+5];
1660:       sum7  += v[jrow]*x[10*idx[jrow]+6];
1661:       sum8  += v[jrow]*x[10*idx[jrow]+7];
1662:       sum9  += v[jrow]*x[10*idx[jrow]+8];
1663:       sum10 += v[jrow]*x[10*idx[jrow]+9];
1664:       jrow++;
1665:     }
1666:     y[10*i]   = sum1;
1667:     y[10*i+1] = sum2;
1668:     y[10*i+2] = sum3;
1669:     y[10*i+3] = sum4;
1670:     y[10*i+4] = sum5;
1671:     y[10*i+5] = sum6;
1672:     y[10*i+6] = sum7;
1673:     y[10*i+7] = sum8;
1674:     y[10*i+8] = sum9;
1675:     y[10*i+9] = sum10;
1676:   }

1678:   PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);
1679:   VecRestoreArrayRead(xx,&x);
1680:   VecRestoreArray(yy,&y);
1681:   return(0);
1682: }

1684: PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1685: {
1686:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1687:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1688:   const PetscScalar *x,*v;
1689:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1690:   PetscErrorCode    ierr;
1691:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1692:   PetscInt          n,i,jrow,j;

1695:   if (yy != zz) {VecCopy(yy,zz);}
1696:   VecGetArrayRead(xx,&x);
1697:   VecGetArray(zz,&y);
1698:   idx  = a->j;
1699:   v    = a->a;
1700:   ii   = a->i;

1702:   for (i=0; i<m; i++) {
1703:     jrow  = ii[i];
1704:     n     = ii[i+1] - jrow;
1705:     sum1  = 0.0;
1706:     sum2  = 0.0;
1707:     sum3  = 0.0;
1708:     sum4  = 0.0;
1709:     sum5  = 0.0;
1710:     sum6  = 0.0;
1711:     sum7  = 0.0;
1712:     sum8  = 0.0;
1713:     sum9  = 0.0;
1714:     sum10 = 0.0;
1715:     for (j=0; j<n; j++) {
1716:       sum1  += v[jrow]*x[10*idx[jrow]];
1717:       sum2  += v[jrow]*x[10*idx[jrow]+1];
1718:       sum3  += v[jrow]*x[10*idx[jrow]+2];
1719:       sum4  += v[jrow]*x[10*idx[jrow]+3];
1720:       sum5  += v[jrow]*x[10*idx[jrow]+4];
1721:       sum6  += v[jrow]*x[10*idx[jrow]+5];
1722:       sum7  += v[jrow]*x[10*idx[jrow]+6];
1723:       sum8  += v[jrow]*x[10*idx[jrow]+7];
1724:       sum9  += v[jrow]*x[10*idx[jrow]+8];
1725:       sum10 += v[jrow]*x[10*idx[jrow]+9];
1726:       jrow++;
1727:     }
1728:     y[10*i]   += sum1;
1729:     y[10*i+1] += sum2;
1730:     y[10*i+2] += sum3;
1731:     y[10*i+3] += sum4;
1732:     y[10*i+4] += sum5;
1733:     y[10*i+5] += sum6;
1734:     y[10*i+6] += sum7;
1735:     y[10*i+7] += sum8;
1736:     y[10*i+8] += sum9;
1737:     y[10*i+9] += sum10;
1738:   }

1740:   PetscLogFlops(20.0*a->nz);
1741:   VecRestoreArrayRead(xx,&x);
1742:   VecRestoreArray(yy,&y);
1743:   return(0);
1744: }

1746: PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1747: {
1748:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1749:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1750:   const PetscScalar *x,*v;
1751:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1752:   PetscErrorCode    ierr;
1753:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1754:   PetscInt          n,i;

1757:   VecSet(yy,0.0);
1758:   VecGetArrayRead(xx,&x);
1759:   VecGetArray(yy,&y);

1761:   for (i=0; i<m; i++) {
1762:     idx     = a->j + a->i[i];
1763:     v       = a->a + a->i[i];
1764:     n       = a->i[i+1] - a->i[i];
1765:     alpha1  = x[10*i];
1766:     alpha2  = x[10*i+1];
1767:     alpha3  = x[10*i+2];
1768:     alpha4  = x[10*i+3];
1769:     alpha5  = x[10*i+4];
1770:     alpha6  = x[10*i+5];
1771:     alpha7  = x[10*i+6];
1772:     alpha8  = x[10*i+7];
1773:     alpha9  = x[10*i+8];
1774:     alpha10 = x[10*i+9];
1775:     while (n-->0) {
1776:       y[10*(*idx)]   += alpha1*(*v);
1777:       y[10*(*idx)+1] += alpha2*(*v);
1778:       y[10*(*idx)+2] += alpha3*(*v);
1779:       y[10*(*idx)+3] += alpha4*(*v);
1780:       y[10*(*idx)+4] += alpha5*(*v);
1781:       y[10*(*idx)+5] += alpha6*(*v);
1782:       y[10*(*idx)+6] += alpha7*(*v);
1783:       y[10*(*idx)+7] += alpha8*(*v);
1784:       y[10*(*idx)+8] += alpha9*(*v);
1785:       y[10*(*idx)+9] += alpha10*(*v);
1786:       idx++; v++;
1787:     }
1788:   }
1789:   PetscLogFlops(20.0*a->nz);
1790:   VecRestoreArrayRead(xx,&x);
1791:   VecRestoreArray(yy,&y);
1792:   return(0);
1793: }

1795: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1796: {
1797:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1798:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1799:   const PetscScalar *x,*v;
1800:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1801:   PetscErrorCode    ierr;
1802:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1803:   PetscInt          n,i;

1806:   if (yy != zz) {VecCopy(yy,zz);}
1807:   VecGetArrayRead(xx,&x);
1808:   VecGetArray(zz,&y);
1809:   for (i=0; i<m; i++) {
1810:     idx     = a->j + a->i[i];
1811:     v       = a->a + a->i[i];
1812:     n       = a->i[i+1] - a->i[i];
1813:     alpha1  = x[10*i];
1814:     alpha2  = x[10*i+1];
1815:     alpha3  = x[10*i+2];
1816:     alpha4  = x[10*i+3];
1817:     alpha5  = x[10*i+4];
1818:     alpha6  = x[10*i+5];
1819:     alpha7  = x[10*i+6];
1820:     alpha8  = x[10*i+7];
1821:     alpha9  = x[10*i+8];
1822:     alpha10 = x[10*i+9];
1823:     while (n-->0) {
1824:       y[10*(*idx)]   += alpha1*(*v);
1825:       y[10*(*idx)+1] += alpha2*(*v);
1826:       y[10*(*idx)+2] += alpha3*(*v);
1827:       y[10*(*idx)+3] += alpha4*(*v);
1828:       y[10*(*idx)+4] += alpha5*(*v);
1829:       y[10*(*idx)+5] += alpha6*(*v);
1830:       y[10*(*idx)+6] += alpha7*(*v);
1831:       y[10*(*idx)+7] += alpha8*(*v);
1832:       y[10*(*idx)+8] += alpha9*(*v);
1833:       y[10*(*idx)+9] += alpha10*(*v);
1834:       idx++; v++;
1835:     }
1836:   }
1837:   PetscLogFlops(20.0*a->nz);
1838:   VecRestoreArrayRead(xx,&x);
1839:   VecRestoreArray(zz,&y);
1840:   return(0);
1841: }


1844: /*--------------------------------------------------------------------------------------------*/
1845: PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1846: {
1847:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1848:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1849:   const PetscScalar *x,*v;
1850:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1851:   PetscErrorCode    ierr;
1852:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1853:   PetscInt          nonzerorow=0,n,i,jrow,j;

1856:   VecGetArrayRead(xx,&x);
1857:   VecGetArray(yy,&y);
1858:   idx  = a->j;
1859:   v    = a->a;
1860:   ii   = a->i;

1862:   for (i=0; i<m; i++) {
1863:     jrow  = ii[i];
1864:     n     = ii[i+1] - jrow;
1865:     sum1  = 0.0;
1866:     sum2  = 0.0;
1867:     sum3  = 0.0;
1868:     sum4  = 0.0;
1869:     sum5  = 0.0;
1870:     sum6  = 0.0;
1871:     sum7  = 0.0;
1872:     sum8  = 0.0;
1873:     sum9  = 0.0;
1874:     sum10 = 0.0;
1875:     sum11 = 0.0;

1877:     nonzerorow += (n>0);
1878:     for (j=0; j<n; j++) {
1879:       sum1  += v[jrow]*x[11*idx[jrow]];
1880:       sum2  += v[jrow]*x[11*idx[jrow]+1];
1881:       sum3  += v[jrow]*x[11*idx[jrow]+2];
1882:       sum4  += v[jrow]*x[11*idx[jrow]+3];
1883:       sum5  += v[jrow]*x[11*idx[jrow]+4];
1884:       sum6  += v[jrow]*x[11*idx[jrow]+5];
1885:       sum7  += v[jrow]*x[11*idx[jrow]+6];
1886:       sum8  += v[jrow]*x[11*idx[jrow]+7];
1887:       sum9  += v[jrow]*x[11*idx[jrow]+8];
1888:       sum10 += v[jrow]*x[11*idx[jrow]+9];
1889:       sum11 += v[jrow]*x[11*idx[jrow]+10];
1890:       jrow++;
1891:     }
1892:     y[11*i]    = sum1;
1893:     y[11*i+1]  = sum2;
1894:     y[11*i+2]  = sum3;
1895:     y[11*i+3]  = sum4;
1896:     y[11*i+4]  = sum5;
1897:     y[11*i+5]  = sum6;
1898:     y[11*i+6]  = sum7;
1899:     y[11*i+7]  = sum8;
1900:     y[11*i+8]  = sum9;
1901:     y[11*i+9]  = sum10;
1902:     y[11*i+10] = sum11;
1903:   }

1905:   PetscLogFlops(22*a->nz - 11*nonzerorow);
1906:   VecRestoreArrayRead(xx,&x);
1907:   VecRestoreArray(yy,&y);
1908:   return(0);
1909: }

1911: PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1912: {
1913:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1914:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1915:   const PetscScalar *x,*v;
1916:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1917:   PetscErrorCode    ierr;
1918:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1919:   PetscInt          n,i,jrow,j;

1922:   if (yy != zz) {VecCopy(yy,zz);}
1923:   VecGetArrayRead(xx,&x);
1924:   VecGetArray(zz,&y);
1925:   idx  = a->j;
1926:   v    = a->a;
1927:   ii   = a->i;

1929:   for (i=0; i<m; i++) {
1930:     jrow  = ii[i];
1931:     n     = ii[i+1] - jrow;
1932:     sum1  = 0.0;
1933:     sum2  = 0.0;
1934:     sum3  = 0.0;
1935:     sum4  = 0.0;
1936:     sum5  = 0.0;
1937:     sum6  = 0.0;
1938:     sum7  = 0.0;
1939:     sum8  = 0.0;
1940:     sum9  = 0.0;
1941:     sum10 = 0.0;
1942:     sum11 = 0.0;
1943:     for (j=0; j<n; j++) {
1944:       sum1  += v[jrow]*x[11*idx[jrow]];
1945:       sum2  += v[jrow]*x[11*idx[jrow]+1];
1946:       sum3  += v[jrow]*x[11*idx[jrow]+2];
1947:       sum4  += v[jrow]*x[11*idx[jrow]+3];
1948:       sum5  += v[jrow]*x[11*idx[jrow]+4];
1949:       sum6  += v[jrow]*x[11*idx[jrow]+5];
1950:       sum7  += v[jrow]*x[11*idx[jrow]+6];
1951:       sum8  += v[jrow]*x[11*idx[jrow]+7];
1952:       sum9  += v[jrow]*x[11*idx[jrow]+8];
1953:       sum10 += v[jrow]*x[11*idx[jrow]+9];
1954:       sum11 += v[jrow]*x[11*idx[jrow]+10];
1955:       jrow++;
1956:     }
1957:     y[11*i]    += sum1;
1958:     y[11*i+1]  += sum2;
1959:     y[11*i+2]  += sum3;
1960:     y[11*i+3]  += sum4;
1961:     y[11*i+4]  += sum5;
1962:     y[11*i+5]  += sum6;
1963:     y[11*i+6]  += sum7;
1964:     y[11*i+7]  += sum8;
1965:     y[11*i+8]  += sum9;
1966:     y[11*i+9]  += sum10;
1967:     y[11*i+10] += sum11;
1968:   }

1970:   PetscLogFlops(22*a->nz);
1971:   VecRestoreArrayRead(xx,&x);
1972:   VecRestoreArray(yy,&y);
1973:   return(0);
1974: }

1976: PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1977: {
1978:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1979:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1980:   const PetscScalar *x,*v;
1981:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
1982:   PetscErrorCode    ierr;
1983:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1984:   PetscInt          n,i;

1987:   VecSet(yy,0.0);
1988:   VecGetArrayRead(xx,&x);
1989:   VecGetArray(yy,&y);

1991:   for (i=0; i<m; i++) {
1992:     idx     = a->j + a->i[i];
1993:     v       = a->a + a->i[i];
1994:     n       = a->i[i+1] - a->i[i];
1995:     alpha1  = x[11*i];
1996:     alpha2  = x[11*i+1];
1997:     alpha3  = x[11*i+2];
1998:     alpha4  = x[11*i+3];
1999:     alpha5  = x[11*i+4];
2000:     alpha6  = x[11*i+5];
2001:     alpha7  = x[11*i+6];
2002:     alpha8  = x[11*i+7];
2003:     alpha9  = x[11*i+8];
2004:     alpha10 = x[11*i+9];
2005:     alpha11 = x[11*i+10];
2006:     while (n-->0) {
2007:       y[11*(*idx)]    += alpha1*(*v);
2008:       y[11*(*idx)+1]  += alpha2*(*v);
2009:       y[11*(*idx)+2]  += alpha3*(*v);
2010:       y[11*(*idx)+3]  += alpha4*(*v);
2011:       y[11*(*idx)+4]  += alpha5*(*v);
2012:       y[11*(*idx)+5]  += alpha6*(*v);
2013:       y[11*(*idx)+6]  += alpha7*(*v);
2014:       y[11*(*idx)+7]  += alpha8*(*v);
2015:       y[11*(*idx)+8]  += alpha9*(*v);
2016:       y[11*(*idx)+9]  += alpha10*(*v);
2017:       y[11*(*idx)+10] += alpha11*(*v);
2018:       idx++; v++;
2019:     }
2020:   }
2021:   PetscLogFlops(22*a->nz);
2022:   VecRestoreArrayRead(xx,&x);
2023:   VecRestoreArray(yy,&y);
2024:   return(0);
2025: }

2027: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2028: {
2029:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2030:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2031:   const PetscScalar *x,*v;
2032:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2033:   PetscErrorCode    ierr;
2034:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2035:   PetscInt          n,i;

2038:   if (yy != zz) {VecCopy(yy,zz);}
2039:   VecGetArrayRead(xx,&x);
2040:   VecGetArray(zz,&y);
2041:   for (i=0; i<m; i++) {
2042:     idx     = a->j + a->i[i];
2043:     v       = a->a + a->i[i];
2044:     n       = a->i[i+1] - a->i[i];
2045:     alpha1  = x[11*i];
2046:     alpha2  = x[11*i+1];
2047:     alpha3  = x[11*i+2];
2048:     alpha4  = x[11*i+3];
2049:     alpha5  = x[11*i+4];
2050:     alpha6  = x[11*i+5];
2051:     alpha7  = x[11*i+6];
2052:     alpha8  = x[11*i+7];
2053:     alpha9  = x[11*i+8];
2054:     alpha10 = x[11*i+9];
2055:     alpha11 = x[11*i+10];
2056:     while (n-->0) {
2057:       y[11*(*idx)]    += alpha1*(*v);
2058:       y[11*(*idx)+1]  += alpha2*(*v);
2059:       y[11*(*idx)+2]  += alpha3*(*v);
2060:       y[11*(*idx)+3]  += alpha4*(*v);
2061:       y[11*(*idx)+4]  += alpha5*(*v);
2062:       y[11*(*idx)+5]  += alpha6*(*v);
2063:       y[11*(*idx)+6]  += alpha7*(*v);
2064:       y[11*(*idx)+7]  += alpha8*(*v);
2065:       y[11*(*idx)+8]  += alpha9*(*v);
2066:       y[11*(*idx)+9]  += alpha10*(*v);
2067:       y[11*(*idx)+10] += alpha11*(*v);
2068:       idx++; v++;
2069:     }
2070:   }
2071:   PetscLogFlops(22*a->nz);
2072:   VecRestoreArrayRead(xx,&x);
2073:   VecRestoreArray(zz,&y);
2074:   return(0);
2075: }


2078: /*--------------------------------------------------------------------------------------------*/
2079: PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2080: {
2081:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2082:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2083:   const PetscScalar *x,*v;
2084:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2085:   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2086:   PetscErrorCode    ierr;
2087:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2088:   PetscInt          nonzerorow=0,n,i,jrow,j;

2091:   VecGetArrayRead(xx,&x);
2092:   VecGetArray(yy,&y);
2093:   idx  = a->j;
2094:   v    = a->a;
2095:   ii   = a->i;

2097:   for (i=0; i<m; i++) {
2098:     jrow  = ii[i];
2099:     n     = ii[i+1] - jrow;
2100:     sum1  = 0.0;
2101:     sum2  = 0.0;
2102:     sum3  = 0.0;
2103:     sum4  = 0.0;
2104:     sum5  = 0.0;
2105:     sum6  = 0.0;
2106:     sum7  = 0.0;
2107:     sum8  = 0.0;
2108:     sum9  = 0.0;
2109:     sum10 = 0.0;
2110:     sum11 = 0.0;
2111:     sum12 = 0.0;
2112:     sum13 = 0.0;
2113:     sum14 = 0.0;
2114:     sum15 = 0.0;
2115:     sum16 = 0.0;

2117:     nonzerorow += (n>0);
2118:     for (j=0; j<n; j++) {
2119:       sum1  += v[jrow]*x[16*idx[jrow]];
2120:       sum2  += v[jrow]*x[16*idx[jrow]+1];
2121:       sum3  += v[jrow]*x[16*idx[jrow]+2];
2122:       sum4  += v[jrow]*x[16*idx[jrow]+3];
2123:       sum5  += v[jrow]*x[16*idx[jrow]+4];
2124:       sum6  += v[jrow]*x[16*idx[jrow]+5];
2125:       sum7  += v[jrow]*x[16*idx[jrow]+6];
2126:       sum8  += v[jrow]*x[16*idx[jrow]+7];
2127:       sum9  += v[jrow]*x[16*idx[jrow]+8];
2128:       sum10 += v[jrow]*x[16*idx[jrow]+9];
2129:       sum11 += v[jrow]*x[16*idx[jrow]+10];
2130:       sum12 += v[jrow]*x[16*idx[jrow]+11];
2131:       sum13 += v[jrow]*x[16*idx[jrow]+12];
2132:       sum14 += v[jrow]*x[16*idx[jrow]+13];
2133:       sum15 += v[jrow]*x[16*idx[jrow]+14];
2134:       sum16 += v[jrow]*x[16*idx[jrow]+15];
2135:       jrow++;
2136:     }
2137:     y[16*i]    = sum1;
2138:     y[16*i+1]  = sum2;
2139:     y[16*i+2]  = sum3;
2140:     y[16*i+3]  = sum4;
2141:     y[16*i+4]  = sum5;
2142:     y[16*i+5]  = sum6;
2143:     y[16*i+6]  = sum7;
2144:     y[16*i+7]  = sum8;
2145:     y[16*i+8]  = sum9;
2146:     y[16*i+9]  = sum10;
2147:     y[16*i+10] = sum11;
2148:     y[16*i+11] = sum12;
2149:     y[16*i+12] = sum13;
2150:     y[16*i+13] = sum14;
2151:     y[16*i+14] = sum15;
2152:     y[16*i+15] = sum16;
2153:   }

2155:   PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);
2156:   VecRestoreArrayRead(xx,&x);
2157:   VecRestoreArray(yy,&y);
2158:   return(0);
2159: }

2161: PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2162: {
2163:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2164:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2165:   const PetscScalar *x,*v;
2166:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2167:   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2168:   PetscErrorCode    ierr;
2169:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2170:   PetscInt          n,i;

2173:   VecSet(yy,0.0);
2174:   VecGetArrayRead(xx,&x);
2175:   VecGetArray(yy,&y);

2177:   for (i=0; i<m; i++) {
2178:     idx     = a->j + a->i[i];
2179:     v       = a->a + a->i[i];
2180:     n       = a->i[i+1] - a->i[i];
2181:     alpha1  = x[16*i];
2182:     alpha2  = x[16*i+1];
2183:     alpha3  = x[16*i+2];
2184:     alpha4  = x[16*i+3];
2185:     alpha5  = x[16*i+4];
2186:     alpha6  = x[16*i+5];
2187:     alpha7  = x[16*i+6];
2188:     alpha8  = x[16*i+7];
2189:     alpha9  = x[16*i+8];
2190:     alpha10 = x[16*i+9];
2191:     alpha11 = x[16*i+10];
2192:     alpha12 = x[16*i+11];
2193:     alpha13 = x[16*i+12];
2194:     alpha14 = x[16*i+13];
2195:     alpha15 = x[16*i+14];
2196:     alpha16 = x[16*i+15];
2197:     while (n-->0) {
2198:       y[16*(*idx)]    += alpha1*(*v);
2199:       y[16*(*idx)+1]  += alpha2*(*v);
2200:       y[16*(*idx)+2]  += alpha3*(*v);
2201:       y[16*(*idx)+3]  += alpha4*(*v);
2202:       y[16*(*idx)+4]  += alpha5*(*v);
2203:       y[16*(*idx)+5]  += alpha6*(*v);
2204:       y[16*(*idx)+6]  += alpha7*(*v);
2205:       y[16*(*idx)+7]  += alpha8*(*v);
2206:       y[16*(*idx)+8]  += alpha9*(*v);
2207:       y[16*(*idx)+9]  += alpha10*(*v);
2208:       y[16*(*idx)+10] += alpha11*(*v);
2209:       y[16*(*idx)+11] += alpha12*(*v);
2210:       y[16*(*idx)+12] += alpha13*(*v);
2211:       y[16*(*idx)+13] += alpha14*(*v);
2212:       y[16*(*idx)+14] += alpha15*(*v);
2213:       y[16*(*idx)+15] += alpha16*(*v);
2214:       idx++; v++;
2215:     }
2216:   }
2217:   PetscLogFlops(32.0*a->nz);
2218:   VecRestoreArrayRead(xx,&x);
2219:   VecRestoreArray(yy,&y);
2220:   return(0);
2221: }

2223: PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2224: {
2225:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2226:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2227:   const PetscScalar *x,*v;
2228:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2229:   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2230:   PetscErrorCode    ierr;
2231:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2232:   PetscInt          n,i,jrow,j;

2235:   if (yy != zz) {VecCopy(yy,zz);}
2236:   VecGetArrayRead(xx,&x);
2237:   VecGetArray(zz,&y);
2238:   idx  = a->j;
2239:   v    = a->a;
2240:   ii   = a->i;

2242:   for (i=0; i<m; i++) {
2243:     jrow  = ii[i];
2244:     n     = ii[i+1] - jrow;
2245:     sum1  = 0.0;
2246:     sum2  = 0.0;
2247:     sum3  = 0.0;
2248:     sum4  = 0.0;
2249:     sum5  = 0.0;
2250:     sum6  = 0.0;
2251:     sum7  = 0.0;
2252:     sum8  = 0.0;
2253:     sum9  = 0.0;
2254:     sum10 = 0.0;
2255:     sum11 = 0.0;
2256:     sum12 = 0.0;
2257:     sum13 = 0.0;
2258:     sum14 = 0.0;
2259:     sum15 = 0.0;
2260:     sum16 = 0.0;
2261:     for (j=0; j<n; j++) {
2262:       sum1  += v[jrow]*x[16*idx[jrow]];
2263:       sum2  += v[jrow]*x[16*idx[jrow]+1];
2264:       sum3  += v[jrow]*x[16*idx[jrow]+2];
2265:       sum4  += v[jrow]*x[16*idx[jrow]+3];
2266:       sum5  += v[jrow]*x[16*idx[jrow]+4];
2267:       sum6  += v[jrow]*x[16*idx[jrow]+5];
2268:       sum7  += v[jrow]*x[16*idx[jrow]+6];
2269:       sum8  += v[jrow]*x[16*idx[jrow]+7];
2270:       sum9  += v[jrow]*x[16*idx[jrow]+8];
2271:       sum10 += v[jrow]*x[16*idx[jrow]+9];
2272:       sum11 += v[jrow]*x[16*idx[jrow]+10];
2273:       sum12 += v[jrow]*x[16*idx[jrow]+11];
2274:       sum13 += v[jrow]*x[16*idx[jrow]+12];
2275:       sum14 += v[jrow]*x[16*idx[jrow]+13];
2276:       sum15 += v[jrow]*x[16*idx[jrow]+14];
2277:       sum16 += v[jrow]*x[16*idx[jrow]+15];
2278:       jrow++;
2279:     }
2280:     y[16*i]    += sum1;
2281:     y[16*i+1]  += sum2;
2282:     y[16*i+2]  += sum3;
2283:     y[16*i+3]  += sum4;
2284:     y[16*i+4]  += sum5;
2285:     y[16*i+5]  += sum6;
2286:     y[16*i+6]  += sum7;
2287:     y[16*i+7]  += sum8;
2288:     y[16*i+8]  += sum9;
2289:     y[16*i+9]  += sum10;
2290:     y[16*i+10] += sum11;
2291:     y[16*i+11] += sum12;
2292:     y[16*i+12] += sum13;
2293:     y[16*i+13] += sum14;
2294:     y[16*i+14] += sum15;
2295:     y[16*i+15] += sum16;
2296:   }

2298:   PetscLogFlops(32.0*a->nz);
2299:   VecRestoreArrayRead(xx,&x);
2300:   VecRestoreArray(zz,&y);
2301:   return(0);
2302: }

2304: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2305: {
2306:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2307:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2308:   const PetscScalar *x,*v;
2309:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2310:   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2311:   PetscErrorCode    ierr;
2312:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2313:   PetscInt          n,i;

2316:   if (yy != zz) {VecCopy(yy,zz);}
2317:   VecGetArrayRead(xx,&x);
2318:   VecGetArray(zz,&y);
2319:   for (i=0; i<m; i++) {
2320:     idx     = a->j + a->i[i];
2321:     v       = a->a + a->i[i];
2322:     n       = a->i[i+1] - a->i[i];
2323:     alpha1  = x[16*i];
2324:     alpha2  = x[16*i+1];
2325:     alpha3  = x[16*i+2];
2326:     alpha4  = x[16*i+3];
2327:     alpha5  = x[16*i+4];
2328:     alpha6  = x[16*i+5];
2329:     alpha7  = x[16*i+6];
2330:     alpha8  = x[16*i+7];
2331:     alpha9  = x[16*i+8];
2332:     alpha10 = x[16*i+9];
2333:     alpha11 = x[16*i+10];
2334:     alpha12 = x[16*i+11];
2335:     alpha13 = x[16*i+12];
2336:     alpha14 = x[16*i+13];
2337:     alpha15 = x[16*i+14];
2338:     alpha16 = x[16*i+15];
2339:     while (n-->0) {
2340:       y[16*(*idx)]    += alpha1*(*v);
2341:       y[16*(*idx)+1]  += alpha2*(*v);
2342:       y[16*(*idx)+2]  += alpha3*(*v);
2343:       y[16*(*idx)+3]  += alpha4*(*v);
2344:       y[16*(*idx)+4]  += alpha5*(*v);
2345:       y[16*(*idx)+5]  += alpha6*(*v);
2346:       y[16*(*idx)+6]  += alpha7*(*v);
2347:       y[16*(*idx)+7]  += alpha8*(*v);
2348:       y[16*(*idx)+8]  += alpha9*(*v);
2349:       y[16*(*idx)+9]  += alpha10*(*v);
2350:       y[16*(*idx)+10] += alpha11*(*v);
2351:       y[16*(*idx)+11] += alpha12*(*v);
2352:       y[16*(*idx)+12] += alpha13*(*v);
2353:       y[16*(*idx)+13] += alpha14*(*v);
2354:       y[16*(*idx)+14] += alpha15*(*v);
2355:       y[16*(*idx)+15] += alpha16*(*v);
2356:       idx++; v++;
2357:     }
2358:   }
2359:   PetscLogFlops(32.0*a->nz);
2360:   VecRestoreArrayRead(xx,&x);
2361:   VecRestoreArray(zz,&y);
2362:   return(0);
2363: }

2365: /*--------------------------------------------------------------------------------------------*/
2366: PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2367: {
2368:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2369:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2370:   const PetscScalar *x,*v;
2371:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2372:   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2373:   PetscErrorCode    ierr;
2374:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2375:   PetscInt          nonzerorow=0,n,i,jrow,j;

2378:   VecGetArrayRead(xx,&x);
2379:   VecGetArray(yy,&y);
2380:   idx  = a->j;
2381:   v    = a->a;
2382:   ii   = a->i;

2384:   for (i=0; i<m; i++) {
2385:     jrow  = ii[i];
2386:     n     = ii[i+1] - jrow;
2387:     sum1  = 0.0;
2388:     sum2  = 0.0;
2389:     sum3  = 0.0;
2390:     sum4  = 0.0;
2391:     sum5  = 0.0;
2392:     sum6  = 0.0;
2393:     sum7  = 0.0;
2394:     sum8  = 0.0;
2395:     sum9  = 0.0;
2396:     sum10 = 0.0;
2397:     sum11 = 0.0;
2398:     sum12 = 0.0;
2399:     sum13 = 0.0;
2400:     sum14 = 0.0;
2401:     sum15 = 0.0;
2402:     sum16 = 0.0;
2403:     sum17 = 0.0;
2404:     sum18 = 0.0;

2406:     nonzerorow += (n>0);
2407:     for (j=0; j<n; j++) {
2408:       sum1  += v[jrow]*x[18*idx[jrow]];
2409:       sum2  += v[jrow]*x[18*idx[jrow]+1];
2410:       sum3  += v[jrow]*x[18*idx[jrow]+2];
2411:       sum4  += v[jrow]*x[18*idx[jrow]+3];
2412:       sum5  += v[jrow]*x[18*idx[jrow]+4];
2413:       sum6  += v[jrow]*x[18*idx[jrow]+5];
2414:       sum7  += v[jrow]*x[18*idx[jrow]+6];
2415:       sum8  += v[jrow]*x[18*idx[jrow]+7];
2416:       sum9  += v[jrow]*x[18*idx[jrow]+8];
2417:       sum10 += v[jrow]*x[18*idx[jrow]+9];
2418:       sum11 += v[jrow]*x[18*idx[jrow]+10];
2419:       sum12 += v[jrow]*x[18*idx[jrow]+11];
2420:       sum13 += v[jrow]*x[18*idx[jrow]+12];
2421:       sum14 += v[jrow]*x[18*idx[jrow]+13];
2422:       sum15 += v[jrow]*x[18*idx[jrow]+14];
2423:       sum16 += v[jrow]*x[18*idx[jrow]+15];
2424:       sum17 += v[jrow]*x[18*idx[jrow]+16];
2425:       sum18 += v[jrow]*x[18*idx[jrow]+17];
2426:       jrow++;
2427:     }
2428:     y[18*i]    = sum1;
2429:     y[18*i+1]  = sum2;
2430:     y[18*i+2]  = sum3;
2431:     y[18*i+3]  = sum4;
2432:     y[18*i+4]  = sum5;
2433:     y[18*i+5]  = sum6;
2434:     y[18*i+6]  = sum7;
2435:     y[18*i+7]  = sum8;
2436:     y[18*i+8]  = sum9;
2437:     y[18*i+9]  = sum10;
2438:     y[18*i+10] = sum11;
2439:     y[18*i+11] = sum12;
2440:     y[18*i+12] = sum13;
2441:     y[18*i+13] = sum14;
2442:     y[18*i+14] = sum15;
2443:     y[18*i+15] = sum16;
2444:     y[18*i+16] = sum17;
2445:     y[18*i+17] = sum18;
2446:   }

2448:   PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);
2449:   VecRestoreArrayRead(xx,&x);
2450:   VecRestoreArray(yy,&y);
2451:   return(0);
2452: }

2454: PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2455: {
2456:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2457:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2458:   const PetscScalar *x,*v;
2459:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2460:   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2461:   PetscErrorCode    ierr;
2462:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2463:   PetscInt          n,i;

2466:   VecSet(yy,0.0);
2467:   VecGetArrayRead(xx,&x);
2468:   VecGetArray(yy,&y);

2470:   for (i=0; i<m; i++) {
2471:     idx     = a->j + a->i[i];
2472:     v       = a->a + a->i[i];
2473:     n       = a->i[i+1] - a->i[i];
2474:     alpha1  = x[18*i];
2475:     alpha2  = x[18*i+1];
2476:     alpha3  = x[18*i+2];
2477:     alpha4  = x[18*i+3];
2478:     alpha5  = x[18*i+4];
2479:     alpha6  = x[18*i+5];
2480:     alpha7  = x[18*i+6];
2481:     alpha8  = x[18*i+7];
2482:     alpha9  = x[18*i+8];
2483:     alpha10 = x[18*i+9];
2484:     alpha11 = x[18*i+10];
2485:     alpha12 = x[18*i+11];
2486:     alpha13 = x[18*i+12];
2487:     alpha14 = x[18*i+13];
2488:     alpha15 = x[18*i+14];
2489:     alpha16 = x[18*i+15];
2490:     alpha17 = x[18*i+16];
2491:     alpha18 = x[18*i+17];
2492:     while (n-->0) {
2493:       y[18*(*idx)]    += alpha1*(*v);
2494:       y[18*(*idx)+1]  += alpha2*(*v);
2495:       y[18*(*idx)+2]  += alpha3*(*v);
2496:       y[18*(*idx)+3]  += alpha4*(*v);
2497:       y[18*(*idx)+4]  += alpha5*(*v);
2498:       y[18*(*idx)+5]  += alpha6*(*v);
2499:       y[18*(*idx)+6]  += alpha7*(*v);
2500:       y[18*(*idx)+7]  += alpha8*(*v);
2501:       y[18*(*idx)+8]  += alpha9*(*v);
2502:       y[18*(*idx)+9]  += alpha10*(*v);
2503:       y[18*(*idx)+10] += alpha11*(*v);
2504:       y[18*(*idx)+11] += alpha12*(*v);
2505:       y[18*(*idx)+12] += alpha13*(*v);
2506:       y[18*(*idx)+13] += alpha14*(*v);
2507:       y[18*(*idx)+14] += alpha15*(*v);
2508:       y[18*(*idx)+15] += alpha16*(*v);
2509:       y[18*(*idx)+16] += alpha17*(*v);
2510:       y[18*(*idx)+17] += alpha18*(*v);
2511:       idx++; v++;
2512:     }
2513:   }
2514:   PetscLogFlops(36.0*a->nz);
2515:   VecRestoreArrayRead(xx,&x);
2516:   VecRestoreArray(yy,&y);
2517:   return(0);
2518: }

2520: PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2521: {
2522:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2523:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2524:   const PetscScalar *x,*v;
2525:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2526:   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2527:   PetscErrorCode    ierr;
2528:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2529:   PetscInt          n,i,jrow,j;

2532:   if (yy != zz) {VecCopy(yy,zz);}
2533:   VecGetArrayRead(xx,&x);
2534:   VecGetArray(zz,&y);
2535:   idx  = a->j;
2536:   v    = a->a;
2537:   ii   = a->i;

2539:   for (i=0; i<m; i++) {
2540:     jrow  = ii[i];
2541:     n     = ii[i+1] - jrow;
2542:     sum1  = 0.0;
2543:     sum2  = 0.0;
2544:     sum3  = 0.0;
2545:     sum4  = 0.0;
2546:     sum5  = 0.0;
2547:     sum6  = 0.0;
2548:     sum7  = 0.0;
2549:     sum8  = 0.0;
2550:     sum9  = 0.0;
2551:     sum10 = 0.0;
2552:     sum11 = 0.0;
2553:     sum12 = 0.0;
2554:     sum13 = 0.0;
2555:     sum14 = 0.0;
2556:     sum15 = 0.0;
2557:     sum16 = 0.0;
2558:     sum17 = 0.0;
2559:     sum18 = 0.0;
2560:     for (j=0; j<n; j++) {
2561:       sum1  += v[jrow]*x[18*idx[jrow]];
2562:       sum2  += v[jrow]*x[18*idx[jrow]+1];
2563:       sum3  += v[jrow]*x[18*idx[jrow]+2];
2564:       sum4  += v[jrow]*x[18*idx[jrow]+3];
2565:       sum5  += v[jrow]*x[18*idx[jrow]+4];
2566:       sum6  += v[jrow]*x[18*idx[jrow]+5];
2567:       sum7  += v[jrow]*x[18*idx[jrow]+6];
2568:       sum8  += v[jrow]*x[18*idx[jrow]+7];
2569:       sum9  += v[jrow]*x[18*idx[jrow]+8];
2570:       sum10 += v[jrow]*x[18*idx[jrow]+9];
2571:       sum11 += v[jrow]*x[18*idx[jrow]+10];
2572:       sum12 += v[jrow]*x[18*idx[jrow]+11];
2573:       sum13 += v[jrow]*x[18*idx[jrow]+12];
2574:       sum14 += v[jrow]*x[18*idx[jrow]+13];
2575:       sum15 += v[jrow]*x[18*idx[jrow]+14];
2576:       sum16 += v[jrow]*x[18*idx[jrow]+15];
2577:       sum17 += v[jrow]*x[18*idx[jrow]+16];
2578:       sum18 += v[jrow]*x[18*idx[jrow]+17];
2579:       jrow++;
2580:     }
2581:     y[18*i]    += sum1;
2582:     y[18*i+1]  += sum2;
2583:     y[18*i+2]  += sum3;
2584:     y[18*i+3]  += sum4;
2585:     y[18*i+4]  += sum5;
2586:     y[18*i+5]  += sum6;
2587:     y[18*i+6]  += sum7;
2588:     y[18*i+7]  += sum8;
2589:     y[18*i+8]  += sum9;
2590:     y[18*i+9]  += sum10;
2591:     y[18*i+10] += sum11;
2592:     y[18*i+11] += sum12;
2593:     y[18*i+12] += sum13;
2594:     y[18*i+13] += sum14;
2595:     y[18*i+14] += sum15;
2596:     y[18*i+15] += sum16;
2597:     y[18*i+16] += sum17;
2598:     y[18*i+17] += sum18;
2599:   }

2601:   PetscLogFlops(36.0*a->nz);
2602:   VecRestoreArrayRead(xx,&x);
2603:   VecRestoreArray(zz,&y);
2604:   return(0);
2605: }

2607: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2608: {
2609:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2610:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2611:   const PetscScalar *x,*v;
2612:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2613:   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2614:   PetscErrorCode    ierr;
2615:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2616:   PetscInt          n,i;

2619:   if (yy != zz) {VecCopy(yy,zz);}
2620:   VecGetArrayRead(xx,&x);
2621:   VecGetArray(zz,&y);
2622:   for (i=0; i<m; i++) {
2623:     idx     = a->j + a->i[i];
2624:     v       = a->a + a->i[i];
2625:     n       = a->i[i+1] - a->i[i];
2626:     alpha1  = x[18*i];
2627:     alpha2  = x[18*i+1];
2628:     alpha3  = x[18*i+2];
2629:     alpha4  = x[18*i+3];
2630:     alpha5  = x[18*i+4];
2631:     alpha6  = x[18*i+5];
2632:     alpha7  = x[18*i+6];
2633:     alpha8  = x[18*i+7];
2634:     alpha9  = x[18*i+8];
2635:     alpha10 = x[18*i+9];
2636:     alpha11 = x[18*i+10];
2637:     alpha12 = x[18*i+11];
2638:     alpha13 = x[18*i+12];
2639:     alpha14 = x[18*i+13];
2640:     alpha15 = x[18*i+14];
2641:     alpha16 = x[18*i+15];
2642:     alpha17 = x[18*i+16];
2643:     alpha18 = x[18*i+17];
2644:     while (n-->0) {
2645:       y[18*(*idx)]    += alpha1*(*v);
2646:       y[18*(*idx)+1]  += alpha2*(*v);
2647:       y[18*(*idx)+2]  += alpha3*(*v);
2648:       y[18*(*idx)+3]  += alpha4*(*v);
2649:       y[18*(*idx)+4]  += alpha5*(*v);
2650:       y[18*(*idx)+5]  += alpha6*(*v);
2651:       y[18*(*idx)+6]  += alpha7*(*v);
2652:       y[18*(*idx)+7]  += alpha8*(*v);
2653:       y[18*(*idx)+8]  += alpha9*(*v);
2654:       y[18*(*idx)+9]  += alpha10*(*v);
2655:       y[18*(*idx)+10] += alpha11*(*v);
2656:       y[18*(*idx)+11] += alpha12*(*v);
2657:       y[18*(*idx)+12] += alpha13*(*v);
2658:       y[18*(*idx)+13] += alpha14*(*v);
2659:       y[18*(*idx)+14] += alpha15*(*v);
2660:       y[18*(*idx)+15] += alpha16*(*v);
2661:       y[18*(*idx)+16] += alpha17*(*v);
2662:       y[18*(*idx)+17] += alpha18*(*v);
2663:       idx++; v++;
2664:     }
2665:   }
2666:   PetscLogFlops(36.0*a->nz);
2667:   VecRestoreArrayRead(xx,&x);
2668:   VecRestoreArray(zz,&y);
2669:   return(0);
2670: }

2672: PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2673: {
2674:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2675:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2676:   const PetscScalar *x,*v;
2677:   PetscScalar       *y,*sums;
2678:   PetscErrorCode    ierr;
2679:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2680:   PetscInt          n,i,jrow,j,dof = b->dof,k;

2683:   VecGetArrayRead(xx,&x);
2684:   VecSet(yy,0.0);
2685:   VecGetArray(yy,&y);
2686:   idx  = a->j;
2687:   v    = a->a;
2688:   ii   = a->i;

2690:   for (i=0; i<m; i++) {
2691:     jrow = ii[i];
2692:     n    = ii[i+1] - jrow;
2693:     sums = y + dof*i;
2694:     for (j=0; j<n; j++) {
2695:       for (k=0; k<dof; k++) {
2696:         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2697:       }
2698:       jrow++;
2699:     }
2700:   }

2702:   PetscLogFlops(2.0*dof*a->nz);
2703:   VecRestoreArrayRead(xx,&x);
2704:   VecRestoreArray(yy,&y);
2705:   return(0);
2706: }

2708: PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2709: {
2710:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2711:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2712:   const PetscScalar *x,*v;
2713:   PetscScalar       *y,*sums;
2714:   PetscErrorCode    ierr;
2715:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2716:   PetscInt          n,i,jrow,j,dof = b->dof,k;

2719:   if (yy != zz) {VecCopy(yy,zz);}
2720:   VecGetArrayRead(xx,&x);
2721:   VecGetArray(zz,&y);
2722:   idx  = a->j;
2723:   v    = a->a;
2724:   ii   = a->i;

2726:   for (i=0; i<m; i++) {
2727:     jrow = ii[i];
2728:     n    = ii[i+1] - jrow;
2729:     sums = y + dof*i;
2730:     for (j=0; j<n; j++) {
2731:       for (k=0; k<dof; k++) {
2732:         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2733:       }
2734:       jrow++;
2735:     }
2736:   }

2738:   PetscLogFlops(2.0*dof*a->nz);
2739:   VecRestoreArrayRead(xx,&x);
2740:   VecRestoreArray(zz,&y);
2741:   return(0);
2742: }

2744: PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2745: {
2746:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2747:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2748:   const PetscScalar *x,*v,*alpha;
2749:   PetscScalar       *y;
2750:   PetscErrorCode    ierr;
2751:   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2752:   PetscInt          n,i,k;

2755:   VecGetArrayRead(xx,&x);
2756:   VecSet(yy,0.0);
2757:   VecGetArray(yy,&y);
2758:   for (i=0; i<m; i++) {
2759:     idx   = a->j + a->i[i];
2760:     v     = a->a + a->i[i];
2761:     n     = a->i[i+1] - a->i[i];
2762:     alpha = x + dof*i;
2763:     while (n-->0) {
2764:       for (k=0; k<dof; k++) {
2765:         y[dof*(*idx)+k] += alpha[k]*(*v);
2766:       }
2767:       idx++; v++;
2768:     }
2769:   }
2770:   PetscLogFlops(2.0*dof*a->nz);
2771:   VecRestoreArrayRead(xx,&x);
2772:   VecRestoreArray(yy,&y);
2773:   return(0);
2774: }

2776: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2777: {
2778:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2779:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2780:   const PetscScalar *x,*v,*alpha;
2781:   PetscScalar       *y;
2782:   PetscErrorCode    ierr;
2783:   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2784:   PetscInt          n,i,k;

2787:   if (yy != zz) {VecCopy(yy,zz);}
2788:   VecGetArrayRead(xx,&x);
2789:   VecGetArray(zz,&y);
2790:   for (i=0; i<m; i++) {
2791:     idx   = a->j + a->i[i];
2792:     v     = a->a + a->i[i];
2793:     n     = a->i[i+1] - a->i[i];
2794:     alpha = x + dof*i;
2795:     while (n-->0) {
2796:       for (k=0; k<dof; k++) {
2797:         y[dof*(*idx)+k] += alpha[k]*(*v);
2798:       }
2799:       idx++; v++;
2800:     }
2801:   }
2802:   PetscLogFlops(2.0*dof*a->nz);
2803:   VecRestoreArrayRead(xx,&x);
2804:   VecRestoreArray(zz,&y);
2805:   return(0);
2806: }

2808: /*===================================================================================*/
2809: PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2810: {
2811:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2815:   /* start the scatter */
2816:   VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2817:   (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
2818:   VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2819:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
2820:   return(0);
2821: }

2823: PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2824: {
2825:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2829:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2830:   (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
2831:   VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2832:   VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2833:   return(0);
2834: }

2836: PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2837: {
2838:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2842:   /* start the scatter */
2843:   VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2844:   (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
2845:   VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2846:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);
2847:   return(0);
2848: }

2850: PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2851: {
2852:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2856:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2857:   VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2858:   (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
2859:   VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2860:   return(0);
2861: }

2863: /* ----------------------------------------------------------------*/
2864: PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2865: {
2866:   PetscErrorCode     ierr;
2867:   PetscFreeSpaceList free_space=NULL,current_space=NULL;
2868:   Mat_SeqMAIJ        *pp       =(Mat_SeqMAIJ*)PP->data;
2869:   Mat                P         =pp->AIJ;
2870:   Mat_SeqAIJ         *a        =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2871:   PetscInt           *pti,*ptj,*ptJ;
2872:   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2873:   const PetscInt     an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
2874:   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
2875:   MatScalar          *ca;
2876:   const PetscInt     *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;

2879:   /* Get ij structure of P^T */
2880:   MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);

2882:   cn = pn*ppdof;
2883:   /* Allocate ci array, arrays for fill computation and */
2884:   /* free space for accumulating nonzero column info */
2885:   PetscMalloc1(cn+1,&ci);
2886:   ci[0] = 0;

2888:   /* Work arrays for rows of P^T*A */
2889:   PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow);
2890:   PetscMemzero(ptadenserow,an*sizeof(PetscInt));
2891:   PetscMemzero(denserow,cn*sizeof(PetscInt));

2893:   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2894:   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2895:   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2896:   PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space);
2897:   current_space = free_space;

2899:   /* Determine symbolic info for each row of C: */
2900:   for (i=0; i<pn; i++) {
2901:     ptnzi = pti[i+1] - pti[i];
2902:     ptJ   = ptj + pti[i];
2903:     for (dof=0; dof<ppdof; dof++) {
2904:       ptanzi = 0;
2905:       /* Determine symbolic row of PtA: */
2906:       for (j=0; j<ptnzi; j++) {
2907:         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2908:         arow = ptJ[j]*ppdof + dof;
2909:         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2910:         anzj = ai[arow+1] - ai[arow];
2911:         ajj  = aj + ai[arow];
2912:         for (k=0; k<anzj; k++) {
2913:           if (!ptadenserow[ajj[k]]) {
2914:             ptadenserow[ajj[k]]    = -1;
2915:             ptasparserow[ptanzi++] = ajj[k];
2916:           }
2917:         }
2918:       }
2919:       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2920:       ptaj = ptasparserow;
2921:       cnzi = 0;
2922:       for (j=0; j<ptanzi; j++) {
2923:         /* Get offset within block of P */
2924:         pshift = *ptaj%ppdof;
2925:         /* Get block row of P */
2926:         prow = (*ptaj++)/ppdof; /* integer division */
2927:         /* P has same number of nonzeros per row as the compressed form */
2928:         pnzj = pi[prow+1] - pi[prow];
2929:         pjj  = pj + pi[prow];
2930:         for (k=0;k<pnzj;k++) {
2931:           /* Locations in C are shifted by the offset within the block */
2932:           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
2933:           if (!denserow[pjj[k]*ppdof+pshift]) {
2934:             denserow[pjj[k]*ppdof+pshift] = -1;
2935:             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
2936:           }
2937:         }
2938:       }

2940:       /* sort sparserow */
2941:       PetscSortInt(cnzi,sparserow);

2943:       /* If free space is not available, make more free space */
2944:       /* Double the amount of total space in the list */
2945:       if (current_space->local_remaining<cnzi) {
2946:         PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),&current_space);
2947:       }

2949:       /* Copy data into free space, and zero out denserows */
2950:       PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));

2952:       current_space->array           += cnzi;
2953:       current_space->local_used      += cnzi;
2954:       current_space->local_remaining -= cnzi;

2956:       for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
2957:       for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0;

2959:       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
2960:       /*        For now, we will recompute what is needed. */
2961:       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
2962:     }
2963:   }
2964:   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
2965:   /* Allocate space for cj, initialize cj, and */
2966:   /* destroy list of free space and other temporary array(s) */
2967:   PetscMalloc1(ci[cn]+1,&cj);
2968:   PetscFreeSpaceContiguous(&free_space,cj);
2969:   PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);

2971:   /* Allocate space for ca */
2972:   PetscCalloc1(ci[cn]+1,&ca);

2974:   /* put together the new matrix */
2975:   MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,C);
2976:   MatSetBlockSize(*C,pp->dof);

2978:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
2979:   /* Since these are PETSc arrays, change flags to free them as necessary. */
2980:   c          = (Mat_SeqAIJ*)((*C)->data);
2981:   c->free_a  = PETSC_TRUE;
2982:   c->free_ij = PETSC_TRUE;
2983:   c->nonew   = 0;

2985:   (*C)->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ;

2987:   /* Clean up. */
2988:   MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
2989:   return(0);
2990: }

2992: PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
2993: {
2994:   /* This routine requires testing -- first draft only */
2995:   PetscErrorCode  ierr;
2996:   Mat_SeqMAIJ     *pp=(Mat_SeqMAIJ*)PP->data;
2997:   Mat             P  =pp->AIJ;
2998:   Mat_SeqAIJ      *a = (Mat_SeqAIJ*) A->data;
2999:   Mat_SeqAIJ      *p = (Mat_SeqAIJ*) P->data;
3000:   Mat_SeqAIJ      *c = (Mat_SeqAIJ*) C->data;
3001:   const PetscInt  *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj;
3002:   const PetscInt  *ci=c->i,*cj=c->j,*cjj;
3003:   const PetscInt  am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3004:   PetscInt        i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3005:   const MatScalar *aa=a->a,*pa=p->a,*pA,*paj;
3006:   MatScalar       *ca=c->a,*caj,*apa;

3009:   /* Allocate temporary array for storage of one row of A*P */
3010:   PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense);

3012:   /* Clear old values in C */
3013:   PetscMemzero(ca,ci[cm]*sizeof(MatScalar));

3015:   for (i=0; i<am; i++) {
3016:     /* Form sparse row of A*P */
3017:     anzi  = ai[i+1] - ai[i];
3018:     apnzj = 0;
3019:     for (j=0; j<anzi; j++) {
3020:       /* Get offset within block of P */
3021:       pshift = *aj%ppdof;
3022:       /* Get block row of P */
3023:       prow = *aj++/ppdof;   /* integer division */
3024:       pnzj = pi[prow+1] - pi[prow];
3025:       pjj  = pj + pi[prow];
3026:       paj  = pa + pi[prow];
3027:       for (k=0; k<pnzj; k++) {
3028:         poffset = pjj[k]*ppdof+pshift;
3029:         if (!apjdense[poffset]) {
3030:           apjdense[poffset] = -1;
3031:           apj[apnzj++]      = poffset;
3032:         }
3033:         apa[poffset] += (*aa)*paj[k];
3034:       }
3035:       PetscLogFlops(2.0*pnzj);
3036:       aa++;
3037:     }

3039:     /* Sort the j index array for quick sparse axpy. */
3040:     /* Note: a array does not need sorting as it is in dense storage locations. */
3041:     PetscSortInt(apnzj,apj);

3043:     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
3044:     prow    = i/ppdof; /* integer division */
3045:     pshift  = i%ppdof;
3046:     poffset = pi[prow];
3047:     pnzi    = pi[prow+1] - poffset;
3048:     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
3049:     pJ = pj+poffset;
3050:     pA = pa+poffset;
3051:     for (j=0; j<pnzi; j++) {
3052:       crow = (*pJ)*ppdof+pshift;
3053:       cjj  = cj + ci[crow];
3054:       caj  = ca + ci[crow];
3055:       pJ++;
3056:       /* Perform sparse axpy operation.  Note cjj includes apj. */
3057:       for (k=0,nextap=0; nextap<apnzj; k++) {
3058:         if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]];
3059:       }
3060:       PetscLogFlops(2.0*apnzj);
3061:       pA++;
3062:     }

3064:     /* Zero the current row info for A*P */
3065:     for (j=0; j<apnzj; j++) {
3066:       apa[apj[j]]      = 0.;
3067:       apjdense[apj[j]] = 0;
3068:     }
3069:   }

3071:   /* Assemble the final matrix and clean up */
3072:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
3073:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
3074:   PetscFree3(apa,apj,apjdense);
3075:   return(0);
3076: }

3078: PETSC_INTERN PetscErrorCode MatPtAP_SeqAIJ_SeqMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
3079: {

3083:   if (scall == MAT_INITIAL_MATRIX) {
3084:     PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);
3085:     MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,fill,C);
3086:     PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);
3087:   }
3088:   PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);
3089:   MatPtAPNumeric_SeqAIJ_SeqMAIJ(A,P,*C);
3090:   PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);
3091:   return(0);
3092: }

3094: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
3095: {

3099:   /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */
3100:   MatConvert(PP,MATMPIAIJ,MAT_INPLACE_MATRIX,&PP);
3101:   ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);
3102:   return(0);
3103: }

3105: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
3106: {
3108:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
3109:   return(0);
3110: }

3112: PETSC_INTERN PetscErrorCode MatPtAP_MPIAIJ_MPIMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
3113: {

3117:   if (scall == MAT_INITIAL_MATRIX) {
3118:     PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);
3119:     MatPtAPSymbolic_MPIAIJ_MPIMAIJ(A,P,fill,C);
3120:     PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);
3121:   }
3122:   PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);
3123:   ((*C)->ops->ptapnumeric)(A,P,*C);
3124:   PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);
3125:   return(0);
3126: }

3128: PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3129: {
3130:   Mat_SeqMAIJ    *b   = (Mat_SeqMAIJ*)A->data;
3131:   Mat            a    = b->AIJ,B;
3132:   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)a->data;
3134:   PetscInt       m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3135:   PetscInt       *cols;
3136:   PetscScalar    *vals;

3139:   MatGetSize(a,&m,&n);
3140:   PetscMalloc1(dof*m,&ilen);
3141:   for (i=0; i<m; i++) {
3142:     nmax = PetscMax(nmax,aij->ilen[i]);
3143:     for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i];
3144:   }
3145:   MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);
3146:   PetscFree(ilen);
3147:   PetscMalloc1(nmax,&icols);
3148:   ii   = 0;
3149:   for (i=0; i<m; i++) {
3150:     MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3151:     for (j=0; j<dof; j++) {
3152:       for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j;
3153:       MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3154:       ii++;
3155:     }
3156:     MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3157:   }
3158:   PetscFree(icols);
3159:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3160:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

3162:   if (reuse == MAT_INPLACE_MATRIX) {
3163:     MatHeaderReplace(A,&B);
3164:   } else {
3165:     *newmat = B;
3166:   }
3167:   return(0);
3168: }

3170:  #include <../src/mat/impls/aij/mpi/mpiaij.h>

3172: PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3173: {
3174:   Mat_MPIMAIJ    *maij   = (Mat_MPIMAIJ*)A->data;
3175:   Mat            MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
3176:   Mat            MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
3177:   Mat_SeqAIJ     *AIJ    = (Mat_SeqAIJ*) MatAIJ->data;
3178:   Mat_SeqAIJ     *OAIJ   =(Mat_SeqAIJ*) MatOAIJ->data;
3179:   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*) maij->A->data;
3180:   PetscInt       dof     = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0;
3181:   PetscInt       *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL;
3182:   PetscInt       rstart,cstart,*garray,ii,k;
3184:   PetscScalar    *vals,*ovals;

3187:   PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz);
3188:   for (i=0; i<A->rmap->n/dof; i++) {
3189:     nmax  = PetscMax(nmax,AIJ->ilen[i]);
3190:     onmax = PetscMax(onmax,OAIJ->ilen[i]);
3191:     for (j=0; j<dof; j++) {
3192:       dnz[dof*i+j] = AIJ->ilen[i];
3193:       onz[dof*i+j] = OAIJ->ilen[i];
3194:     }
3195:   }
3196:   MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);
3197:   MatSetBlockSize(B,dof);
3198:   PetscFree2(dnz,onz);

3200:   PetscMalloc2(nmax,&icols,onmax,&oicols);
3201:   rstart = dof*maij->A->rmap->rstart;
3202:   cstart = dof*maij->A->cmap->rstart;
3203:   garray = mpiaij->garray;

3205:   ii = rstart;
3206:   for (i=0; i<A->rmap->n/dof; i++) {
3207:     MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3208:     MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3209:     for (j=0; j<dof; j++) {
3210:       for (k=0; k<ncols; k++) {
3211:         icols[k] = cstart + dof*cols[k]+j;
3212:       }
3213:       for (k=0; k<oncols; k++) {
3214:         oicols[k] = dof*garray[ocols[k]]+j;
3215:       }
3216:       MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3217:       MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);
3218:       ii++;
3219:     }
3220:     MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3221:     MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3222:   }
3223:   PetscFree2(icols,oicols);

3225:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3226:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

3228:   if (reuse == MAT_INPLACE_MATRIX) {
3229:     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
3230:     ((PetscObject)A)->refct = 1;

3232:     MatHeaderReplace(A,&B);

3234:     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3235:   } else {
3236:     *newmat = B;
3237:   }
3238:   return(0);
3239: }

3241: PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
3242: {
3244:   Mat            A;

3247:   MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);
3248:   MatCreateSubMatrix(A,isrow,iscol,cll,newmat);
3249:   MatDestroy(&A);
3250:   return(0);
3251: }

3253: /* ---------------------------------------------------------------------------------- */
3254: /*@
3255:   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
3256:   operations for multicomponent problems.  It interpolates each component the same
3257:   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
3258:   and MATMPIAIJ for distributed matrices.

3260:   Collective

3262:   Input Parameters:
3263: + A - the AIJ matrix describing the action on blocks
3264: - dof - the block size (number of components per node)

3266:   Output Parameter:
3267: . maij - the new MAIJ matrix

3269:   Operations provided:
3270: + MatMult
3271: . MatMultTranspose
3272: . MatMultAdd
3273: . MatMultTransposeAdd
3274: - MatView

3276:   Level: advanced

3278: .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ
3279: @*/
3280: PetscErrorCode  MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
3281: {
3283:   PetscMPIInt    size;
3284:   PetscInt       n;
3285:   Mat            B;

3288:   PetscObjectReference((PetscObject)A);

3290:   if (dof == 1) *maij = A;
3291:   else {
3292:     MatCreate(PetscObjectComm((PetscObject)A),&B);
3293:     MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);
3294:     PetscLayoutSetBlockSize(B->rmap,dof);
3295:     PetscLayoutSetBlockSize(B->cmap,dof);
3296:     PetscLayoutSetUp(B->rmap);
3297:     PetscLayoutSetUp(B->cmap);

3299:     B->assembled = PETSC_TRUE;

3301:     MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
3302:     if (size == 1) {
3303:       Mat_SeqMAIJ *b;

3305:       MatSetType(B,MATSEQMAIJ);

3307:       B->ops->setup   = NULL;
3308:       B->ops->destroy = MatDestroy_SeqMAIJ;
3309:       B->ops->view    = MatView_SeqMAIJ;
3310:       b               = (Mat_SeqMAIJ*)B->data;
3311:       b->dof          = dof;
3312:       b->AIJ          = A;

3314:       if (dof == 2) {
3315:         B->ops->mult             = MatMult_SeqMAIJ_2;
3316:         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3317:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3318:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3319:       } else if (dof == 3) {
3320:         B->ops->mult             = MatMult_SeqMAIJ_3;
3321:         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3322:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3323:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3324:       } else if (dof == 4) {
3325:         B->ops->mult             = MatMult_SeqMAIJ_4;
3326:         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3327:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3328:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3329:       } else if (dof == 5) {
3330:         B->ops->mult             = MatMult_SeqMAIJ_5;
3331:         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3332:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3333:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
3334:       } else if (dof == 6) {
3335:         B->ops->mult             = MatMult_SeqMAIJ_6;
3336:         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
3337:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
3338:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3339:       } else if (dof == 7) {
3340:         B->ops->mult             = MatMult_SeqMAIJ_7;
3341:         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3342:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3343:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
3344:       } else if (dof == 8) {
3345:         B->ops->mult             = MatMult_SeqMAIJ_8;
3346:         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
3347:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
3348:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
3349:       } else if (dof == 9) {
3350:         B->ops->mult             = MatMult_SeqMAIJ_9;
3351:         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
3352:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
3353:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3354:       } else if (dof == 10) {
3355:         B->ops->mult             = MatMult_SeqMAIJ_10;
3356:         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3357:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3358:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3359:       } else if (dof == 11) {
3360:         B->ops->mult             = MatMult_SeqMAIJ_11;
3361:         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3362:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3363:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
3364:       } else if (dof == 16) {
3365:         B->ops->mult             = MatMult_SeqMAIJ_16;
3366:         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
3367:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
3368:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3369:       } else if (dof == 18) {
3370:         B->ops->mult             = MatMult_SeqMAIJ_18;
3371:         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3372:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3373:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
3374:       } else {
3375:         B->ops->mult             = MatMult_SeqMAIJ_N;
3376:         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
3377:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
3378:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
3379:       }
3380:       PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ);
3381:       PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaij_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);
3382:       PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaijperm_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);
3383:       PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaijmkl_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);
3384:     } else {
3385:       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ*)A->data;
3386:       Mat_MPIMAIJ *b;
3387:       IS          from,to;
3388:       Vec         gvec;

3390:       MatSetType(B,MATMPIMAIJ);

3392:       B->ops->setup   = NULL;
3393:       B->ops->destroy = MatDestroy_MPIMAIJ;
3394:       B->ops->view    = MatView_MPIMAIJ;

3396:       b      = (Mat_MPIMAIJ*)B->data;
3397:       b->dof = dof;
3398:       b->A   = A;

3400:       MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);
3401:       MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);

3403:       VecGetSize(mpiaij->lvec,&n);
3404:       VecCreate(PETSC_COMM_SELF,&b->w);
3405:       VecSetSizes(b->w,n*dof,n*dof);
3406:       VecSetBlockSize(b->w,dof);
3407:       VecSetType(b->w,VECSEQ);

3409:       /* create two temporary Index sets for build scatter gather */
3410:       ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);
3411:       ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);

3413:       /* create temporary global vector to generate scatter context */
3414:       VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);

3416:       /* generate the scatter context */
3417:       VecScatterCreate(gvec,from,b->w,to,&b->ctx);

3419:       ISDestroy(&from);
3420:       ISDestroy(&to);
3421:       VecDestroy(&gvec);

3423:       B->ops->mult             = MatMult_MPIMAIJ_dof;
3424:       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
3425:       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
3426:       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;

3428:       PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ);
3429:       PetscObjectComposeFunction((PetscObject)B,"MatPtAP_mpiaij_mpimaij_C",MatPtAP_MPIAIJ_MPIMAIJ);
3430:     }
3431:     B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ;
3432:     MatSetUp(B);
3433:     *maij = B;
3434:     MatViewFromOptions(B,NULL,"-mat_view");
3435:   }
3436:   return(0);
3437: }