Actual source code: maij.c


  2: /*
  3:     Defines the basic matrix operations for the MAIJ  matrix storage format.
  4:   This format is used for restriction and interpolation operations for
  5:   multicomponent problems. It interpolates each component the same way
  6:   independently.

  8:      We provide:
  9:          MatMult()
 10:          MatMultTranspose()
 11:          MatMultTransposeAdd()
 12:          MatMultAdd()
 13:           and
 14:          MatCreateMAIJ(Mat,dof,Mat*)

 16:      This single directory handles both the sequential and parallel codes
 17: */

 19: #include <../src/mat/impls/maij/maij.h>
 20: #include <../src/mat/utils/freespace.h>

 22: /*@
 23:    MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix

 25:    Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel

 27:    Input Parameter:
 28: .  A - the MAIJ matrix

 30:    Output Parameter:
 31: .  B - the AIJ matrix

 33:    Level: advanced

 35:    Notes:
 36:     The reference count on the AIJ matrix is not increased so you should not destroy it.

 38: .seealso: MatCreateMAIJ()
 39: @*/
 40: PetscErrorCode  MatMAIJGetAIJ(Mat A,Mat *B)
 41: {
 42:   PetscBool      ismpimaij,isseqmaij;

 44:   PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
 45:   PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
 46:   if (ismpimaij) {
 47:     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;

 49:     *B = b->A;
 50:   } else if (isseqmaij) {
 51:     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;

 53:     *B = b->AIJ;
 54:   } else {
 55:     *B = A;
 56:   }
 57:   return 0;
 58: }

 60: /*@
 61:    MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size

 63:    Logically Collective

 65:    Input Parameters:
 66: +  A - the MAIJ matrix
 67: -  dof - the block size for the new matrix

 69:    Output Parameter:
 70: .  B - the new MAIJ matrix

 72:    Level: advanced

 74: .seealso: MatCreateMAIJ()
 75: @*/
 76: PetscErrorCode  MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
 77: {
 78:   Mat            Aij = NULL;

 81:   MatMAIJGetAIJ(A,&Aij);
 82:   MatCreateMAIJ(Aij,dof,B);
 83:   return 0;
 84: }

 86: PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
 87: {
 88:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;

 90:   MatDestroy(&b->AIJ);
 91:   PetscFree(A->data);
 92:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL);
 93:   PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaij_seqmaij_C",NULL);
 94:   return 0;
 95: }

 97: PetscErrorCode MatSetUp_MAIJ(Mat A)
 98: {
 99:   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices");
100: }

102: PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
103: {
104:   Mat            B;

106:   MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
107:   MatView(B,viewer);
108:   MatDestroy(&B);
109:   return 0;
110: }

112: PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
113: {
114:   Mat            B;

116:   MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
117:   MatView(B,viewer);
118:   MatDestroy(&B);
119:   return 0;
120: }

122: PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
123: {
124:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

126:   MatDestroy(&b->AIJ);
127:   MatDestroy(&b->OAIJ);
128:   MatDestroy(&b->A);
129:   VecScatterDestroy(&b->ctx);
130:   VecDestroy(&b->w);
131:   PetscFree(A->data);
132:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL);
133:   PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_mpiaij_mpimaij_C",NULL);
134:   PetscObjectChangeTypeName((PetscObject)A,NULL);
135:   return 0;
136: }

138: /*MC
139:   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
140:   multicomponent problems, interpolating or restricting each component the same way independently.
141:   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.

143:   Operations provided:
144: .vb
145:     MatMult
146:     MatMultTranspose
147:     MatMultAdd
148:     MatMultTransposeAdd
149: .ve

151:   Level: advanced

153: .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ()
154: M*/

156: PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
157: {
158:   Mat_MPIMAIJ    *b;
159:   PetscMPIInt    size;

161:   PetscNewLog(A,&b);
162:   A->data  = (void*)b;

164:   PetscMemzero(A->ops,sizeof(struct _MatOps));

166:   A->ops->setup = MatSetUp_MAIJ;

168:   b->AIJ  = NULL;
169:   b->dof  = 0;
170:   b->OAIJ = NULL;
171:   b->ctx  = NULL;
172:   b->w    = NULL;
173:   MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
174:   if (size == 1) {
175:     PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);
176:   } else {
177:     PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);
178:   }
179:   A->preallocated  = PETSC_TRUE;
180:   A->assembled     = PETSC_TRUE;
181:   return 0;
182: }

184: /* --------------------------------------------------------------------------------------*/
185: PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
186: {
187:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
188:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
189:   const PetscScalar *x,*v;
190:   PetscScalar       *y, sum1, sum2;
191:   PetscInt          nonzerorow=0,n,i,jrow,j;
192:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;

194:   VecGetArrayRead(xx,&x);
195:   VecGetArray(yy,&y);
196:   idx  = a->j;
197:   v    = a->a;
198:   ii   = a->i;

200:   for (i=0; i<m; i++) {
201:     jrow  = ii[i];
202:     n     = ii[i+1] - jrow;
203:     sum1  = 0.0;
204:     sum2  = 0.0;

206:     nonzerorow += (n>0);
207:     for (j=0; j<n; j++) {
208:       sum1 += v[jrow]*x[2*idx[jrow]];
209:       sum2 += v[jrow]*x[2*idx[jrow]+1];
210:       jrow++;
211:     }
212:     y[2*i]   = sum1;
213:     y[2*i+1] = sum2;
214:   }

216:   PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);
217:   VecRestoreArrayRead(xx,&x);
218:   VecRestoreArray(yy,&y);
219:   return 0;
220: }

222: PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
223: {
224:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
225:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
226:   const PetscScalar *x,*v;
227:   PetscScalar       *y,alpha1,alpha2;
228:   PetscInt          n,i;
229:   const PetscInt    m = b->AIJ->rmap->n,*idx;

231:   VecSet(yy,0.0);
232:   VecGetArrayRead(xx,&x);
233:   VecGetArray(yy,&y);

235:   for (i=0; i<m; i++) {
236:     idx    = a->j + a->i[i];
237:     v      = a->a + a->i[i];
238:     n      = a->i[i+1] - a->i[i];
239:     alpha1 = x[2*i];
240:     alpha2 = x[2*i+1];
241:     while (n-->0) {
242:       y[2*(*idx)]   += alpha1*(*v);
243:       y[2*(*idx)+1] += alpha2*(*v);
244:       idx++; v++;
245:     }
246:   }
247:   PetscLogFlops(4.0*a->nz);
248:   VecRestoreArrayRead(xx,&x);
249:   VecRestoreArray(yy,&y);
250:   return 0;
251: }

253: PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
254: {
255:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
256:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
257:   const PetscScalar *x,*v;
258:   PetscScalar       *y,sum1, sum2;
259:   PetscInt          n,i,jrow,j;
260:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

262:   if (yy != zz) VecCopy(yy,zz);
263:   VecGetArrayRead(xx,&x);
264:   VecGetArray(zz,&y);
265:   idx  = a->j;
266:   v    = a->a;
267:   ii   = a->i;

269:   for (i=0; i<m; i++) {
270:     jrow = ii[i];
271:     n    = ii[i+1] - jrow;
272:     sum1 = 0.0;
273:     sum2 = 0.0;
274:     for (j=0; j<n; j++) {
275:       sum1 += v[jrow]*x[2*idx[jrow]];
276:       sum2 += v[jrow]*x[2*idx[jrow]+1];
277:       jrow++;
278:     }
279:     y[2*i]   += sum1;
280:     y[2*i+1] += sum2;
281:   }

283:   PetscLogFlops(4.0*a->nz);
284:   VecRestoreArrayRead(xx,&x);
285:   VecRestoreArray(zz,&y);
286:   return 0;
287: }
288: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
289: {
290:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
291:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
292:   const PetscScalar *x,*v;
293:   PetscScalar       *y,alpha1,alpha2;
294:   PetscInt          n,i;
295:   const PetscInt    m = b->AIJ->rmap->n,*idx;

297:   if (yy != zz) VecCopy(yy,zz);
298:   VecGetArrayRead(xx,&x);
299:   VecGetArray(zz,&y);

301:   for (i=0; i<m; i++) {
302:     idx    = a->j + a->i[i];
303:     v      = a->a + a->i[i];
304:     n      = a->i[i+1] - a->i[i];
305:     alpha1 = x[2*i];
306:     alpha2 = x[2*i+1];
307:     while (n-->0) {
308:       y[2*(*idx)]   += alpha1*(*v);
309:       y[2*(*idx)+1] += alpha2*(*v);
310:       idx++; v++;
311:     }
312:   }
313:   PetscLogFlops(4.0*a->nz);
314:   VecRestoreArrayRead(xx,&x);
315:   VecRestoreArray(zz,&y);
316:   return 0;
317: }
318: /* --------------------------------------------------------------------------------------*/
319: PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
320: {
321:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
322:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
323:   const PetscScalar *x,*v;
324:   PetscScalar       *y,sum1, sum2, sum3;
325:   PetscInt          nonzerorow=0,n,i,jrow,j;
326:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;

328:   VecGetArrayRead(xx,&x);
329:   VecGetArray(yy,&y);
330:   idx  = a->j;
331:   v    = a->a;
332:   ii   = a->i;

334:   for (i=0; i<m; i++) {
335:     jrow  = ii[i];
336:     n     = ii[i+1] - jrow;
337:     sum1  = 0.0;
338:     sum2  = 0.0;
339:     sum3  = 0.0;

341:     nonzerorow += (n>0);
342:     for (j=0; j<n; j++) {
343:       sum1 += v[jrow]*x[3*idx[jrow]];
344:       sum2 += v[jrow]*x[3*idx[jrow]+1];
345:       sum3 += v[jrow]*x[3*idx[jrow]+2];
346:       jrow++;
347:      }
348:     y[3*i]   = sum1;
349:     y[3*i+1] = sum2;
350:     y[3*i+2] = sum3;
351:   }

353:   PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);
354:   VecRestoreArrayRead(xx,&x);
355:   VecRestoreArray(yy,&y);
356:   return 0;
357: }

359: PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
360: {
361:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
362:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
363:   const PetscScalar *x,*v;
364:   PetscScalar       *y,alpha1,alpha2,alpha3;
365:   PetscInt          n,i;
366:   const PetscInt    m = b->AIJ->rmap->n,*idx;

368:   VecSet(yy,0.0);
369:   VecGetArrayRead(xx,&x);
370:   VecGetArray(yy,&y);

372:   for (i=0; i<m; i++) {
373:     idx    = a->j + a->i[i];
374:     v      = a->a + a->i[i];
375:     n      = a->i[i+1] - a->i[i];
376:     alpha1 = x[3*i];
377:     alpha2 = x[3*i+1];
378:     alpha3 = x[3*i+2];
379:     while (n-->0) {
380:       y[3*(*idx)]   += alpha1*(*v);
381:       y[3*(*idx)+1] += alpha2*(*v);
382:       y[3*(*idx)+2] += alpha3*(*v);
383:       idx++; v++;
384:     }
385:   }
386:   PetscLogFlops(6.0*a->nz);
387:   VecRestoreArrayRead(xx,&x);
388:   VecRestoreArray(yy,&y);
389:   return 0;
390: }

392: PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
393: {
394:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
395:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
396:   const PetscScalar *x,*v;
397:   PetscScalar       *y,sum1, sum2, sum3;
398:   PetscInt          n,i,jrow,j;
399:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

401:   if (yy != zz) VecCopy(yy,zz);
402:   VecGetArrayRead(xx,&x);
403:   VecGetArray(zz,&y);
404:   idx  = a->j;
405:   v    = a->a;
406:   ii   = a->i;

408:   for (i=0; i<m; i++) {
409:     jrow = ii[i];
410:     n    = ii[i+1] - jrow;
411:     sum1 = 0.0;
412:     sum2 = 0.0;
413:     sum3 = 0.0;
414:     for (j=0; j<n; j++) {
415:       sum1 += v[jrow]*x[3*idx[jrow]];
416:       sum2 += v[jrow]*x[3*idx[jrow]+1];
417:       sum3 += v[jrow]*x[3*idx[jrow]+2];
418:       jrow++;
419:     }
420:     y[3*i]   += sum1;
421:     y[3*i+1] += sum2;
422:     y[3*i+2] += sum3;
423:   }

425:   PetscLogFlops(6.0*a->nz);
426:   VecRestoreArrayRead(xx,&x);
427:   VecRestoreArray(zz,&y);
428:   return 0;
429: }
430: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
431: {
432:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
433:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
434:   const PetscScalar *x,*v;
435:   PetscScalar       *y,alpha1,alpha2,alpha3;
436:   PetscInt          n,i;
437:   const PetscInt    m = b->AIJ->rmap->n,*idx;

439:   if (yy != zz) VecCopy(yy,zz);
440:   VecGetArrayRead(xx,&x);
441:   VecGetArray(zz,&y);
442:   for (i=0; i<m; i++) {
443:     idx    = a->j + a->i[i];
444:     v      = a->a + a->i[i];
445:     n      = a->i[i+1] - a->i[i];
446:     alpha1 = x[3*i];
447:     alpha2 = x[3*i+1];
448:     alpha3 = x[3*i+2];
449:     while (n-->0) {
450:       y[3*(*idx)]   += alpha1*(*v);
451:       y[3*(*idx)+1] += alpha2*(*v);
452:       y[3*(*idx)+2] += alpha3*(*v);
453:       idx++; v++;
454:     }
455:   }
456:   PetscLogFlops(6.0*a->nz);
457:   VecRestoreArrayRead(xx,&x);
458:   VecRestoreArray(zz,&y);
459:   return 0;
460: }

462: /* ------------------------------------------------------------------------------*/
463: PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
464: {
465:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
466:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
467:   const PetscScalar *x,*v;
468:   PetscScalar       *y,sum1, sum2, sum3, sum4;
469:   PetscInt          nonzerorow=0,n,i,jrow,j;
470:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;

472:   VecGetArrayRead(xx,&x);
473:   VecGetArray(yy,&y);
474:   idx  = a->j;
475:   v    = a->a;
476:   ii   = a->i;

478:   for (i=0; i<m; i++) {
479:     jrow        = ii[i];
480:     n           = ii[i+1] - jrow;
481:     sum1        = 0.0;
482:     sum2        = 0.0;
483:     sum3        = 0.0;
484:     sum4        = 0.0;
485:     nonzerorow += (n>0);
486:     for (j=0; j<n; j++) {
487:       sum1 += v[jrow]*x[4*idx[jrow]];
488:       sum2 += v[jrow]*x[4*idx[jrow]+1];
489:       sum3 += v[jrow]*x[4*idx[jrow]+2];
490:       sum4 += v[jrow]*x[4*idx[jrow]+3];
491:       jrow++;
492:     }
493:     y[4*i]   = sum1;
494:     y[4*i+1] = sum2;
495:     y[4*i+2] = sum3;
496:     y[4*i+3] = sum4;
497:   }

499:   PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);
500:   VecRestoreArrayRead(xx,&x);
501:   VecRestoreArray(yy,&y);
502:   return 0;
503: }

505: PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
506: {
507:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
508:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
509:   const PetscScalar *x,*v;
510:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
511:   PetscInt          n,i;
512:   const PetscInt    m = b->AIJ->rmap->n,*idx;

514:   VecSet(yy,0.0);
515:   VecGetArrayRead(xx,&x);
516:   VecGetArray(yy,&y);
517:   for (i=0; i<m; i++) {
518:     idx    = a->j + a->i[i];
519:     v      = a->a + a->i[i];
520:     n      = a->i[i+1] - a->i[i];
521:     alpha1 = x[4*i];
522:     alpha2 = x[4*i+1];
523:     alpha3 = x[4*i+2];
524:     alpha4 = x[4*i+3];
525:     while (n-->0) {
526:       y[4*(*idx)]   += alpha1*(*v);
527:       y[4*(*idx)+1] += alpha2*(*v);
528:       y[4*(*idx)+2] += alpha3*(*v);
529:       y[4*(*idx)+3] += alpha4*(*v);
530:       idx++; v++;
531:     }
532:   }
533:   PetscLogFlops(8.0*a->nz);
534:   VecRestoreArrayRead(xx,&x);
535:   VecRestoreArray(yy,&y);
536:   return 0;
537: }

539: PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
540: {
541:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
542:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
543:   const PetscScalar *x,*v;
544:   PetscScalar       *y,sum1, sum2, sum3, sum4;
545:   PetscInt          n,i,jrow,j;
546:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

548:   if (yy != zz) VecCopy(yy,zz);
549:   VecGetArrayRead(xx,&x);
550:   VecGetArray(zz,&y);
551:   idx  = a->j;
552:   v    = a->a;
553:   ii   = a->i;

555:   for (i=0; i<m; i++) {
556:     jrow = ii[i];
557:     n    = ii[i+1] - jrow;
558:     sum1 = 0.0;
559:     sum2 = 0.0;
560:     sum3 = 0.0;
561:     sum4 = 0.0;
562:     for (j=0; j<n; j++) {
563:       sum1 += v[jrow]*x[4*idx[jrow]];
564:       sum2 += v[jrow]*x[4*idx[jrow]+1];
565:       sum3 += v[jrow]*x[4*idx[jrow]+2];
566:       sum4 += v[jrow]*x[4*idx[jrow]+3];
567:       jrow++;
568:     }
569:     y[4*i]   += sum1;
570:     y[4*i+1] += sum2;
571:     y[4*i+2] += sum3;
572:     y[4*i+3] += sum4;
573:   }

575:   PetscLogFlops(8.0*a->nz);
576:   VecRestoreArrayRead(xx,&x);
577:   VecRestoreArray(zz,&y);
578:   return 0;
579: }
580: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
581: {
582:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
583:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
584:   const PetscScalar *x,*v;
585:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
586:   PetscInt          n,i;
587:   const PetscInt    m = b->AIJ->rmap->n,*idx;

589:   if (yy != zz) VecCopy(yy,zz);
590:   VecGetArrayRead(xx,&x);
591:   VecGetArray(zz,&y);

593:   for (i=0; i<m; i++) {
594:     idx    = a->j + a->i[i];
595:     v      = a->a + a->i[i];
596:     n      = a->i[i+1] - a->i[i];
597:     alpha1 = x[4*i];
598:     alpha2 = x[4*i+1];
599:     alpha3 = x[4*i+2];
600:     alpha4 = x[4*i+3];
601:     while (n-->0) {
602:       y[4*(*idx)]   += alpha1*(*v);
603:       y[4*(*idx)+1] += alpha2*(*v);
604:       y[4*(*idx)+2] += alpha3*(*v);
605:       y[4*(*idx)+3] += alpha4*(*v);
606:       idx++; v++;
607:     }
608:   }
609:   PetscLogFlops(8.0*a->nz);
610:   VecRestoreArrayRead(xx,&x);
611:   VecRestoreArray(zz,&y);
612:   return 0;
613: }
614: /* ------------------------------------------------------------------------------*/

616: PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
617: {
618:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
619:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
620:   const PetscScalar *x,*v;
621:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
622:   PetscInt          nonzerorow=0,n,i,jrow,j;
623:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;

625:   VecGetArrayRead(xx,&x);
626:   VecGetArray(yy,&y);
627:   idx  = a->j;
628:   v    = a->a;
629:   ii   = a->i;

631:   for (i=0; i<m; i++) {
632:     jrow  = ii[i];
633:     n     = ii[i+1] - jrow;
634:     sum1  = 0.0;
635:     sum2  = 0.0;
636:     sum3  = 0.0;
637:     sum4  = 0.0;
638:     sum5  = 0.0;

640:     nonzerorow += (n>0);
641:     for (j=0; j<n; j++) {
642:       sum1 += v[jrow]*x[5*idx[jrow]];
643:       sum2 += v[jrow]*x[5*idx[jrow]+1];
644:       sum3 += v[jrow]*x[5*idx[jrow]+2];
645:       sum4 += v[jrow]*x[5*idx[jrow]+3];
646:       sum5 += v[jrow]*x[5*idx[jrow]+4];
647:       jrow++;
648:     }
649:     y[5*i]   = sum1;
650:     y[5*i+1] = sum2;
651:     y[5*i+2] = sum3;
652:     y[5*i+3] = sum4;
653:     y[5*i+4] = sum5;
654:   }

656:   PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);
657:   VecRestoreArrayRead(xx,&x);
658:   VecRestoreArray(yy,&y);
659:   return 0;
660: }

662: PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
663: {
664:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
665:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
666:   const PetscScalar *x,*v;
667:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
668:   PetscInt          n,i;
669:   const PetscInt    m = b->AIJ->rmap->n,*idx;

671:   VecSet(yy,0.0);
672:   VecGetArrayRead(xx,&x);
673:   VecGetArray(yy,&y);

675:   for (i=0; i<m; i++) {
676:     idx    = a->j + a->i[i];
677:     v      = a->a + a->i[i];
678:     n      = a->i[i+1] - a->i[i];
679:     alpha1 = x[5*i];
680:     alpha2 = x[5*i+1];
681:     alpha3 = x[5*i+2];
682:     alpha4 = x[5*i+3];
683:     alpha5 = x[5*i+4];
684:     while (n-->0) {
685:       y[5*(*idx)]   += alpha1*(*v);
686:       y[5*(*idx)+1] += alpha2*(*v);
687:       y[5*(*idx)+2] += alpha3*(*v);
688:       y[5*(*idx)+3] += alpha4*(*v);
689:       y[5*(*idx)+4] += alpha5*(*v);
690:       idx++; v++;
691:     }
692:   }
693:   PetscLogFlops(10.0*a->nz);
694:   VecRestoreArrayRead(xx,&x);
695:   VecRestoreArray(yy,&y);
696:   return 0;
697: }

699: PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
700: {
701:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
702:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
703:   const PetscScalar *x,*v;
704:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
705:   PetscInt          n,i,jrow,j;
706:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

708:   if (yy != zz) VecCopy(yy,zz);
709:   VecGetArrayRead(xx,&x);
710:   VecGetArray(zz,&y);
711:   idx  = a->j;
712:   v    = a->a;
713:   ii   = a->i;

715:   for (i=0; i<m; i++) {
716:     jrow = ii[i];
717:     n    = ii[i+1] - jrow;
718:     sum1 = 0.0;
719:     sum2 = 0.0;
720:     sum3 = 0.0;
721:     sum4 = 0.0;
722:     sum5 = 0.0;
723:     for (j=0; j<n; j++) {
724:       sum1 += v[jrow]*x[5*idx[jrow]];
725:       sum2 += v[jrow]*x[5*idx[jrow]+1];
726:       sum3 += v[jrow]*x[5*idx[jrow]+2];
727:       sum4 += v[jrow]*x[5*idx[jrow]+3];
728:       sum5 += v[jrow]*x[5*idx[jrow]+4];
729:       jrow++;
730:     }
731:     y[5*i]   += sum1;
732:     y[5*i+1] += sum2;
733:     y[5*i+2] += sum3;
734:     y[5*i+3] += sum4;
735:     y[5*i+4] += sum5;
736:   }

738:   PetscLogFlops(10.0*a->nz);
739:   VecRestoreArrayRead(xx,&x);
740:   VecRestoreArray(zz,&y);
741:   return 0;
742: }

744: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
745: {
746:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
747:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
748:   const PetscScalar *x,*v;
749:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
750:   PetscInt          n,i;
751:   const PetscInt    m = b->AIJ->rmap->n,*idx;

753:   if (yy != zz) VecCopy(yy,zz);
754:   VecGetArrayRead(xx,&x);
755:   VecGetArray(zz,&y);

757:   for (i=0; i<m; i++) {
758:     idx    = a->j + a->i[i];
759:     v      = a->a + a->i[i];
760:     n      = a->i[i+1] - a->i[i];
761:     alpha1 = x[5*i];
762:     alpha2 = x[5*i+1];
763:     alpha3 = x[5*i+2];
764:     alpha4 = x[5*i+3];
765:     alpha5 = x[5*i+4];
766:     while (n-->0) {
767:       y[5*(*idx)]   += alpha1*(*v);
768:       y[5*(*idx)+1] += alpha2*(*v);
769:       y[5*(*idx)+2] += alpha3*(*v);
770:       y[5*(*idx)+3] += alpha4*(*v);
771:       y[5*(*idx)+4] += alpha5*(*v);
772:       idx++; v++;
773:     }
774:   }
775:   PetscLogFlops(10.0*a->nz);
776:   VecRestoreArrayRead(xx,&x);
777:   VecRestoreArray(zz,&y);
778:   return 0;
779: }

781: /* ------------------------------------------------------------------------------*/
782: PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
783: {
784:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
785:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
786:   const PetscScalar *x,*v;
787:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
788:   PetscInt          nonzerorow=0,n,i,jrow,j;
789:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;

791:   VecGetArrayRead(xx,&x);
792:   VecGetArray(yy,&y);
793:   idx  = a->j;
794:   v    = a->a;
795:   ii   = a->i;

797:   for (i=0; i<m; i++) {
798:     jrow  = ii[i];
799:     n     = ii[i+1] - jrow;
800:     sum1  = 0.0;
801:     sum2  = 0.0;
802:     sum3  = 0.0;
803:     sum4  = 0.0;
804:     sum5  = 0.0;
805:     sum6  = 0.0;

807:     nonzerorow += (n>0);
808:     for (j=0; j<n; j++) {
809:       sum1 += v[jrow]*x[6*idx[jrow]];
810:       sum2 += v[jrow]*x[6*idx[jrow]+1];
811:       sum3 += v[jrow]*x[6*idx[jrow]+2];
812:       sum4 += v[jrow]*x[6*idx[jrow]+3];
813:       sum5 += v[jrow]*x[6*idx[jrow]+4];
814:       sum6 += v[jrow]*x[6*idx[jrow]+5];
815:       jrow++;
816:     }
817:     y[6*i]   = sum1;
818:     y[6*i+1] = sum2;
819:     y[6*i+2] = sum3;
820:     y[6*i+3] = sum4;
821:     y[6*i+4] = sum5;
822:     y[6*i+5] = sum6;
823:   }

825:   PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);
826:   VecRestoreArrayRead(xx,&x);
827:   VecRestoreArray(yy,&y);
828:   return 0;
829: }

831: PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
832: {
833:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
834:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
835:   const PetscScalar *x,*v;
836:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
837:   PetscInt          n,i;
838:   const PetscInt    m = b->AIJ->rmap->n,*idx;

840:   VecSet(yy,0.0);
841:   VecGetArrayRead(xx,&x);
842:   VecGetArray(yy,&y);

844:   for (i=0; i<m; i++) {
845:     idx    = a->j + a->i[i];
846:     v      = a->a + a->i[i];
847:     n      = a->i[i+1] - a->i[i];
848:     alpha1 = x[6*i];
849:     alpha2 = x[6*i+1];
850:     alpha3 = x[6*i+2];
851:     alpha4 = x[6*i+3];
852:     alpha5 = x[6*i+4];
853:     alpha6 = x[6*i+5];
854:     while (n-->0) {
855:       y[6*(*idx)]   += alpha1*(*v);
856:       y[6*(*idx)+1] += alpha2*(*v);
857:       y[6*(*idx)+2] += alpha3*(*v);
858:       y[6*(*idx)+3] += alpha4*(*v);
859:       y[6*(*idx)+4] += alpha5*(*v);
860:       y[6*(*idx)+5] += alpha6*(*v);
861:       idx++; v++;
862:     }
863:   }
864:   PetscLogFlops(12.0*a->nz);
865:   VecRestoreArrayRead(xx,&x);
866:   VecRestoreArray(yy,&y);
867:   return 0;
868: }

870: PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
871: {
872:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
873:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
874:   const PetscScalar *x,*v;
875:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
876:   PetscInt          n,i,jrow,j;
877:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

879:   if (yy != zz) VecCopy(yy,zz);
880:   VecGetArrayRead(xx,&x);
881:   VecGetArray(zz,&y);
882:   idx  = a->j;
883:   v    = a->a;
884:   ii   = a->i;

886:   for (i=0; i<m; i++) {
887:     jrow = ii[i];
888:     n    = ii[i+1] - jrow;
889:     sum1 = 0.0;
890:     sum2 = 0.0;
891:     sum3 = 0.0;
892:     sum4 = 0.0;
893:     sum5 = 0.0;
894:     sum6 = 0.0;
895:     for (j=0; j<n; j++) {
896:       sum1 += v[jrow]*x[6*idx[jrow]];
897:       sum2 += v[jrow]*x[6*idx[jrow]+1];
898:       sum3 += v[jrow]*x[6*idx[jrow]+2];
899:       sum4 += v[jrow]*x[6*idx[jrow]+3];
900:       sum5 += v[jrow]*x[6*idx[jrow]+4];
901:       sum6 += v[jrow]*x[6*idx[jrow]+5];
902:       jrow++;
903:     }
904:     y[6*i]   += sum1;
905:     y[6*i+1] += sum2;
906:     y[6*i+2] += sum3;
907:     y[6*i+3] += sum4;
908:     y[6*i+4] += sum5;
909:     y[6*i+5] += sum6;
910:   }

912:   PetscLogFlops(12.0*a->nz);
913:   VecRestoreArrayRead(xx,&x);
914:   VecRestoreArray(zz,&y);
915:   return 0;
916: }

918: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
919: {
920:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
921:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
922:   const PetscScalar *x,*v;
923:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
924:   PetscInt          n,i;
925:   const PetscInt    m = b->AIJ->rmap->n,*idx;

927:   if (yy != zz) VecCopy(yy,zz);
928:   VecGetArrayRead(xx,&x);
929:   VecGetArray(zz,&y);

931:   for (i=0; i<m; i++) {
932:     idx    = a->j + a->i[i];
933:     v      = a->a + a->i[i];
934:     n      = a->i[i+1] - a->i[i];
935:     alpha1 = x[6*i];
936:     alpha2 = x[6*i+1];
937:     alpha3 = x[6*i+2];
938:     alpha4 = x[6*i+3];
939:     alpha5 = x[6*i+4];
940:     alpha6 = x[6*i+5];
941:     while (n-->0) {
942:       y[6*(*idx)]   += alpha1*(*v);
943:       y[6*(*idx)+1] += alpha2*(*v);
944:       y[6*(*idx)+2] += alpha3*(*v);
945:       y[6*(*idx)+3] += alpha4*(*v);
946:       y[6*(*idx)+4] += alpha5*(*v);
947:       y[6*(*idx)+5] += alpha6*(*v);
948:       idx++; v++;
949:     }
950:   }
951:   PetscLogFlops(12.0*a->nz);
952:   VecRestoreArrayRead(xx,&x);
953:   VecRestoreArray(zz,&y);
954:   return 0;
955: }

957: /* ------------------------------------------------------------------------------*/
958: PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
959: {
960:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
961:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
962:   const PetscScalar *x,*v;
963:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
964:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
965:   PetscInt          nonzerorow=0,n,i,jrow,j;

967:   VecGetArrayRead(xx,&x);
968:   VecGetArray(yy,&y);
969:   idx  = a->j;
970:   v    = a->a;
971:   ii   = a->i;

973:   for (i=0; i<m; i++) {
974:     jrow  = ii[i];
975:     n     = ii[i+1] - jrow;
976:     sum1  = 0.0;
977:     sum2  = 0.0;
978:     sum3  = 0.0;
979:     sum4  = 0.0;
980:     sum5  = 0.0;
981:     sum6  = 0.0;
982:     sum7  = 0.0;

984:     nonzerorow += (n>0);
985:     for (j=0; j<n; j++) {
986:       sum1 += v[jrow]*x[7*idx[jrow]];
987:       sum2 += v[jrow]*x[7*idx[jrow]+1];
988:       sum3 += v[jrow]*x[7*idx[jrow]+2];
989:       sum4 += v[jrow]*x[7*idx[jrow]+3];
990:       sum5 += v[jrow]*x[7*idx[jrow]+4];
991:       sum6 += v[jrow]*x[7*idx[jrow]+5];
992:       sum7 += v[jrow]*x[7*idx[jrow]+6];
993:       jrow++;
994:     }
995:     y[7*i]   = sum1;
996:     y[7*i+1] = sum2;
997:     y[7*i+2] = sum3;
998:     y[7*i+3] = sum4;
999:     y[7*i+4] = sum5;
1000:     y[7*i+5] = sum6;
1001:     y[7*i+6] = sum7;
1002:   }

1004:   PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);
1005:   VecRestoreArrayRead(xx,&x);
1006:   VecRestoreArray(yy,&y);
1007:   return 0;
1008: }

1010: PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1011: {
1012:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1013:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1014:   const PetscScalar *x,*v;
1015:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1016:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1017:   PetscInt          n,i;

1019:   VecSet(yy,0.0);
1020:   VecGetArrayRead(xx,&x);
1021:   VecGetArray(yy,&y);

1023:   for (i=0; i<m; i++) {
1024:     idx    = a->j + a->i[i];
1025:     v      = a->a + a->i[i];
1026:     n      = a->i[i+1] - a->i[i];
1027:     alpha1 = x[7*i];
1028:     alpha2 = x[7*i+1];
1029:     alpha3 = x[7*i+2];
1030:     alpha4 = x[7*i+3];
1031:     alpha5 = x[7*i+4];
1032:     alpha6 = x[7*i+5];
1033:     alpha7 = x[7*i+6];
1034:     while (n-->0) {
1035:       y[7*(*idx)]   += alpha1*(*v);
1036:       y[7*(*idx)+1] += alpha2*(*v);
1037:       y[7*(*idx)+2] += alpha3*(*v);
1038:       y[7*(*idx)+3] += alpha4*(*v);
1039:       y[7*(*idx)+4] += alpha5*(*v);
1040:       y[7*(*idx)+5] += alpha6*(*v);
1041:       y[7*(*idx)+6] += alpha7*(*v);
1042:       idx++; v++;
1043:     }
1044:   }
1045:   PetscLogFlops(14.0*a->nz);
1046:   VecRestoreArrayRead(xx,&x);
1047:   VecRestoreArray(yy,&y);
1048:   return 0;
1049: }

1051: PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1052: {
1053:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1054:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1055:   const PetscScalar *x,*v;
1056:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1057:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1058:   PetscInt          n,i,jrow,j;

1060:   if (yy != zz) VecCopy(yy,zz);
1061:   VecGetArrayRead(xx,&x);
1062:   VecGetArray(zz,&y);
1063:   idx  = a->j;
1064:   v    = a->a;
1065:   ii   = a->i;

1067:   for (i=0; i<m; i++) {
1068:     jrow = ii[i];
1069:     n    = ii[i+1] - jrow;
1070:     sum1 = 0.0;
1071:     sum2 = 0.0;
1072:     sum3 = 0.0;
1073:     sum4 = 0.0;
1074:     sum5 = 0.0;
1075:     sum6 = 0.0;
1076:     sum7 = 0.0;
1077:     for (j=0; j<n; j++) {
1078:       sum1 += v[jrow]*x[7*idx[jrow]];
1079:       sum2 += v[jrow]*x[7*idx[jrow]+1];
1080:       sum3 += v[jrow]*x[7*idx[jrow]+2];
1081:       sum4 += v[jrow]*x[7*idx[jrow]+3];
1082:       sum5 += v[jrow]*x[7*idx[jrow]+4];
1083:       sum6 += v[jrow]*x[7*idx[jrow]+5];
1084:       sum7 += v[jrow]*x[7*idx[jrow]+6];
1085:       jrow++;
1086:     }
1087:     y[7*i]   += sum1;
1088:     y[7*i+1] += sum2;
1089:     y[7*i+2] += sum3;
1090:     y[7*i+3] += sum4;
1091:     y[7*i+4] += sum5;
1092:     y[7*i+5] += sum6;
1093:     y[7*i+6] += sum7;
1094:   }

1096:   PetscLogFlops(14.0*a->nz);
1097:   VecRestoreArrayRead(xx,&x);
1098:   VecRestoreArray(zz,&y);
1099:   return 0;
1100: }

1102: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1103: {
1104:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1105:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1106:   const PetscScalar *x,*v;
1107:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1108:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1109:   PetscInt          n,i;

1111:   if (yy != zz) VecCopy(yy,zz);
1112:   VecGetArrayRead(xx,&x);
1113:   VecGetArray(zz,&y);
1114:   for (i=0; i<m; i++) {
1115:     idx    = a->j + a->i[i];
1116:     v      = a->a + a->i[i];
1117:     n      = a->i[i+1] - a->i[i];
1118:     alpha1 = x[7*i];
1119:     alpha2 = x[7*i+1];
1120:     alpha3 = x[7*i+2];
1121:     alpha4 = x[7*i+3];
1122:     alpha5 = x[7*i+4];
1123:     alpha6 = x[7*i+5];
1124:     alpha7 = x[7*i+6];
1125:     while (n-->0) {
1126:       y[7*(*idx)]   += alpha1*(*v);
1127:       y[7*(*idx)+1] += alpha2*(*v);
1128:       y[7*(*idx)+2] += alpha3*(*v);
1129:       y[7*(*idx)+3] += alpha4*(*v);
1130:       y[7*(*idx)+4] += alpha5*(*v);
1131:       y[7*(*idx)+5] += alpha6*(*v);
1132:       y[7*(*idx)+6] += alpha7*(*v);
1133:       idx++; v++;
1134:     }
1135:   }
1136:   PetscLogFlops(14.0*a->nz);
1137:   VecRestoreArrayRead(xx,&x);
1138:   VecRestoreArray(zz,&y);
1139:   return 0;
1140: }

1142: PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1143: {
1144:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1145:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1146:   const PetscScalar *x,*v;
1147:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1148:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1149:   PetscInt          nonzerorow=0,n,i,jrow,j;

1151:   VecGetArrayRead(xx,&x);
1152:   VecGetArray(yy,&y);
1153:   idx  = a->j;
1154:   v    = a->a;
1155:   ii   = a->i;

1157:   for (i=0; i<m; i++) {
1158:     jrow  = ii[i];
1159:     n     = ii[i+1] - jrow;
1160:     sum1  = 0.0;
1161:     sum2  = 0.0;
1162:     sum3  = 0.0;
1163:     sum4  = 0.0;
1164:     sum5  = 0.0;
1165:     sum6  = 0.0;
1166:     sum7  = 0.0;
1167:     sum8  = 0.0;

1169:     nonzerorow += (n>0);
1170:     for (j=0; j<n; j++) {
1171:       sum1 += v[jrow]*x[8*idx[jrow]];
1172:       sum2 += v[jrow]*x[8*idx[jrow]+1];
1173:       sum3 += v[jrow]*x[8*idx[jrow]+2];
1174:       sum4 += v[jrow]*x[8*idx[jrow]+3];
1175:       sum5 += v[jrow]*x[8*idx[jrow]+4];
1176:       sum6 += v[jrow]*x[8*idx[jrow]+5];
1177:       sum7 += v[jrow]*x[8*idx[jrow]+6];
1178:       sum8 += v[jrow]*x[8*idx[jrow]+7];
1179:       jrow++;
1180:     }
1181:     y[8*i]   = sum1;
1182:     y[8*i+1] = sum2;
1183:     y[8*i+2] = sum3;
1184:     y[8*i+3] = sum4;
1185:     y[8*i+4] = sum5;
1186:     y[8*i+5] = sum6;
1187:     y[8*i+6] = sum7;
1188:     y[8*i+7] = sum8;
1189:   }

1191:   PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);
1192:   VecRestoreArrayRead(xx,&x);
1193:   VecRestoreArray(yy,&y);
1194:   return 0;
1195: }

1197: PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1198: {
1199:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1200:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1201:   const PetscScalar *x,*v;
1202:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1203:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1204:   PetscInt          n,i;

1206:   VecSet(yy,0.0);
1207:   VecGetArrayRead(xx,&x);
1208:   VecGetArray(yy,&y);

1210:   for (i=0; i<m; i++) {
1211:     idx    = a->j + a->i[i];
1212:     v      = a->a + a->i[i];
1213:     n      = a->i[i+1] - a->i[i];
1214:     alpha1 = x[8*i];
1215:     alpha2 = x[8*i+1];
1216:     alpha3 = x[8*i+2];
1217:     alpha4 = x[8*i+3];
1218:     alpha5 = x[8*i+4];
1219:     alpha6 = x[8*i+5];
1220:     alpha7 = x[8*i+6];
1221:     alpha8 = x[8*i+7];
1222:     while (n-->0) {
1223:       y[8*(*idx)]   += alpha1*(*v);
1224:       y[8*(*idx)+1] += alpha2*(*v);
1225:       y[8*(*idx)+2] += alpha3*(*v);
1226:       y[8*(*idx)+3] += alpha4*(*v);
1227:       y[8*(*idx)+4] += alpha5*(*v);
1228:       y[8*(*idx)+5] += alpha6*(*v);
1229:       y[8*(*idx)+6] += alpha7*(*v);
1230:       y[8*(*idx)+7] += alpha8*(*v);
1231:       idx++; v++;
1232:     }
1233:   }
1234:   PetscLogFlops(16.0*a->nz);
1235:   VecRestoreArrayRead(xx,&x);
1236:   VecRestoreArray(yy,&y);
1237:   return 0;
1238: }

1240: PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1241: {
1242:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1243:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1244:   const PetscScalar *x,*v;
1245:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1246:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1247:   PetscInt          n,i,jrow,j;

1249:   if (yy != zz) VecCopy(yy,zz);
1250:   VecGetArrayRead(xx,&x);
1251:   VecGetArray(zz,&y);
1252:   idx  = a->j;
1253:   v    = a->a;
1254:   ii   = a->i;

1256:   for (i=0; i<m; i++) {
1257:     jrow = ii[i];
1258:     n    = ii[i+1] - jrow;
1259:     sum1 = 0.0;
1260:     sum2 = 0.0;
1261:     sum3 = 0.0;
1262:     sum4 = 0.0;
1263:     sum5 = 0.0;
1264:     sum6 = 0.0;
1265:     sum7 = 0.0;
1266:     sum8 = 0.0;
1267:     for (j=0; j<n; j++) {
1268:       sum1 += v[jrow]*x[8*idx[jrow]];
1269:       sum2 += v[jrow]*x[8*idx[jrow]+1];
1270:       sum3 += v[jrow]*x[8*idx[jrow]+2];
1271:       sum4 += v[jrow]*x[8*idx[jrow]+3];
1272:       sum5 += v[jrow]*x[8*idx[jrow]+4];
1273:       sum6 += v[jrow]*x[8*idx[jrow]+5];
1274:       sum7 += v[jrow]*x[8*idx[jrow]+6];
1275:       sum8 += v[jrow]*x[8*idx[jrow]+7];
1276:       jrow++;
1277:     }
1278:     y[8*i]   += sum1;
1279:     y[8*i+1] += sum2;
1280:     y[8*i+2] += sum3;
1281:     y[8*i+3] += sum4;
1282:     y[8*i+4] += sum5;
1283:     y[8*i+5] += sum6;
1284:     y[8*i+6] += sum7;
1285:     y[8*i+7] += sum8;
1286:   }

1288:   PetscLogFlops(16.0*a->nz);
1289:   VecRestoreArrayRead(xx,&x);
1290:   VecRestoreArray(zz,&y);
1291:   return 0;
1292: }

1294: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1295: {
1296:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1297:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1298:   const PetscScalar *x,*v;
1299:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1300:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1301:   PetscInt          n,i;

1303:   if (yy != zz) VecCopy(yy,zz);
1304:   VecGetArrayRead(xx,&x);
1305:   VecGetArray(zz,&y);
1306:   for (i=0; i<m; i++) {
1307:     idx    = a->j + a->i[i];
1308:     v      = a->a + a->i[i];
1309:     n      = a->i[i+1] - a->i[i];
1310:     alpha1 = x[8*i];
1311:     alpha2 = x[8*i+1];
1312:     alpha3 = x[8*i+2];
1313:     alpha4 = x[8*i+3];
1314:     alpha5 = x[8*i+4];
1315:     alpha6 = x[8*i+5];
1316:     alpha7 = x[8*i+6];
1317:     alpha8 = x[8*i+7];
1318:     while (n-->0) {
1319:       y[8*(*idx)]   += alpha1*(*v);
1320:       y[8*(*idx)+1] += alpha2*(*v);
1321:       y[8*(*idx)+2] += alpha3*(*v);
1322:       y[8*(*idx)+3] += alpha4*(*v);
1323:       y[8*(*idx)+4] += alpha5*(*v);
1324:       y[8*(*idx)+5] += alpha6*(*v);
1325:       y[8*(*idx)+6] += alpha7*(*v);
1326:       y[8*(*idx)+7] += alpha8*(*v);
1327:       idx++; v++;
1328:     }
1329:   }
1330:   PetscLogFlops(16.0*a->nz);
1331:   VecRestoreArrayRead(xx,&x);
1332:   VecRestoreArray(zz,&y);
1333:   return 0;
1334: }

1336: /* ------------------------------------------------------------------------------*/
1337: PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1338: {
1339:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1340:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1341:   const PetscScalar *x,*v;
1342:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1343:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1344:   PetscInt          nonzerorow=0,n,i,jrow,j;

1346:   VecGetArrayRead(xx,&x);
1347:   VecGetArray(yy,&y);
1348:   idx  = a->j;
1349:   v    = a->a;
1350:   ii   = a->i;

1352:   for (i=0; i<m; i++) {
1353:     jrow  = ii[i];
1354:     n     = ii[i+1] - jrow;
1355:     sum1  = 0.0;
1356:     sum2  = 0.0;
1357:     sum3  = 0.0;
1358:     sum4  = 0.0;
1359:     sum5  = 0.0;
1360:     sum6  = 0.0;
1361:     sum7  = 0.0;
1362:     sum8  = 0.0;
1363:     sum9  = 0.0;

1365:     nonzerorow += (n>0);
1366:     for (j=0; j<n; j++) {
1367:       sum1 += v[jrow]*x[9*idx[jrow]];
1368:       sum2 += v[jrow]*x[9*idx[jrow]+1];
1369:       sum3 += v[jrow]*x[9*idx[jrow]+2];
1370:       sum4 += v[jrow]*x[9*idx[jrow]+3];
1371:       sum5 += v[jrow]*x[9*idx[jrow]+4];
1372:       sum6 += v[jrow]*x[9*idx[jrow]+5];
1373:       sum7 += v[jrow]*x[9*idx[jrow]+6];
1374:       sum8 += v[jrow]*x[9*idx[jrow]+7];
1375:       sum9 += v[jrow]*x[9*idx[jrow]+8];
1376:       jrow++;
1377:     }
1378:     y[9*i]   = sum1;
1379:     y[9*i+1] = sum2;
1380:     y[9*i+2] = sum3;
1381:     y[9*i+3] = sum4;
1382:     y[9*i+4] = sum5;
1383:     y[9*i+5] = sum6;
1384:     y[9*i+6] = sum7;
1385:     y[9*i+7] = sum8;
1386:     y[9*i+8] = sum9;
1387:   }

1389:   PetscLogFlops(18.0*a->nz - 9*nonzerorow);
1390:   VecRestoreArrayRead(xx,&x);
1391:   VecRestoreArray(yy,&y);
1392:   return 0;
1393: }

1395: /* ------------------------------------------------------------------------------*/

1397: PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1398: {
1399:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1400:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1401:   const PetscScalar *x,*v;
1402:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1403:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1404:   PetscInt          n,i;

1406:   VecSet(yy,0.0);
1407:   VecGetArrayRead(xx,&x);
1408:   VecGetArray(yy,&y);

1410:   for (i=0; i<m; i++) {
1411:     idx    = a->j + a->i[i];
1412:     v      = a->a + a->i[i];
1413:     n      = a->i[i+1] - a->i[i];
1414:     alpha1 = x[9*i];
1415:     alpha2 = x[9*i+1];
1416:     alpha3 = x[9*i+2];
1417:     alpha4 = x[9*i+3];
1418:     alpha5 = x[9*i+4];
1419:     alpha6 = x[9*i+5];
1420:     alpha7 = x[9*i+6];
1421:     alpha8 = x[9*i+7];
1422:     alpha9 = x[9*i+8];
1423:     while (n-->0) {
1424:       y[9*(*idx)]   += alpha1*(*v);
1425:       y[9*(*idx)+1] += alpha2*(*v);
1426:       y[9*(*idx)+2] += alpha3*(*v);
1427:       y[9*(*idx)+3] += alpha4*(*v);
1428:       y[9*(*idx)+4] += alpha5*(*v);
1429:       y[9*(*idx)+5] += alpha6*(*v);
1430:       y[9*(*idx)+6] += alpha7*(*v);
1431:       y[9*(*idx)+7] += alpha8*(*v);
1432:       y[9*(*idx)+8] += alpha9*(*v);
1433:       idx++; v++;
1434:     }
1435:   }
1436:   PetscLogFlops(18.0*a->nz);
1437:   VecRestoreArrayRead(xx,&x);
1438:   VecRestoreArray(yy,&y);
1439:   return 0;
1440: }

1442: PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1443: {
1444:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1445:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1446:   const PetscScalar *x,*v;
1447:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1448:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1449:   PetscInt          n,i,jrow,j;

1451:   if (yy != zz) VecCopy(yy,zz);
1452:   VecGetArrayRead(xx,&x);
1453:   VecGetArray(zz,&y);
1454:   idx  = a->j;
1455:   v    = a->a;
1456:   ii   = a->i;

1458:   for (i=0; i<m; i++) {
1459:     jrow = ii[i];
1460:     n    = ii[i+1] - jrow;
1461:     sum1 = 0.0;
1462:     sum2 = 0.0;
1463:     sum3 = 0.0;
1464:     sum4 = 0.0;
1465:     sum5 = 0.0;
1466:     sum6 = 0.0;
1467:     sum7 = 0.0;
1468:     sum8 = 0.0;
1469:     sum9 = 0.0;
1470:     for (j=0; j<n; j++) {
1471:       sum1 += v[jrow]*x[9*idx[jrow]];
1472:       sum2 += v[jrow]*x[9*idx[jrow]+1];
1473:       sum3 += v[jrow]*x[9*idx[jrow]+2];
1474:       sum4 += v[jrow]*x[9*idx[jrow]+3];
1475:       sum5 += v[jrow]*x[9*idx[jrow]+4];
1476:       sum6 += v[jrow]*x[9*idx[jrow]+5];
1477:       sum7 += v[jrow]*x[9*idx[jrow]+6];
1478:       sum8 += v[jrow]*x[9*idx[jrow]+7];
1479:       sum9 += v[jrow]*x[9*idx[jrow]+8];
1480:       jrow++;
1481:     }
1482:     y[9*i]   += sum1;
1483:     y[9*i+1] += sum2;
1484:     y[9*i+2] += sum3;
1485:     y[9*i+3] += sum4;
1486:     y[9*i+4] += sum5;
1487:     y[9*i+5] += sum6;
1488:     y[9*i+6] += sum7;
1489:     y[9*i+7] += sum8;
1490:     y[9*i+8] += sum9;
1491:   }

1493:   PetscLogFlops(18.0*a->nz);
1494:   VecRestoreArrayRead(xx,&x);
1495:   VecRestoreArray(zz,&y);
1496:   return 0;
1497: }

1499: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1500: {
1501:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1502:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1503:   const PetscScalar *x,*v;
1504:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1505:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1506:   PetscInt          n,i;

1508:   if (yy != zz) VecCopy(yy,zz);
1509:   VecGetArrayRead(xx,&x);
1510:   VecGetArray(zz,&y);
1511:   for (i=0; i<m; i++) {
1512:     idx    = a->j + a->i[i];
1513:     v      = a->a + a->i[i];
1514:     n      = a->i[i+1] - a->i[i];
1515:     alpha1 = x[9*i];
1516:     alpha2 = x[9*i+1];
1517:     alpha3 = x[9*i+2];
1518:     alpha4 = x[9*i+3];
1519:     alpha5 = x[9*i+4];
1520:     alpha6 = x[9*i+5];
1521:     alpha7 = x[9*i+6];
1522:     alpha8 = x[9*i+7];
1523:     alpha9 = x[9*i+8];
1524:     while (n-->0) {
1525:       y[9*(*idx)]   += alpha1*(*v);
1526:       y[9*(*idx)+1] += alpha2*(*v);
1527:       y[9*(*idx)+2] += alpha3*(*v);
1528:       y[9*(*idx)+3] += alpha4*(*v);
1529:       y[9*(*idx)+4] += alpha5*(*v);
1530:       y[9*(*idx)+5] += alpha6*(*v);
1531:       y[9*(*idx)+6] += alpha7*(*v);
1532:       y[9*(*idx)+7] += alpha8*(*v);
1533:       y[9*(*idx)+8] += alpha9*(*v);
1534:       idx++; v++;
1535:     }
1536:   }
1537:   PetscLogFlops(18.0*a->nz);
1538:   VecRestoreArrayRead(xx,&x);
1539:   VecRestoreArray(zz,&y);
1540:   return 0;
1541: }
1542: PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1543: {
1544:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1545:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1546:   const PetscScalar *x,*v;
1547:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1548:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1549:   PetscInt          nonzerorow=0,n,i,jrow,j;

1551:   VecGetArrayRead(xx,&x);
1552:   VecGetArray(yy,&y);
1553:   idx  = a->j;
1554:   v    = a->a;
1555:   ii   = a->i;

1557:   for (i=0; i<m; i++) {
1558:     jrow  = ii[i];
1559:     n     = ii[i+1] - jrow;
1560:     sum1  = 0.0;
1561:     sum2  = 0.0;
1562:     sum3  = 0.0;
1563:     sum4  = 0.0;
1564:     sum5  = 0.0;
1565:     sum6  = 0.0;
1566:     sum7  = 0.0;
1567:     sum8  = 0.0;
1568:     sum9  = 0.0;
1569:     sum10 = 0.0;

1571:     nonzerorow += (n>0);
1572:     for (j=0; j<n; j++) {
1573:       sum1  += v[jrow]*x[10*idx[jrow]];
1574:       sum2  += v[jrow]*x[10*idx[jrow]+1];
1575:       sum3  += v[jrow]*x[10*idx[jrow]+2];
1576:       sum4  += v[jrow]*x[10*idx[jrow]+3];
1577:       sum5  += v[jrow]*x[10*idx[jrow]+4];
1578:       sum6  += v[jrow]*x[10*idx[jrow]+5];
1579:       sum7  += v[jrow]*x[10*idx[jrow]+6];
1580:       sum8  += v[jrow]*x[10*idx[jrow]+7];
1581:       sum9  += v[jrow]*x[10*idx[jrow]+8];
1582:       sum10 += v[jrow]*x[10*idx[jrow]+9];
1583:       jrow++;
1584:     }
1585:     y[10*i]   = sum1;
1586:     y[10*i+1] = sum2;
1587:     y[10*i+2] = sum3;
1588:     y[10*i+3] = sum4;
1589:     y[10*i+4] = sum5;
1590:     y[10*i+5] = sum6;
1591:     y[10*i+6] = sum7;
1592:     y[10*i+7] = sum8;
1593:     y[10*i+8] = sum9;
1594:     y[10*i+9] = sum10;
1595:   }

1597:   PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);
1598:   VecRestoreArrayRead(xx,&x);
1599:   VecRestoreArray(yy,&y);
1600:   return 0;
1601: }

1603: PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1604: {
1605:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1606:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1607:   const PetscScalar *x,*v;
1608:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1609:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1610:   PetscInt          n,i,jrow,j;

1612:   if (yy != zz) VecCopy(yy,zz);
1613:   VecGetArrayRead(xx,&x);
1614:   VecGetArray(zz,&y);
1615:   idx  = a->j;
1616:   v    = a->a;
1617:   ii   = a->i;

1619:   for (i=0; i<m; i++) {
1620:     jrow  = ii[i];
1621:     n     = ii[i+1] - jrow;
1622:     sum1  = 0.0;
1623:     sum2  = 0.0;
1624:     sum3  = 0.0;
1625:     sum4  = 0.0;
1626:     sum5  = 0.0;
1627:     sum6  = 0.0;
1628:     sum7  = 0.0;
1629:     sum8  = 0.0;
1630:     sum9  = 0.0;
1631:     sum10 = 0.0;
1632:     for (j=0; j<n; j++) {
1633:       sum1  += v[jrow]*x[10*idx[jrow]];
1634:       sum2  += v[jrow]*x[10*idx[jrow]+1];
1635:       sum3  += v[jrow]*x[10*idx[jrow]+2];
1636:       sum4  += v[jrow]*x[10*idx[jrow]+3];
1637:       sum5  += v[jrow]*x[10*idx[jrow]+4];
1638:       sum6  += v[jrow]*x[10*idx[jrow]+5];
1639:       sum7  += v[jrow]*x[10*idx[jrow]+6];
1640:       sum8  += v[jrow]*x[10*idx[jrow]+7];
1641:       sum9  += v[jrow]*x[10*idx[jrow]+8];
1642:       sum10 += v[jrow]*x[10*idx[jrow]+9];
1643:       jrow++;
1644:     }
1645:     y[10*i]   += sum1;
1646:     y[10*i+1] += sum2;
1647:     y[10*i+2] += sum3;
1648:     y[10*i+3] += sum4;
1649:     y[10*i+4] += sum5;
1650:     y[10*i+5] += sum6;
1651:     y[10*i+6] += sum7;
1652:     y[10*i+7] += sum8;
1653:     y[10*i+8] += sum9;
1654:     y[10*i+9] += sum10;
1655:   }

1657:   PetscLogFlops(20.0*a->nz);
1658:   VecRestoreArrayRead(xx,&x);
1659:   VecRestoreArray(yy,&y);
1660:   return 0;
1661: }

1663: PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1664: {
1665:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1666:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1667:   const PetscScalar *x,*v;
1668:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1669:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1670:   PetscInt          n,i;

1672:   VecSet(yy,0.0);
1673:   VecGetArrayRead(xx,&x);
1674:   VecGetArray(yy,&y);

1676:   for (i=0; i<m; i++) {
1677:     idx     = a->j + a->i[i];
1678:     v       = a->a + a->i[i];
1679:     n       = a->i[i+1] - a->i[i];
1680:     alpha1  = x[10*i];
1681:     alpha2  = x[10*i+1];
1682:     alpha3  = x[10*i+2];
1683:     alpha4  = x[10*i+3];
1684:     alpha5  = x[10*i+4];
1685:     alpha6  = x[10*i+5];
1686:     alpha7  = x[10*i+6];
1687:     alpha8  = x[10*i+7];
1688:     alpha9  = x[10*i+8];
1689:     alpha10 = x[10*i+9];
1690:     while (n-->0) {
1691:       y[10*(*idx)]   += alpha1*(*v);
1692:       y[10*(*idx)+1] += alpha2*(*v);
1693:       y[10*(*idx)+2] += alpha3*(*v);
1694:       y[10*(*idx)+3] += alpha4*(*v);
1695:       y[10*(*idx)+4] += alpha5*(*v);
1696:       y[10*(*idx)+5] += alpha6*(*v);
1697:       y[10*(*idx)+6] += alpha7*(*v);
1698:       y[10*(*idx)+7] += alpha8*(*v);
1699:       y[10*(*idx)+8] += alpha9*(*v);
1700:       y[10*(*idx)+9] += alpha10*(*v);
1701:       idx++; v++;
1702:     }
1703:   }
1704:   PetscLogFlops(20.0*a->nz);
1705:   VecRestoreArrayRead(xx,&x);
1706:   VecRestoreArray(yy,&y);
1707:   return 0;
1708: }

1710: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1711: {
1712:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1713:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1714:   const PetscScalar *x,*v;
1715:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1716:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1717:   PetscInt          n,i;

1719:   if (yy != zz) VecCopy(yy,zz);
1720:   VecGetArrayRead(xx,&x);
1721:   VecGetArray(zz,&y);
1722:   for (i=0; i<m; i++) {
1723:     idx     = a->j + a->i[i];
1724:     v       = a->a + a->i[i];
1725:     n       = a->i[i+1] - a->i[i];
1726:     alpha1  = x[10*i];
1727:     alpha2  = x[10*i+1];
1728:     alpha3  = x[10*i+2];
1729:     alpha4  = x[10*i+3];
1730:     alpha5  = x[10*i+4];
1731:     alpha6  = x[10*i+5];
1732:     alpha7  = x[10*i+6];
1733:     alpha8  = x[10*i+7];
1734:     alpha9  = x[10*i+8];
1735:     alpha10 = x[10*i+9];
1736:     while (n-->0) {
1737:       y[10*(*idx)]   += alpha1*(*v);
1738:       y[10*(*idx)+1] += alpha2*(*v);
1739:       y[10*(*idx)+2] += alpha3*(*v);
1740:       y[10*(*idx)+3] += alpha4*(*v);
1741:       y[10*(*idx)+4] += alpha5*(*v);
1742:       y[10*(*idx)+5] += alpha6*(*v);
1743:       y[10*(*idx)+6] += alpha7*(*v);
1744:       y[10*(*idx)+7] += alpha8*(*v);
1745:       y[10*(*idx)+8] += alpha9*(*v);
1746:       y[10*(*idx)+9] += alpha10*(*v);
1747:       idx++; v++;
1748:     }
1749:   }
1750:   PetscLogFlops(20.0*a->nz);
1751:   VecRestoreArrayRead(xx,&x);
1752:   VecRestoreArray(zz,&y);
1753:   return 0;
1754: }

1756: /*--------------------------------------------------------------------------------------------*/
1757: PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1758: {
1759:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1760:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1761:   const PetscScalar *x,*v;
1762:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1763:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1764:   PetscInt          nonzerorow=0,n,i,jrow,j;

1766:   VecGetArrayRead(xx,&x);
1767:   VecGetArray(yy,&y);
1768:   idx  = a->j;
1769:   v    = a->a;
1770:   ii   = a->i;

1772:   for (i=0; i<m; i++) {
1773:     jrow  = ii[i];
1774:     n     = ii[i+1] - jrow;
1775:     sum1  = 0.0;
1776:     sum2  = 0.0;
1777:     sum3  = 0.0;
1778:     sum4  = 0.0;
1779:     sum5  = 0.0;
1780:     sum6  = 0.0;
1781:     sum7  = 0.0;
1782:     sum8  = 0.0;
1783:     sum9  = 0.0;
1784:     sum10 = 0.0;
1785:     sum11 = 0.0;

1787:     nonzerorow += (n>0);
1788:     for (j=0; j<n; j++) {
1789:       sum1  += v[jrow]*x[11*idx[jrow]];
1790:       sum2  += v[jrow]*x[11*idx[jrow]+1];
1791:       sum3  += v[jrow]*x[11*idx[jrow]+2];
1792:       sum4  += v[jrow]*x[11*idx[jrow]+3];
1793:       sum5  += v[jrow]*x[11*idx[jrow]+4];
1794:       sum6  += v[jrow]*x[11*idx[jrow]+5];
1795:       sum7  += v[jrow]*x[11*idx[jrow]+6];
1796:       sum8  += v[jrow]*x[11*idx[jrow]+7];
1797:       sum9  += v[jrow]*x[11*idx[jrow]+8];
1798:       sum10 += v[jrow]*x[11*idx[jrow]+9];
1799:       sum11 += v[jrow]*x[11*idx[jrow]+10];
1800:       jrow++;
1801:     }
1802:     y[11*i]    = sum1;
1803:     y[11*i+1]  = sum2;
1804:     y[11*i+2]  = sum3;
1805:     y[11*i+3]  = sum4;
1806:     y[11*i+4]  = sum5;
1807:     y[11*i+5]  = sum6;
1808:     y[11*i+6]  = sum7;
1809:     y[11*i+7]  = sum8;
1810:     y[11*i+8]  = sum9;
1811:     y[11*i+9]  = sum10;
1812:     y[11*i+10] = sum11;
1813:   }

1815:   PetscLogFlops(22.0*a->nz - 11*nonzerorow);
1816:   VecRestoreArrayRead(xx,&x);
1817:   VecRestoreArray(yy,&y);
1818:   return 0;
1819: }

1821: PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1822: {
1823:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1824:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1825:   const PetscScalar *x,*v;
1826:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1827:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1828:   PetscInt          n,i,jrow,j;

1830:   if (yy != zz) VecCopy(yy,zz);
1831:   VecGetArrayRead(xx,&x);
1832:   VecGetArray(zz,&y);
1833:   idx  = a->j;
1834:   v    = a->a;
1835:   ii   = a->i;

1837:   for (i=0; i<m; i++) {
1838:     jrow  = ii[i];
1839:     n     = ii[i+1] - jrow;
1840:     sum1  = 0.0;
1841:     sum2  = 0.0;
1842:     sum3  = 0.0;
1843:     sum4  = 0.0;
1844:     sum5  = 0.0;
1845:     sum6  = 0.0;
1846:     sum7  = 0.0;
1847:     sum8  = 0.0;
1848:     sum9  = 0.0;
1849:     sum10 = 0.0;
1850:     sum11 = 0.0;
1851:     for (j=0; j<n; j++) {
1852:       sum1  += v[jrow]*x[11*idx[jrow]];
1853:       sum2  += v[jrow]*x[11*idx[jrow]+1];
1854:       sum3  += v[jrow]*x[11*idx[jrow]+2];
1855:       sum4  += v[jrow]*x[11*idx[jrow]+3];
1856:       sum5  += v[jrow]*x[11*idx[jrow]+4];
1857:       sum6  += v[jrow]*x[11*idx[jrow]+5];
1858:       sum7  += v[jrow]*x[11*idx[jrow]+6];
1859:       sum8  += v[jrow]*x[11*idx[jrow]+7];
1860:       sum9  += v[jrow]*x[11*idx[jrow]+8];
1861:       sum10 += v[jrow]*x[11*idx[jrow]+9];
1862:       sum11 += v[jrow]*x[11*idx[jrow]+10];
1863:       jrow++;
1864:     }
1865:     y[11*i]    += sum1;
1866:     y[11*i+1]  += sum2;
1867:     y[11*i+2]  += sum3;
1868:     y[11*i+3]  += sum4;
1869:     y[11*i+4]  += sum5;
1870:     y[11*i+5]  += sum6;
1871:     y[11*i+6]  += sum7;
1872:     y[11*i+7]  += sum8;
1873:     y[11*i+8]  += sum9;
1874:     y[11*i+9]  += sum10;
1875:     y[11*i+10] += sum11;
1876:   }

1878:   PetscLogFlops(22.0*a->nz);
1879:   VecRestoreArrayRead(xx,&x);
1880:   VecRestoreArray(yy,&y);
1881:   return 0;
1882: }

1884: PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1885: {
1886:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1887:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1888:   const PetscScalar *x,*v;
1889:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
1890:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1891:   PetscInt          n,i;

1893:   VecSet(yy,0.0);
1894:   VecGetArrayRead(xx,&x);
1895:   VecGetArray(yy,&y);

1897:   for (i=0; i<m; i++) {
1898:     idx     = a->j + a->i[i];
1899:     v       = a->a + a->i[i];
1900:     n       = a->i[i+1] - a->i[i];
1901:     alpha1  = x[11*i];
1902:     alpha2  = x[11*i+1];
1903:     alpha3  = x[11*i+2];
1904:     alpha4  = x[11*i+3];
1905:     alpha5  = x[11*i+4];
1906:     alpha6  = x[11*i+5];
1907:     alpha7  = x[11*i+6];
1908:     alpha8  = x[11*i+7];
1909:     alpha9  = x[11*i+8];
1910:     alpha10 = x[11*i+9];
1911:     alpha11 = x[11*i+10];
1912:     while (n-->0) {
1913:       y[11*(*idx)]    += alpha1*(*v);
1914:       y[11*(*idx)+1]  += alpha2*(*v);
1915:       y[11*(*idx)+2]  += alpha3*(*v);
1916:       y[11*(*idx)+3]  += alpha4*(*v);
1917:       y[11*(*idx)+4]  += alpha5*(*v);
1918:       y[11*(*idx)+5]  += alpha6*(*v);
1919:       y[11*(*idx)+6]  += alpha7*(*v);
1920:       y[11*(*idx)+7]  += alpha8*(*v);
1921:       y[11*(*idx)+8]  += alpha9*(*v);
1922:       y[11*(*idx)+9]  += alpha10*(*v);
1923:       y[11*(*idx)+10] += alpha11*(*v);
1924:       idx++; v++;
1925:     }
1926:   }
1927:   PetscLogFlops(22.0*a->nz);
1928:   VecRestoreArrayRead(xx,&x);
1929:   VecRestoreArray(yy,&y);
1930:   return 0;
1931: }

1933: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1934: {
1935:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1936:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1937:   const PetscScalar *x,*v;
1938:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
1939:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1940:   PetscInt          n,i;

1942:   if (yy != zz) VecCopy(yy,zz);
1943:   VecGetArrayRead(xx,&x);
1944:   VecGetArray(zz,&y);
1945:   for (i=0; i<m; i++) {
1946:     idx     = a->j + a->i[i];
1947:     v       = a->a + a->i[i];
1948:     n       = a->i[i+1] - a->i[i];
1949:     alpha1  = x[11*i];
1950:     alpha2  = x[11*i+1];
1951:     alpha3  = x[11*i+2];
1952:     alpha4  = x[11*i+3];
1953:     alpha5  = x[11*i+4];
1954:     alpha6  = x[11*i+5];
1955:     alpha7  = x[11*i+6];
1956:     alpha8  = x[11*i+7];
1957:     alpha9  = x[11*i+8];
1958:     alpha10 = x[11*i+9];
1959:     alpha11 = x[11*i+10];
1960:     while (n-->0) {
1961:       y[11*(*idx)]    += alpha1*(*v);
1962:       y[11*(*idx)+1]  += alpha2*(*v);
1963:       y[11*(*idx)+2]  += alpha3*(*v);
1964:       y[11*(*idx)+3]  += alpha4*(*v);
1965:       y[11*(*idx)+4]  += alpha5*(*v);
1966:       y[11*(*idx)+5]  += alpha6*(*v);
1967:       y[11*(*idx)+6]  += alpha7*(*v);
1968:       y[11*(*idx)+7]  += alpha8*(*v);
1969:       y[11*(*idx)+8]  += alpha9*(*v);
1970:       y[11*(*idx)+9]  += alpha10*(*v);
1971:       y[11*(*idx)+10] += alpha11*(*v);
1972:       idx++; v++;
1973:     }
1974:   }
1975:   PetscLogFlops(22.0*a->nz);
1976:   VecRestoreArrayRead(xx,&x);
1977:   VecRestoreArray(zz,&y);
1978:   return 0;
1979: }

1981: /*--------------------------------------------------------------------------------------------*/
1982: PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1983: {
1984:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1985:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1986:   const PetscScalar *x,*v;
1987:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1988:   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1989:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1990:   PetscInt          nonzerorow=0,n,i,jrow,j;

1992:   VecGetArrayRead(xx,&x);
1993:   VecGetArray(yy,&y);
1994:   idx  = a->j;
1995:   v    = a->a;
1996:   ii   = a->i;

1998:   for (i=0; i<m; i++) {
1999:     jrow  = ii[i];
2000:     n     = ii[i+1] - jrow;
2001:     sum1  = 0.0;
2002:     sum2  = 0.0;
2003:     sum3  = 0.0;
2004:     sum4  = 0.0;
2005:     sum5  = 0.0;
2006:     sum6  = 0.0;
2007:     sum7  = 0.0;
2008:     sum8  = 0.0;
2009:     sum9  = 0.0;
2010:     sum10 = 0.0;
2011:     sum11 = 0.0;
2012:     sum12 = 0.0;
2013:     sum13 = 0.0;
2014:     sum14 = 0.0;
2015:     sum15 = 0.0;
2016:     sum16 = 0.0;

2018:     nonzerorow += (n>0);
2019:     for (j=0; j<n; j++) {
2020:       sum1  += v[jrow]*x[16*idx[jrow]];
2021:       sum2  += v[jrow]*x[16*idx[jrow]+1];
2022:       sum3  += v[jrow]*x[16*idx[jrow]+2];
2023:       sum4  += v[jrow]*x[16*idx[jrow]+3];
2024:       sum5  += v[jrow]*x[16*idx[jrow]+4];
2025:       sum6  += v[jrow]*x[16*idx[jrow]+5];
2026:       sum7  += v[jrow]*x[16*idx[jrow]+6];
2027:       sum8  += v[jrow]*x[16*idx[jrow]+7];
2028:       sum9  += v[jrow]*x[16*idx[jrow]+8];
2029:       sum10 += v[jrow]*x[16*idx[jrow]+9];
2030:       sum11 += v[jrow]*x[16*idx[jrow]+10];
2031:       sum12 += v[jrow]*x[16*idx[jrow]+11];
2032:       sum13 += v[jrow]*x[16*idx[jrow]+12];
2033:       sum14 += v[jrow]*x[16*idx[jrow]+13];
2034:       sum15 += v[jrow]*x[16*idx[jrow]+14];
2035:       sum16 += v[jrow]*x[16*idx[jrow]+15];
2036:       jrow++;
2037:     }
2038:     y[16*i]    = sum1;
2039:     y[16*i+1]  = sum2;
2040:     y[16*i+2]  = sum3;
2041:     y[16*i+3]  = sum4;
2042:     y[16*i+4]  = sum5;
2043:     y[16*i+5]  = sum6;
2044:     y[16*i+6]  = sum7;
2045:     y[16*i+7]  = sum8;
2046:     y[16*i+8]  = sum9;
2047:     y[16*i+9]  = sum10;
2048:     y[16*i+10] = sum11;
2049:     y[16*i+11] = sum12;
2050:     y[16*i+12] = sum13;
2051:     y[16*i+13] = sum14;
2052:     y[16*i+14] = sum15;
2053:     y[16*i+15] = sum16;
2054:   }

2056:   PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);
2057:   VecRestoreArrayRead(xx,&x);
2058:   VecRestoreArray(yy,&y);
2059:   return 0;
2060: }

2062: PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2063: {
2064:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2065:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2066:   const PetscScalar *x,*v;
2067:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2068:   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2069:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2070:   PetscInt          n,i;

2072:   VecSet(yy,0.0);
2073:   VecGetArrayRead(xx,&x);
2074:   VecGetArray(yy,&y);

2076:   for (i=0; i<m; i++) {
2077:     idx     = a->j + a->i[i];
2078:     v       = a->a + a->i[i];
2079:     n       = a->i[i+1] - a->i[i];
2080:     alpha1  = x[16*i];
2081:     alpha2  = x[16*i+1];
2082:     alpha3  = x[16*i+2];
2083:     alpha4  = x[16*i+3];
2084:     alpha5  = x[16*i+4];
2085:     alpha6  = x[16*i+5];
2086:     alpha7  = x[16*i+6];
2087:     alpha8  = x[16*i+7];
2088:     alpha9  = x[16*i+8];
2089:     alpha10 = x[16*i+9];
2090:     alpha11 = x[16*i+10];
2091:     alpha12 = x[16*i+11];
2092:     alpha13 = x[16*i+12];
2093:     alpha14 = x[16*i+13];
2094:     alpha15 = x[16*i+14];
2095:     alpha16 = x[16*i+15];
2096:     while (n-->0) {
2097:       y[16*(*idx)]    += alpha1*(*v);
2098:       y[16*(*idx)+1]  += alpha2*(*v);
2099:       y[16*(*idx)+2]  += alpha3*(*v);
2100:       y[16*(*idx)+3]  += alpha4*(*v);
2101:       y[16*(*idx)+4]  += alpha5*(*v);
2102:       y[16*(*idx)+5]  += alpha6*(*v);
2103:       y[16*(*idx)+6]  += alpha7*(*v);
2104:       y[16*(*idx)+7]  += alpha8*(*v);
2105:       y[16*(*idx)+8]  += alpha9*(*v);
2106:       y[16*(*idx)+9]  += alpha10*(*v);
2107:       y[16*(*idx)+10] += alpha11*(*v);
2108:       y[16*(*idx)+11] += alpha12*(*v);
2109:       y[16*(*idx)+12] += alpha13*(*v);
2110:       y[16*(*idx)+13] += alpha14*(*v);
2111:       y[16*(*idx)+14] += alpha15*(*v);
2112:       y[16*(*idx)+15] += alpha16*(*v);
2113:       idx++; v++;
2114:     }
2115:   }
2116:   PetscLogFlops(32.0*a->nz);
2117:   VecRestoreArrayRead(xx,&x);
2118:   VecRestoreArray(yy,&y);
2119:   return 0;
2120: }

2122: PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2123: {
2124:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2125:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2126:   const PetscScalar *x,*v;
2127:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2128:   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2129:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2130:   PetscInt          n,i,jrow,j;

2132:   if (yy != zz) VecCopy(yy,zz);
2133:   VecGetArrayRead(xx,&x);
2134:   VecGetArray(zz,&y);
2135:   idx  = a->j;
2136:   v    = a->a;
2137:   ii   = a->i;

2139:   for (i=0; i<m; i++) {
2140:     jrow  = ii[i];
2141:     n     = ii[i+1] - jrow;
2142:     sum1  = 0.0;
2143:     sum2  = 0.0;
2144:     sum3  = 0.0;
2145:     sum4  = 0.0;
2146:     sum5  = 0.0;
2147:     sum6  = 0.0;
2148:     sum7  = 0.0;
2149:     sum8  = 0.0;
2150:     sum9  = 0.0;
2151:     sum10 = 0.0;
2152:     sum11 = 0.0;
2153:     sum12 = 0.0;
2154:     sum13 = 0.0;
2155:     sum14 = 0.0;
2156:     sum15 = 0.0;
2157:     sum16 = 0.0;
2158:     for (j=0; j<n; j++) {
2159:       sum1  += v[jrow]*x[16*idx[jrow]];
2160:       sum2  += v[jrow]*x[16*idx[jrow]+1];
2161:       sum3  += v[jrow]*x[16*idx[jrow]+2];
2162:       sum4  += v[jrow]*x[16*idx[jrow]+3];
2163:       sum5  += v[jrow]*x[16*idx[jrow]+4];
2164:       sum6  += v[jrow]*x[16*idx[jrow]+5];
2165:       sum7  += v[jrow]*x[16*idx[jrow]+6];
2166:       sum8  += v[jrow]*x[16*idx[jrow]+7];
2167:       sum9  += v[jrow]*x[16*idx[jrow]+8];
2168:       sum10 += v[jrow]*x[16*idx[jrow]+9];
2169:       sum11 += v[jrow]*x[16*idx[jrow]+10];
2170:       sum12 += v[jrow]*x[16*idx[jrow]+11];
2171:       sum13 += v[jrow]*x[16*idx[jrow]+12];
2172:       sum14 += v[jrow]*x[16*idx[jrow]+13];
2173:       sum15 += v[jrow]*x[16*idx[jrow]+14];
2174:       sum16 += v[jrow]*x[16*idx[jrow]+15];
2175:       jrow++;
2176:     }
2177:     y[16*i]    += sum1;
2178:     y[16*i+1]  += sum2;
2179:     y[16*i+2]  += sum3;
2180:     y[16*i+3]  += sum4;
2181:     y[16*i+4]  += sum5;
2182:     y[16*i+5]  += sum6;
2183:     y[16*i+6]  += sum7;
2184:     y[16*i+7]  += sum8;
2185:     y[16*i+8]  += sum9;
2186:     y[16*i+9]  += sum10;
2187:     y[16*i+10] += sum11;
2188:     y[16*i+11] += sum12;
2189:     y[16*i+12] += sum13;
2190:     y[16*i+13] += sum14;
2191:     y[16*i+14] += sum15;
2192:     y[16*i+15] += sum16;
2193:   }

2195:   PetscLogFlops(32.0*a->nz);
2196:   VecRestoreArrayRead(xx,&x);
2197:   VecRestoreArray(zz,&y);
2198:   return 0;
2199: }

2201: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2202: {
2203:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2204:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2205:   const PetscScalar *x,*v;
2206:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2207:   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2208:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2209:   PetscInt          n,i;

2211:   if (yy != zz) VecCopy(yy,zz);
2212:   VecGetArrayRead(xx,&x);
2213:   VecGetArray(zz,&y);
2214:   for (i=0; i<m; i++) {
2215:     idx     = a->j + a->i[i];
2216:     v       = a->a + a->i[i];
2217:     n       = a->i[i+1] - a->i[i];
2218:     alpha1  = x[16*i];
2219:     alpha2  = x[16*i+1];
2220:     alpha3  = x[16*i+2];
2221:     alpha4  = x[16*i+3];
2222:     alpha5  = x[16*i+4];
2223:     alpha6  = x[16*i+5];
2224:     alpha7  = x[16*i+6];
2225:     alpha8  = x[16*i+7];
2226:     alpha9  = x[16*i+8];
2227:     alpha10 = x[16*i+9];
2228:     alpha11 = x[16*i+10];
2229:     alpha12 = x[16*i+11];
2230:     alpha13 = x[16*i+12];
2231:     alpha14 = x[16*i+13];
2232:     alpha15 = x[16*i+14];
2233:     alpha16 = x[16*i+15];
2234:     while (n-->0) {
2235:       y[16*(*idx)]    += alpha1*(*v);
2236:       y[16*(*idx)+1]  += alpha2*(*v);
2237:       y[16*(*idx)+2]  += alpha3*(*v);
2238:       y[16*(*idx)+3]  += alpha4*(*v);
2239:       y[16*(*idx)+4]  += alpha5*(*v);
2240:       y[16*(*idx)+5]  += alpha6*(*v);
2241:       y[16*(*idx)+6]  += alpha7*(*v);
2242:       y[16*(*idx)+7]  += alpha8*(*v);
2243:       y[16*(*idx)+8]  += alpha9*(*v);
2244:       y[16*(*idx)+9]  += alpha10*(*v);
2245:       y[16*(*idx)+10] += alpha11*(*v);
2246:       y[16*(*idx)+11] += alpha12*(*v);
2247:       y[16*(*idx)+12] += alpha13*(*v);
2248:       y[16*(*idx)+13] += alpha14*(*v);
2249:       y[16*(*idx)+14] += alpha15*(*v);
2250:       y[16*(*idx)+15] += alpha16*(*v);
2251:       idx++; v++;
2252:     }
2253:   }
2254:   PetscLogFlops(32.0*a->nz);
2255:   VecRestoreArrayRead(xx,&x);
2256:   VecRestoreArray(zz,&y);
2257:   return 0;
2258: }

2260: /*--------------------------------------------------------------------------------------------*/
2261: PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2262: {
2263:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2264:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2265:   const PetscScalar *x,*v;
2266:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2267:   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2268:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2269:   PetscInt          nonzerorow=0,n,i,jrow,j;

2271:   VecGetArrayRead(xx,&x);
2272:   VecGetArray(yy,&y);
2273:   idx  = a->j;
2274:   v    = a->a;
2275:   ii   = a->i;

2277:   for (i=0; i<m; i++) {
2278:     jrow  = ii[i];
2279:     n     = ii[i+1] - jrow;
2280:     sum1  = 0.0;
2281:     sum2  = 0.0;
2282:     sum3  = 0.0;
2283:     sum4  = 0.0;
2284:     sum5  = 0.0;
2285:     sum6  = 0.0;
2286:     sum7  = 0.0;
2287:     sum8  = 0.0;
2288:     sum9  = 0.0;
2289:     sum10 = 0.0;
2290:     sum11 = 0.0;
2291:     sum12 = 0.0;
2292:     sum13 = 0.0;
2293:     sum14 = 0.0;
2294:     sum15 = 0.0;
2295:     sum16 = 0.0;
2296:     sum17 = 0.0;
2297:     sum18 = 0.0;

2299:     nonzerorow += (n>0);
2300:     for (j=0; j<n; j++) {
2301:       sum1  += v[jrow]*x[18*idx[jrow]];
2302:       sum2  += v[jrow]*x[18*idx[jrow]+1];
2303:       sum3  += v[jrow]*x[18*idx[jrow]+2];
2304:       sum4  += v[jrow]*x[18*idx[jrow]+3];
2305:       sum5  += v[jrow]*x[18*idx[jrow]+4];
2306:       sum6  += v[jrow]*x[18*idx[jrow]+5];
2307:       sum7  += v[jrow]*x[18*idx[jrow]+6];
2308:       sum8  += v[jrow]*x[18*idx[jrow]+7];
2309:       sum9  += v[jrow]*x[18*idx[jrow]+8];
2310:       sum10 += v[jrow]*x[18*idx[jrow]+9];
2311:       sum11 += v[jrow]*x[18*idx[jrow]+10];
2312:       sum12 += v[jrow]*x[18*idx[jrow]+11];
2313:       sum13 += v[jrow]*x[18*idx[jrow]+12];
2314:       sum14 += v[jrow]*x[18*idx[jrow]+13];
2315:       sum15 += v[jrow]*x[18*idx[jrow]+14];
2316:       sum16 += v[jrow]*x[18*idx[jrow]+15];
2317:       sum17 += v[jrow]*x[18*idx[jrow]+16];
2318:       sum18 += v[jrow]*x[18*idx[jrow]+17];
2319:       jrow++;
2320:     }
2321:     y[18*i]    = sum1;
2322:     y[18*i+1]  = sum2;
2323:     y[18*i+2]  = sum3;
2324:     y[18*i+3]  = sum4;
2325:     y[18*i+4]  = sum5;
2326:     y[18*i+5]  = sum6;
2327:     y[18*i+6]  = sum7;
2328:     y[18*i+7]  = sum8;
2329:     y[18*i+8]  = sum9;
2330:     y[18*i+9]  = sum10;
2331:     y[18*i+10] = sum11;
2332:     y[18*i+11] = sum12;
2333:     y[18*i+12] = sum13;
2334:     y[18*i+13] = sum14;
2335:     y[18*i+14] = sum15;
2336:     y[18*i+15] = sum16;
2337:     y[18*i+16] = sum17;
2338:     y[18*i+17] = sum18;
2339:   }

2341:   PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);
2342:   VecRestoreArrayRead(xx,&x);
2343:   VecRestoreArray(yy,&y);
2344:   return 0;
2345: }

2347: PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2348: {
2349:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2350:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2351:   const PetscScalar *x,*v;
2352:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2353:   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2354:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2355:   PetscInt          n,i;

2357:   VecSet(yy,0.0);
2358:   VecGetArrayRead(xx,&x);
2359:   VecGetArray(yy,&y);

2361:   for (i=0; i<m; i++) {
2362:     idx     = a->j + a->i[i];
2363:     v       = a->a + a->i[i];
2364:     n       = a->i[i+1] - a->i[i];
2365:     alpha1  = x[18*i];
2366:     alpha2  = x[18*i+1];
2367:     alpha3  = x[18*i+2];
2368:     alpha4  = x[18*i+3];
2369:     alpha5  = x[18*i+4];
2370:     alpha6  = x[18*i+5];
2371:     alpha7  = x[18*i+6];
2372:     alpha8  = x[18*i+7];
2373:     alpha9  = x[18*i+8];
2374:     alpha10 = x[18*i+9];
2375:     alpha11 = x[18*i+10];
2376:     alpha12 = x[18*i+11];
2377:     alpha13 = x[18*i+12];
2378:     alpha14 = x[18*i+13];
2379:     alpha15 = x[18*i+14];
2380:     alpha16 = x[18*i+15];
2381:     alpha17 = x[18*i+16];
2382:     alpha18 = x[18*i+17];
2383:     while (n-->0) {
2384:       y[18*(*idx)]    += alpha1*(*v);
2385:       y[18*(*idx)+1]  += alpha2*(*v);
2386:       y[18*(*idx)+2]  += alpha3*(*v);
2387:       y[18*(*idx)+3]  += alpha4*(*v);
2388:       y[18*(*idx)+4]  += alpha5*(*v);
2389:       y[18*(*idx)+5]  += alpha6*(*v);
2390:       y[18*(*idx)+6]  += alpha7*(*v);
2391:       y[18*(*idx)+7]  += alpha8*(*v);
2392:       y[18*(*idx)+8]  += alpha9*(*v);
2393:       y[18*(*idx)+9]  += alpha10*(*v);
2394:       y[18*(*idx)+10] += alpha11*(*v);
2395:       y[18*(*idx)+11] += alpha12*(*v);
2396:       y[18*(*idx)+12] += alpha13*(*v);
2397:       y[18*(*idx)+13] += alpha14*(*v);
2398:       y[18*(*idx)+14] += alpha15*(*v);
2399:       y[18*(*idx)+15] += alpha16*(*v);
2400:       y[18*(*idx)+16] += alpha17*(*v);
2401:       y[18*(*idx)+17] += alpha18*(*v);
2402:       idx++; v++;
2403:     }
2404:   }
2405:   PetscLogFlops(36.0*a->nz);
2406:   VecRestoreArrayRead(xx,&x);
2407:   VecRestoreArray(yy,&y);
2408:   return 0;
2409: }

2411: PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2412: {
2413:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2414:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2415:   const PetscScalar *x,*v;
2416:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2417:   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2418:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2419:   PetscInt          n,i,jrow,j;

2421:   if (yy != zz) VecCopy(yy,zz);
2422:   VecGetArrayRead(xx,&x);
2423:   VecGetArray(zz,&y);
2424:   idx  = a->j;
2425:   v    = a->a;
2426:   ii   = a->i;

2428:   for (i=0; i<m; i++) {
2429:     jrow  = ii[i];
2430:     n     = ii[i+1] - jrow;
2431:     sum1  = 0.0;
2432:     sum2  = 0.0;
2433:     sum3  = 0.0;
2434:     sum4  = 0.0;
2435:     sum5  = 0.0;
2436:     sum6  = 0.0;
2437:     sum7  = 0.0;
2438:     sum8  = 0.0;
2439:     sum9  = 0.0;
2440:     sum10 = 0.0;
2441:     sum11 = 0.0;
2442:     sum12 = 0.0;
2443:     sum13 = 0.0;
2444:     sum14 = 0.0;
2445:     sum15 = 0.0;
2446:     sum16 = 0.0;
2447:     sum17 = 0.0;
2448:     sum18 = 0.0;
2449:     for (j=0; j<n; j++) {
2450:       sum1  += v[jrow]*x[18*idx[jrow]];
2451:       sum2  += v[jrow]*x[18*idx[jrow]+1];
2452:       sum3  += v[jrow]*x[18*idx[jrow]+2];
2453:       sum4  += v[jrow]*x[18*idx[jrow]+3];
2454:       sum5  += v[jrow]*x[18*idx[jrow]+4];
2455:       sum6  += v[jrow]*x[18*idx[jrow]+5];
2456:       sum7  += v[jrow]*x[18*idx[jrow]+6];
2457:       sum8  += v[jrow]*x[18*idx[jrow]+7];
2458:       sum9  += v[jrow]*x[18*idx[jrow]+8];
2459:       sum10 += v[jrow]*x[18*idx[jrow]+9];
2460:       sum11 += v[jrow]*x[18*idx[jrow]+10];
2461:       sum12 += v[jrow]*x[18*idx[jrow]+11];
2462:       sum13 += v[jrow]*x[18*idx[jrow]+12];
2463:       sum14 += v[jrow]*x[18*idx[jrow]+13];
2464:       sum15 += v[jrow]*x[18*idx[jrow]+14];
2465:       sum16 += v[jrow]*x[18*idx[jrow]+15];
2466:       sum17 += v[jrow]*x[18*idx[jrow]+16];
2467:       sum18 += v[jrow]*x[18*idx[jrow]+17];
2468:       jrow++;
2469:     }
2470:     y[18*i]    += sum1;
2471:     y[18*i+1]  += sum2;
2472:     y[18*i+2]  += sum3;
2473:     y[18*i+3]  += sum4;
2474:     y[18*i+4]  += sum5;
2475:     y[18*i+5]  += sum6;
2476:     y[18*i+6]  += sum7;
2477:     y[18*i+7]  += sum8;
2478:     y[18*i+8]  += sum9;
2479:     y[18*i+9]  += sum10;
2480:     y[18*i+10] += sum11;
2481:     y[18*i+11] += sum12;
2482:     y[18*i+12] += sum13;
2483:     y[18*i+13] += sum14;
2484:     y[18*i+14] += sum15;
2485:     y[18*i+15] += sum16;
2486:     y[18*i+16] += sum17;
2487:     y[18*i+17] += sum18;
2488:   }

2490:   PetscLogFlops(36.0*a->nz);
2491:   VecRestoreArrayRead(xx,&x);
2492:   VecRestoreArray(zz,&y);
2493:   return 0;
2494: }

2496: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2497: {
2498:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2499:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2500:   const PetscScalar *x,*v;
2501:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2502:   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2503:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2504:   PetscInt          n,i;

2506:   if (yy != zz) VecCopy(yy,zz);
2507:   VecGetArrayRead(xx,&x);
2508:   VecGetArray(zz,&y);
2509:   for (i=0; i<m; i++) {
2510:     idx     = a->j + a->i[i];
2511:     v       = a->a + a->i[i];
2512:     n       = a->i[i+1] - a->i[i];
2513:     alpha1  = x[18*i];
2514:     alpha2  = x[18*i+1];
2515:     alpha3  = x[18*i+2];
2516:     alpha4  = x[18*i+3];
2517:     alpha5  = x[18*i+4];
2518:     alpha6  = x[18*i+5];
2519:     alpha7  = x[18*i+6];
2520:     alpha8  = x[18*i+7];
2521:     alpha9  = x[18*i+8];
2522:     alpha10 = x[18*i+9];
2523:     alpha11 = x[18*i+10];
2524:     alpha12 = x[18*i+11];
2525:     alpha13 = x[18*i+12];
2526:     alpha14 = x[18*i+13];
2527:     alpha15 = x[18*i+14];
2528:     alpha16 = x[18*i+15];
2529:     alpha17 = x[18*i+16];
2530:     alpha18 = x[18*i+17];
2531:     while (n-->0) {
2532:       y[18*(*idx)]    += alpha1*(*v);
2533:       y[18*(*idx)+1]  += alpha2*(*v);
2534:       y[18*(*idx)+2]  += alpha3*(*v);
2535:       y[18*(*idx)+3]  += alpha4*(*v);
2536:       y[18*(*idx)+4]  += alpha5*(*v);
2537:       y[18*(*idx)+5]  += alpha6*(*v);
2538:       y[18*(*idx)+6]  += alpha7*(*v);
2539:       y[18*(*idx)+7]  += alpha8*(*v);
2540:       y[18*(*idx)+8]  += alpha9*(*v);
2541:       y[18*(*idx)+9]  += alpha10*(*v);
2542:       y[18*(*idx)+10] += alpha11*(*v);
2543:       y[18*(*idx)+11] += alpha12*(*v);
2544:       y[18*(*idx)+12] += alpha13*(*v);
2545:       y[18*(*idx)+13] += alpha14*(*v);
2546:       y[18*(*idx)+14] += alpha15*(*v);
2547:       y[18*(*idx)+15] += alpha16*(*v);
2548:       y[18*(*idx)+16] += alpha17*(*v);
2549:       y[18*(*idx)+17] += alpha18*(*v);
2550:       idx++; v++;
2551:     }
2552:   }
2553:   PetscLogFlops(36.0*a->nz);
2554:   VecRestoreArrayRead(xx,&x);
2555:   VecRestoreArray(zz,&y);
2556:   return 0;
2557: }

2559: PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2560: {
2561:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2562:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2563:   const PetscScalar *x,*v;
2564:   PetscScalar       *y,*sums;
2565:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2566:   PetscInt          n,i,jrow,j,dof = b->dof,k;

2568:   VecGetArrayRead(xx,&x);
2569:   VecSet(yy,0.0);
2570:   VecGetArray(yy,&y);
2571:   idx  = a->j;
2572:   v    = a->a;
2573:   ii   = a->i;

2575:   for (i=0; i<m; i++) {
2576:     jrow = ii[i];
2577:     n    = ii[i+1] - jrow;
2578:     sums = y + dof*i;
2579:     for (j=0; j<n; j++) {
2580:       for (k=0; k<dof; k++) {
2581:         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2582:       }
2583:       jrow++;
2584:     }
2585:   }

2587:   PetscLogFlops(2.0*dof*a->nz);
2588:   VecRestoreArrayRead(xx,&x);
2589:   VecRestoreArray(yy,&y);
2590:   return 0;
2591: }

2593: PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2594: {
2595:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2596:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2597:   const PetscScalar *x,*v;
2598:   PetscScalar       *y,*sums;
2599:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2600:   PetscInt          n,i,jrow,j,dof = b->dof,k;

2602:   if (yy != zz) VecCopy(yy,zz);
2603:   VecGetArrayRead(xx,&x);
2604:   VecGetArray(zz,&y);
2605:   idx  = a->j;
2606:   v    = a->a;
2607:   ii   = a->i;

2609:   for (i=0; i<m; i++) {
2610:     jrow = ii[i];
2611:     n    = ii[i+1] - jrow;
2612:     sums = y + dof*i;
2613:     for (j=0; j<n; j++) {
2614:       for (k=0; k<dof; k++) {
2615:         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2616:       }
2617:       jrow++;
2618:     }
2619:   }

2621:   PetscLogFlops(2.0*dof*a->nz);
2622:   VecRestoreArrayRead(xx,&x);
2623:   VecRestoreArray(zz,&y);
2624:   return 0;
2625: }

2627: PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2628: {
2629:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2630:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2631:   const PetscScalar *x,*v,*alpha;
2632:   PetscScalar       *y;
2633:   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2634:   PetscInt          n,i,k;

2636:   VecGetArrayRead(xx,&x);
2637:   VecSet(yy,0.0);
2638:   VecGetArray(yy,&y);
2639:   for (i=0; i<m; i++) {
2640:     idx   = a->j + a->i[i];
2641:     v     = a->a + a->i[i];
2642:     n     = a->i[i+1] - a->i[i];
2643:     alpha = x + dof*i;
2644:     while (n-->0) {
2645:       for (k=0; k<dof; k++) {
2646:         y[dof*(*idx)+k] += alpha[k]*(*v);
2647:       }
2648:       idx++; v++;
2649:     }
2650:   }
2651:   PetscLogFlops(2.0*dof*a->nz);
2652:   VecRestoreArrayRead(xx,&x);
2653:   VecRestoreArray(yy,&y);
2654:   return 0;
2655: }

2657: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2658: {
2659:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2660:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2661:   const PetscScalar *x,*v,*alpha;
2662:   PetscScalar       *y;
2663:   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2664:   PetscInt          n,i,k;

2666:   if (yy != zz) VecCopy(yy,zz);
2667:   VecGetArrayRead(xx,&x);
2668:   VecGetArray(zz,&y);
2669:   for (i=0; i<m; i++) {
2670:     idx   = a->j + a->i[i];
2671:     v     = a->a + a->i[i];
2672:     n     = a->i[i+1] - a->i[i];
2673:     alpha = x + dof*i;
2674:     while (n-->0) {
2675:       for (k=0; k<dof; k++) {
2676:         y[dof*(*idx)+k] += alpha[k]*(*v);
2677:       }
2678:       idx++; v++;
2679:     }
2680:   }
2681:   PetscLogFlops(2.0*dof*a->nz);
2682:   VecRestoreArrayRead(xx,&x);
2683:   VecRestoreArray(zz,&y);
2684:   return 0;
2685: }

2687: /*===================================================================================*/
2688: PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2689: {
2690:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2692:   /* start the scatter */
2693:   VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2694:   (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
2695:   VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2696:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
2697:   return 0;
2698: }

2700: PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2701: {
2702:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2704:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2705:   (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
2706:   VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2707:   VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2708:   return 0;
2709: }

2711: PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2712: {
2713:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2715:   /* start the scatter */
2716:   VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2717:   (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
2718:   VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2719:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);
2720:   return 0;
2721: }

2723: PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2724: {
2725:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2727:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2728:   (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
2729:   VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2730:   VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2731:   return 0;
2732: }

2734: /* ----------------------------------------------------------------*/
2735: PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C)
2736: {
2737:   Mat_Product    *product = C->product;

2739:   if (product->type == MATPRODUCT_PtAP) {
2740:     C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ;
2741:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices",MatProductTypes[product->type]);
2742:   return 0;
2743: }

2745: PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C)
2746: {
2748:   Mat_Product    *product = C->product;
2749:   PetscBool      flg = PETSC_FALSE;
2750:   Mat            A=product->A,P=product->B;
2751:   PetscInt       alg=1; /* set default algorithm */
2752: #if !defined(PETSC_HAVE_HYPRE)
2753:   const char     *algTypes[4] = {"scalable","nonscalable","allatonce","allatonce_merged"};
2754:   PetscInt       nalg=4;
2755: #else
2756:   const char     *algTypes[5] = {"scalable","nonscalable","allatonce","allatonce_merged","hypre"};
2757:   PetscInt       nalg=5;
2758: #endif


2762:   /* PtAP */
2763:   /* Check matrix local sizes */

2767:   /* Set the default algorithm */
2768:   PetscStrcmp(C->product->alg,"default",&flg);
2769:   if (flg) {
2770:     MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
2771:   }

2773:   /* Get runtime option */
2774:   PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");
2775:   PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatPtAP",algTypes,nalg,algTypes[alg],&alg,&flg);
2776:   if (flg) {
2777:     MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
2778:   }
2779:   PetscOptionsEnd();

2781:   PetscStrcmp(C->product->alg,"allatonce",&flg);
2782:   if (flg) {
2783:     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
2784:     return 0;
2785:   }

2787:   PetscStrcmp(C->product->alg,"allatonce_merged",&flg);
2788:   if (flg) {
2789:     C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
2790:     return 0;
2791:   }

2793:   /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */
2794:   PetscInfo((PetscObject)A,"Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n");
2795:   MatConvert(P,MATMPIAIJ,MAT_INPLACE_MATRIX,&P);
2796:   MatProductSetFromOptions(C);
2797:   return 0;
2798: }

2800: /* ----------------------------------------------------------------*/
2801: PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat C)
2802: {
2803:   PetscFreeSpaceList free_space=NULL,current_space=NULL;
2804:   Mat_SeqMAIJ        *pp       =(Mat_SeqMAIJ*)PP->data;
2805:   Mat                P         =pp->AIJ;
2806:   Mat_SeqAIJ         *a        =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2807:   PetscInt           *pti,*ptj,*ptJ;
2808:   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2809:   const PetscInt     an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
2810:   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
2811:   MatScalar          *ca;
2812:   const PetscInt     *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;

2814:   /* Get ij structure of P^T */
2815:   MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);

2817:   cn = pn*ppdof;
2818:   /* Allocate ci array, arrays for fill computation and */
2819:   /* free space for accumulating nonzero column info */
2820:   PetscMalloc1(cn+1,&ci);
2821:   ci[0] = 0;

2823:   /* Work arrays for rows of P^T*A */
2824:   PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow);
2825:   PetscArrayzero(ptadenserow,an);
2826:   PetscArrayzero(denserow,cn);

2828:   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2829:   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2830:   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2831:   PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space);
2832:   current_space = free_space;

2834:   /* Determine symbolic info for each row of C: */
2835:   for (i=0; i<pn; i++) {
2836:     ptnzi = pti[i+1] - pti[i];
2837:     ptJ   = ptj + pti[i];
2838:     for (dof=0; dof<ppdof; dof++) {
2839:       ptanzi = 0;
2840:       /* Determine symbolic row of PtA: */
2841:       for (j=0; j<ptnzi; j++) {
2842:         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2843:         arow = ptJ[j]*ppdof + dof;
2844:         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2845:         anzj = ai[arow+1] - ai[arow];
2846:         ajj  = aj + ai[arow];
2847:         for (k=0; k<anzj; k++) {
2848:           if (!ptadenserow[ajj[k]]) {
2849:             ptadenserow[ajj[k]]    = -1;
2850:             ptasparserow[ptanzi++] = ajj[k];
2851:           }
2852:         }
2853:       }
2854:       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2855:       ptaj = ptasparserow;
2856:       cnzi = 0;
2857:       for (j=0; j<ptanzi; j++) {
2858:         /* Get offset within block of P */
2859:         pshift = *ptaj%ppdof;
2860:         /* Get block row of P */
2861:         prow = (*ptaj++)/ppdof; /* integer division */
2862:         /* P has same number of nonzeros per row as the compressed form */
2863:         pnzj = pi[prow+1] - pi[prow];
2864:         pjj  = pj + pi[prow];
2865:         for (k=0;k<pnzj;k++) {
2866:           /* Locations in C are shifted by the offset within the block */
2867:           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
2868:           if (!denserow[pjj[k]*ppdof+pshift]) {
2869:             denserow[pjj[k]*ppdof+pshift] = -1;
2870:             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
2871:           }
2872:         }
2873:       }

2875:       /* sort sparserow */
2876:       PetscSortInt(cnzi,sparserow);

2878:       /* If free space is not available, make more free space */
2879:       /* Double the amount of total space in the list */
2880:       if (current_space->local_remaining<cnzi) {
2881:         PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),&current_space);
2882:       }

2884:       /* Copy data into free space, and zero out denserows */
2885:       PetscArraycpy(current_space->array,sparserow,cnzi);

2887:       current_space->array           += cnzi;
2888:       current_space->local_used      += cnzi;
2889:       current_space->local_remaining -= cnzi;

2891:       for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
2892:       for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0;

2894:       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
2895:       /*        For now, we will recompute what is needed. */
2896:       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
2897:     }
2898:   }
2899:   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
2900:   /* Allocate space for cj, initialize cj, and */
2901:   /* destroy list of free space and other temporary array(s) */
2902:   PetscMalloc1(ci[cn]+1,&cj);
2903:   PetscFreeSpaceContiguous(&free_space,cj);
2904:   PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);

2906:   /* Allocate space for ca */
2907:   PetscCalloc1(ci[cn]+1,&ca);

2909:   /* put together the new matrix */
2910:   MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,NULL,C);
2911:   MatSetBlockSize(C,pp->dof);

2913:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
2914:   /* Since these are PETSc arrays, change flags to free them as necessary. */
2915:   c          = (Mat_SeqAIJ*)(C->data);
2916:   c->free_a  = PETSC_TRUE;
2917:   c->free_ij = PETSC_TRUE;
2918:   c->nonew   = 0;

2920:   C->ops->ptapnumeric    = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
2921:   C->ops->productnumeric = MatProductNumeric_PtAP;

2923:   /* Clean up. */
2924:   MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
2925:   return 0;
2926: }

2928: PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
2929: {
2930:   /* This routine requires testing -- first draft only */
2931:   Mat_SeqMAIJ     *pp=(Mat_SeqMAIJ*)PP->data;
2932:   Mat             P  =pp->AIJ;
2933:   Mat_SeqAIJ      *a = (Mat_SeqAIJ*) A->data;
2934:   Mat_SeqAIJ      *p = (Mat_SeqAIJ*) P->data;
2935:   Mat_SeqAIJ      *c = (Mat_SeqAIJ*) C->data;
2936:   const PetscInt  *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj;
2937:   const PetscInt  *ci=c->i,*cj=c->j,*cjj;
2938:   const PetscInt  am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
2939:   PetscInt        i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
2940:   const MatScalar *aa=a->a,*pa=p->a,*pA,*paj;
2941:   MatScalar       *ca=c->a,*caj,*apa;

2943:   /* Allocate temporary array for storage of one row of A*P */
2944:   PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense);

2946:   /* Clear old values in C */
2947:   PetscArrayzero(ca,ci[cm]);

2949:   for (i=0; i<am; i++) {
2950:     /* Form sparse row of A*P */
2951:     anzi  = ai[i+1] - ai[i];
2952:     apnzj = 0;
2953:     for (j=0; j<anzi; j++) {
2954:       /* Get offset within block of P */
2955:       pshift = *aj%ppdof;
2956:       /* Get block row of P */
2957:       prow = *aj++/ppdof;   /* integer division */
2958:       pnzj = pi[prow+1] - pi[prow];
2959:       pjj  = pj + pi[prow];
2960:       paj  = pa + pi[prow];
2961:       for (k=0; k<pnzj; k++) {
2962:         poffset = pjj[k]*ppdof+pshift;
2963:         if (!apjdense[poffset]) {
2964:           apjdense[poffset] = -1;
2965:           apj[apnzj++]      = poffset;
2966:         }
2967:         apa[poffset] += (*aa)*paj[k];
2968:       }
2969:       PetscLogFlops(2.0*pnzj);
2970:       aa++;
2971:     }

2973:     /* Sort the j index array for quick sparse axpy. */
2974:     /* Note: a array does not need sorting as it is in dense storage locations. */
2975:     PetscSortInt(apnzj,apj);

2977:     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
2978:     prow    = i/ppdof; /* integer division */
2979:     pshift  = i%ppdof;
2980:     poffset = pi[prow];
2981:     pnzi    = pi[prow+1] - poffset;
2982:     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
2983:     pJ = pj+poffset;
2984:     pA = pa+poffset;
2985:     for (j=0; j<pnzi; j++) {
2986:       crow = (*pJ)*ppdof+pshift;
2987:       cjj  = cj + ci[crow];
2988:       caj  = ca + ci[crow];
2989:       pJ++;
2990:       /* Perform sparse axpy operation.  Note cjj includes apj. */
2991:       for (k=0,nextap=0; nextap<apnzj; k++) {
2992:         if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]];
2993:       }
2994:       PetscLogFlops(2.0*apnzj);
2995:       pA++;
2996:     }

2998:     /* Zero the current row info for A*P */
2999:     for (j=0; j<apnzj; j++) {
3000:       apa[apj[j]]      = 0.;
3001:       apjdense[apj[j]] = 0;
3002:     }
3003:   }

3005:   /* Assemble the final matrix and clean up */
3006:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
3007:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
3008:   PetscFree3(apa,apj,apjdense);
3009:   return 0;
3010: }

3012: PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C)
3013: {
3014:   Mat_Product         *product = C->product;
3015:   Mat                 A=product->A,P=product->B;

3017:   MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,product->fill,C);
3018:   return 0;
3019: }

3021: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
3022: {
3023:   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPSymbolic is not implemented for MPIMAIJ matrix yet");
3024: }

3026: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
3027: {
3028:   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
3029: }

3031: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,Mat);

3033: PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,Mat C)
3034: {
3035:   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;


3038:   MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,C);
3039:   return 0;
3040: }

3042: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,PetscReal,Mat);

3044: PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,PetscReal fill,Mat C)
3045: {
3046:   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;

3048:   MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,fill,C);
3049:   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce;
3050:   return 0;
3051: }

3053: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,Mat);

3055: PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,Mat C)
3056: {
3057:   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;


3060:   MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,C);
3061:   return 0;
3062: }

3064: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,PetscReal,Mat);

3066: PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,PetscReal fill,Mat C)
3067: {
3068:   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;


3071:   MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,fill,C);
3072:   C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged;
3073:   return 0;
3074: }

3076: PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C)
3077: {
3078:   Mat_Product         *product = C->product;
3079:   Mat                 A=product->A,P=product->B;
3080:   PetscBool           flg;

3082:   PetscStrcmp(product->alg,"allatonce",&flg);
3083:   if (flg) {
3084:     MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A,P,product->fill,C);
3085:     C->ops->productnumeric = MatProductNumeric_PtAP;
3086:     return 0;
3087:   }

3089:   PetscStrcmp(product->alg,"allatonce_merged",&flg);
3090:   if (flg) {
3091:     MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A,P,product->fill,C);
3092:     C->ops->productnumeric = MatProductNumeric_PtAP;
3093:     return 0;
3094:   }

3096:   SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"Mat Product Algorithm is not supported");
3097: }

3099: PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3100: {
3101:   Mat_SeqMAIJ    *b   = (Mat_SeqMAIJ*)A->data;
3102:   Mat            a    = b->AIJ,B;
3103:   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)a->data;
3104:   PetscInt       m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3105:   PetscInt       *cols;
3106:   PetscScalar    *vals;

3108:   MatGetSize(a,&m,&n);
3109:   PetscMalloc1(dof*m,&ilen);
3110:   for (i=0; i<m; i++) {
3111:     nmax = PetscMax(nmax,aij->ilen[i]);
3112:     for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i];
3113:   }
3114:   MatCreate(PETSC_COMM_SELF,&B);
3115:   MatSetSizes(B,dof*m,dof*n,dof*m,dof*n);
3116:   MatSetType(B,newtype);
3117:   MatSeqAIJSetPreallocation(B,0,ilen);
3118:   PetscFree(ilen);
3119:   PetscMalloc1(nmax,&icols);
3120:   ii   = 0;
3121:   for (i=0; i<m; i++) {
3122:     MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3123:     for (j=0; j<dof; j++) {
3124:       for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j;
3125:       MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3126:       ii++;
3127:     }
3128:     MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3129:   }
3130:   PetscFree(icols);
3131:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3132:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

3134:   if (reuse == MAT_INPLACE_MATRIX) {
3135:     MatHeaderReplace(A,&B);
3136:   } else {
3137:     *newmat = B;
3138:   }
3139:   return 0;
3140: }

3142: #include <../src/mat/impls/aij/mpi/mpiaij.h>

3144: PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3145: {
3146:   Mat_MPIMAIJ    *maij   = (Mat_MPIMAIJ*)A->data;
3147:   Mat            MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
3148:   Mat            MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
3149:   Mat_SeqAIJ     *AIJ    = (Mat_SeqAIJ*) MatAIJ->data;
3150:   Mat_SeqAIJ     *OAIJ   =(Mat_SeqAIJ*) MatOAIJ->data;
3151:   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*) maij->A->data;
3152:   PetscInt       dof     = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0;
3153:   PetscInt       *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL;
3154:   PetscInt       rstart,cstart,*garray,ii,k;
3155:   PetscScalar    *vals,*ovals;

3157:   PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz);
3158:   for (i=0; i<A->rmap->n/dof; i++) {
3159:     nmax  = PetscMax(nmax,AIJ->ilen[i]);
3160:     onmax = PetscMax(onmax,OAIJ->ilen[i]);
3161:     for (j=0; j<dof; j++) {
3162:       dnz[dof*i+j] = AIJ->ilen[i];
3163:       onz[dof*i+j] = OAIJ->ilen[i];
3164:     }
3165:   }
3166:   MatCreate(PetscObjectComm((PetscObject)A),&B);
3167:   MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
3168:   MatSetType(B,newtype);
3169:   MatMPIAIJSetPreallocation(B,0,dnz,0,onz);
3170:   MatSetBlockSize(B,dof);
3171:   PetscFree2(dnz,onz);

3173:   PetscMalloc2(nmax,&icols,onmax,&oicols);
3174:   rstart = dof*maij->A->rmap->rstart;
3175:   cstart = dof*maij->A->cmap->rstart;
3176:   garray = mpiaij->garray;

3178:   ii = rstart;
3179:   for (i=0; i<A->rmap->n/dof; i++) {
3180:     MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3181:     MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3182:     for (j=0; j<dof; j++) {
3183:       for (k=0; k<ncols; k++) {
3184:         icols[k] = cstart + dof*cols[k]+j;
3185:       }
3186:       for (k=0; k<oncols; k++) {
3187:         oicols[k] = dof*garray[ocols[k]]+j;
3188:       }
3189:       MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3190:       MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);
3191:       ii++;
3192:     }
3193:     MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3194:     MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3195:   }
3196:   PetscFree2(icols,oicols);

3198:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3199:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

3201:   if (reuse == MAT_INPLACE_MATRIX) {
3202:     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
3203:     ((PetscObject)A)->refct = 1;

3205:     MatHeaderReplace(A,&B);

3207:     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3208:   } else {
3209:     *newmat = B;
3210:   }
3211:   return 0;
3212: }

3214: PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
3215: {
3216:   Mat            A;

3218:   MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);
3219:   MatCreateSubMatrix(A,isrow,iscol,cll,newmat);
3220:   MatDestroy(&A);
3221:   return 0;
3222: }

3224: PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *submat[])
3225: {
3226:   Mat            A;

3228:   MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);
3229:   MatCreateSubMatrices(A,n,irow,icol,scall,submat);
3230:   MatDestroy(&A);
3231:   return 0;
3232: }

3234: /* ---------------------------------------------------------------------------------- */
3235: /*@
3236:   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
3237:   operations for multicomponent problems.  It interpolates each component the same
3238:   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
3239:   and MATMPIAIJ for distributed matrices.

3241:   Collective

3243:   Input Parameters:
3244: + A - the AIJ matrix describing the action on blocks
3245: - dof - the block size (number of components per node)

3247:   Output Parameter:
3248: . maij - the new MAIJ matrix

3250:   Operations provided:
3251: .vb
3252:     MatMult
3253:     MatMultTranspose
3254:     MatMultAdd
3255:     MatMultTransposeAdd
3256:     MatView
3257: .ve

3259:   Level: advanced

3261: .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ
3262: @*/
3263: PetscErrorCode  MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
3264: {
3265:   PetscMPIInt    size;
3266:   PetscInt       n;
3267:   Mat            B;
3268: #if defined(PETSC_HAVE_CUDA)
3269:   /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */
3270:   PetscBool      convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE;
3271: #endif

3273:   dof  = PetscAbs(dof);
3274:   PetscObjectReference((PetscObject)A);

3276:   if (dof == 1) *maij = A;
3277:   else {
3278:     MatCreate(PetscObjectComm((PetscObject)A),&B);
3279:     /* propagate vec type */
3280:     MatSetVecType(B,A->defaultvectype);
3281:     MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);
3282:     PetscLayoutSetBlockSize(B->rmap,dof);
3283:     PetscLayoutSetBlockSize(B->cmap,dof);
3284:     PetscLayoutSetUp(B->rmap);
3285:     PetscLayoutSetUp(B->cmap);

3287:     B->assembled = PETSC_TRUE;

3289:     MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
3290:     if (size == 1) {
3291:       Mat_SeqMAIJ *b;

3293:       MatSetType(B,MATSEQMAIJ);

3295:       B->ops->setup   = NULL;
3296:       B->ops->destroy = MatDestroy_SeqMAIJ;
3297:       B->ops->view    = MatView_SeqMAIJ;

3299:       b               = (Mat_SeqMAIJ*)B->data;
3300:       b->dof          = dof;
3301:       b->AIJ          = A;

3303:       if (dof == 2) {
3304:         B->ops->mult             = MatMult_SeqMAIJ_2;
3305:         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3306:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3307:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3308:       } else if (dof == 3) {
3309:         B->ops->mult             = MatMult_SeqMAIJ_3;
3310:         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3311:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3312:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3313:       } else if (dof == 4) {
3314:         B->ops->mult             = MatMult_SeqMAIJ_4;
3315:         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3316:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3317:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3318:       } else if (dof == 5) {
3319:         B->ops->mult             = MatMult_SeqMAIJ_5;
3320:         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3321:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3322:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
3323:       } else if (dof == 6) {
3324:         B->ops->mult             = MatMult_SeqMAIJ_6;
3325:         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
3326:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
3327:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3328:       } else if (dof == 7) {
3329:         B->ops->mult             = MatMult_SeqMAIJ_7;
3330:         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3331:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3332:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
3333:       } else if (dof == 8) {
3334:         B->ops->mult             = MatMult_SeqMAIJ_8;
3335:         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
3336:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
3337:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
3338:       } else if (dof == 9) {
3339:         B->ops->mult             = MatMult_SeqMAIJ_9;
3340:         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
3341:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
3342:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3343:       } else if (dof == 10) {
3344:         B->ops->mult             = MatMult_SeqMAIJ_10;
3345:         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3346:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3347:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3348:       } else if (dof == 11) {
3349:         B->ops->mult             = MatMult_SeqMAIJ_11;
3350:         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3351:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3352:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
3353:       } else if (dof == 16) {
3354:         B->ops->mult             = MatMult_SeqMAIJ_16;
3355:         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
3356:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
3357:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3358:       } else if (dof == 18) {
3359:         B->ops->mult             = MatMult_SeqMAIJ_18;
3360:         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3361:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3362:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
3363:       } else {
3364:         B->ops->mult             = MatMult_SeqMAIJ_N;
3365:         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
3366:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
3367:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
3368:       }
3369: #if defined(PETSC_HAVE_CUDA)
3370:       PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaijcusparse_C",MatConvert_SeqMAIJ_SeqAIJ);
3371: #endif
3372:       PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ);
3373:       PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaij_seqmaij_C",MatProductSetFromOptions_SeqAIJ_SeqMAIJ);
3374:     } else {
3375:       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ*)A->data;
3376:       Mat_MPIMAIJ *b;
3377:       IS          from,to;
3378:       Vec         gvec;

3380:       MatSetType(B,MATMPIMAIJ);

3382:       B->ops->setup            = NULL;
3383:       B->ops->destroy          = MatDestroy_MPIMAIJ;
3384:       B->ops->view             = MatView_MPIMAIJ;

3386:       b      = (Mat_MPIMAIJ*)B->data;
3387:       b->dof = dof;
3388:       b->A   = A;

3390:       MatCreateMAIJ(mpiaij->A,-dof,&b->AIJ);
3391:       MatCreateMAIJ(mpiaij->B,-dof,&b->OAIJ);

3393:       VecGetSize(mpiaij->lvec,&n);
3394:       VecCreate(PETSC_COMM_SELF,&b->w);
3395:       VecSetSizes(b->w,n*dof,n*dof);
3396:       VecSetBlockSize(b->w,dof);
3397:       VecSetType(b->w,VECSEQ);

3399:       /* create two temporary Index sets for build scatter gather */
3400:       ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);
3401:       ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);

3403:       /* create temporary global vector to generate scatter context */
3404:       VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);

3406:       /* generate the scatter context */
3407:       VecScatterCreate(gvec,from,b->w,to,&b->ctx);

3409:       ISDestroy(&from);
3410:       ISDestroy(&to);
3411:       VecDestroy(&gvec);

3413:       B->ops->mult             = MatMult_MPIMAIJ_dof;
3414:       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
3415:       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
3416:       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;

3418: #if defined(PETSC_HAVE_CUDA)
3419:       PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaijcusparse_C",MatConvert_MPIMAIJ_MPIAIJ);
3420: #endif
3421:       PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ);
3422:       PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpimaij_C",MatProductSetFromOptions_MPIAIJ_MPIMAIJ);
3423:     }
3424:     B->ops->createsubmatrix   = MatCreateSubMatrix_MAIJ;
3425:     B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ;
3426:     MatSetUp(B);
3427: #if defined(PETSC_HAVE_CUDA)
3428:     /* temporary until we have CUDA implementation of MAIJ */
3429:     {
3430:       PetscBool flg;
3431:       if (convert) {
3432:         PetscObjectTypeCompareAny((PetscObject)A,&flg,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,MATAIJCUSPARSE,"");
3433:         if (flg) {
3434:           MatConvert(B,((PetscObject)A)->type_name,MAT_INPLACE_MATRIX,&B);
3435:         }
3436:       }
3437:     }
3438: #endif
3439:     *maij = B;
3440:   }
3441:   return 0;
3442: }