Actual source code: maij.c

petsc-3.12.5 2020-03-29
Report Typos and Errors

  2: /*
  3:     Defines the basic matrix operations for the MAIJ  matrix storage format.
  4:   This format is used for restriction and interpolation operations for
  5:   multicomponent problems. It interpolates each component the same way
  6:   independently.

  8:      We provide:
  9:          MatMult()
 10:          MatMultTranspose()
 11:          MatMultTransposeAdd()
 12:          MatMultAdd()
 13:           and
 14:          MatCreateMAIJ(Mat,dof,Mat*)

 16:      This single directory handles both the sequential and parallel codes
 17: */

 19:  #include <../src/mat/impls/maij/maij.h>
 20:  #include <../src/mat/utils/freespace.h>

 22: /*@
 23:    MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix

 25:    Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel

 27:    Input Parameter:
 28: .  A - the MAIJ matrix

 30:    Output Parameter:
 31: .  B - the AIJ matrix

 33:    Level: advanced

 35:    Notes:
 36:     The reference count on the AIJ matrix is not increased so you should not destroy it.

 38: .seealso: MatCreateMAIJ()
 39: @*/
 40: PetscErrorCode  MatMAIJGetAIJ(Mat A,Mat *B)
 41: {
 43:   PetscBool      ismpimaij,isseqmaij;

 46:   PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
 47:   PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
 48:   if (ismpimaij) {
 49:     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;

 51:     *B = b->A;
 52:   } else if (isseqmaij) {
 53:     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;

 55:     *B = b->AIJ;
 56:   } else {
 57:     *B = A;
 58:   }
 59:   return(0);
 60: }

 62: /*@
 63:    MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size

 65:    Logically Collective

 67:    Input Parameter:
 68: +  A - the MAIJ matrix
 69: -  dof - the block size for the new matrix

 71:    Output Parameter:
 72: .  B - the new MAIJ matrix

 74:    Level: advanced

 76: .seealso: MatCreateMAIJ()
 77: @*/
 78: PetscErrorCode  MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
 79: {
 81:   Mat            Aij = NULL;

 85:   MatMAIJGetAIJ(A,&Aij);
 86:   MatCreateMAIJ(Aij,dof,B);
 87:   return(0);
 88: }

 90: PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
 91: {
 93:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;

 96:   MatDestroy(&b->AIJ);
 97:   PetscFree(A->data);
 98:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL);
 99:   PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaij_seqmaij_C",NULL);
100:   PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaijperm_seqmaij_C",NULL);
101:   PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaijmkl_seqmaij_C",NULL);
102:   PetscObjectComposeFunction((PetscObject)A,"MatPtAP_is_seqmaij_C",NULL);
103:   return(0);
104: }

106: PetscErrorCode MatSetUp_MAIJ(Mat A)
107: {
109:   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices");
110:   return(0);
111: }

113: PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
114: {
116:   Mat            B;

119:   MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
120:   MatView(B,viewer);
121:   MatDestroy(&B);
122:   return(0);
123: }

125: PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
126: {
128:   Mat            B;

131:   MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
132:   MatView(B,viewer);
133:   MatDestroy(&B);
134:   return(0);
135: }

137: PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
138: {
140:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

143:   MatDestroy(&b->AIJ);
144:   MatDestroy(&b->OAIJ);
145:   MatDestroy(&b->A);
146:   VecScatterDestroy(&b->ctx);
147:   VecDestroy(&b->w);
148:   PetscFree(A->data);
149:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL);
150:   PetscObjectComposeFunction((PetscObject)A,"MatPtAP_mpiaij_mpimaij_C",NULL);
151:   PetscObjectComposeFunction((PetscObject)A,"MatPtAP_is_mpimaij_C",NULL);
152:   PetscObjectChangeTypeName((PetscObject)A,0);
153:   return(0);
154: }

156: /*MC
157:   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
158:   multicomponent problems, interpolating or restricting each component the same way independently.
159:   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.

161:   Operations provided:
162: + MatMult
163: . MatMultTranspose
164: . MatMultAdd
165: - MatMultTransposeAdd

167:   Level: advanced

169: .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ()
170: M*/

172: PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
173: {
175:   Mat_MPIMAIJ    *b;
176:   PetscMPIInt    size;

179:   PetscNewLog(A,&b);
180:   A->data  = (void*)b;

182:   PetscMemzero(A->ops,sizeof(struct _MatOps));

184:   A->ops->setup = MatSetUp_MAIJ;

186:   b->AIJ  = 0;
187:   b->dof  = 0;
188:   b->OAIJ = 0;
189:   b->ctx  = 0;
190:   b->w    = 0;
191:   MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
192:   if (size == 1) {
193:     PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);
194:   } else {
195:     PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);
196:   }
197:   A->preallocated  = PETSC_TRUE;
198:   A->assembled     = PETSC_TRUE;
199:   return(0);
200: }

202: /* --------------------------------------------------------------------------------------*/
203: PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
204: {
205:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
206:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
207:   const PetscScalar *x,*v;
208:   PetscScalar       *y, sum1, sum2;
209:   PetscErrorCode    ierr;
210:   PetscInt          nonzerorow=0,n,i,jrow,j;
211:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;

214:   VecGetArrayRead(xx,&x);
215:   VecGetArray(yy,&y);
216:   idx  = a->j;
217:   v    = a->a;
218:   ii   = a->i;

220:   for (i=0; i<m; i++) {
221:     jrow  = ii[i];
222:     n     = ii[i+1] - jrow;
223:     sum1  = 0.0;
224:     sum2  = 0.0;

226:     nonzerorow += (n>0);
227:     for (j=0; j<n; j++) {
228:       sum1 += v[jrow]*x[2*idx[jrow]];
229:       sum2 += v[jrow]*x[2*idx[jrow]+1];
230:       jrow++;
231:     }
232:     y[2*i]   = sum1;
233:     y[2*i+1] = sum2;
234:   }

236:   PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);
237:   VecRestoreArrayRead(xx,&x);
238:   VecRestoreArray(yy,&y);
239:   return(0);
240: }

242: PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
243: {
244:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
245:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
246:   const PetscScalar *x,*v;
247:   PetscScalar       *y,alpha1,alpha2;
248:   PetscErrorCode    ierr;
249:   PetscInt          n,i;
250:   const PetscInt    m = b->AIJ->rmap->n,*idx;

253:   VecSet(yy,0.0);
254:   VecGetArrayRead(xx,&x);
255:   VecGetArray(yy,&y);

257:   for (i=0; i<m; i++) {
258:     idx    = a->j + a->i[i];
259:     v      = a->a + a->i[i];
260:     n      = a->i[i+1] - a->i[i];
261:     alpha1 = x[2*i];
262:     alpha2 = x[2*i+1];
263:     while (n-->0) {
264:       y[2*(*idx)]   += alpha1*(*v);
265:       y[2*(*idx)+1] += alpha2*(*v);
266:       idx++; v++;
267:     }
268:   }
269:   PetscLogFlops(4.0*a->nz);
270:   VecRestoreArrayRead(xx,&x);
271:   VecRestoreArray(yy,&y);
272:   return(0);
273: }

275: PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
276: {
277:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
278:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
279:   const PetscScalar *x,*v;
280:   PetscScalar       *y,sum1, sum2;
281:   PetscErrorCode    ierr;
282:   PetscInt          n,i,jrow,j;
283:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

286:   if (yy != zz) {VecCopy(yy,zz);}
287:   VecGetArrayRead(xx,&x);
288:   VecGetArray(zz,&y);
289:   idx  = a->j;
290:   v    = a->a;
291:   ii   = a->i;

293:   for (i=0; i<m; i++) {
294:     jrow = ii[i];
295:     n    = ii[i+1] - jrow;
296:     sum1 = 0.0;
297:     sum2 = 0.0;
298:     for (j=0; j<n; j++) {
299:       sum1 += v[jrow]*x[2*idx[jrow]];
300:       sum2 += v[jrow]*x[2*idx[jrow]+1];
301:       jrow++;
302:     }
303:     y[2*i]   += sum1;
304:     y[2*i+1] += sum2;
305:   }

307:   PetscLogFlops(4.0*a->nz);
308:   VecRestoreArrayRead(xx,&x);
309:   VecRestoreArray(zz,&y);
310:   return(0);
311: }
312: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
313: {
314:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
315:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
316:   const PetscScalar *x,*v;
317:   PetscScalar       *y,alpha1,alpha2;
318:   PetscErrorCode    ierr;
319:   PetscInt          n,i;
320:   const PetscInt    m = b->AIJ->rmap->n,*idx;

323:   if (yy != zz) {VecCopy(yy,zz);}
324:   VecGetArrayRead(xx,&x);
325:   VecGetArray(zz,&y);

327:   for (i=0; i<m; i++) {
328:     idx    = a->j + a->i[i];
329:     v      = a->a + a->i[i];
330:     n      = a->i[i+1] - a->i[i];
331:     alpha1 = x[2*i];
332:     alpha2 = x[2*i+1];
333:     while (n-->0) {
334:       y[2*(*idx)]   += alpha1*(*v);
335:       y[2*(*idx)+1] += alpha2*(*v);
336:       idx++; v++;
337:     }
338:   }
339:   PetscLogFlops(4.0*a->nz);
340:   VecRestoreArrayRead(xx,&x);
341:   VecRestoreArray(zz,&y);
342:   return(0);
343: }
344: /* --------------------------------------------------------------------------------------*/
345: PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
346: {
347:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
348:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
349:   const PetscScalar *x,*v;
350:   PetscScalar       *y,sum1, sum2, sum3;
351:   PetscErrorCode    ierr;
352:   PetscInt          nonzerorow=0,n,i,jrow,j;
353:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;

356:   VecGetArrayRead(xx,&x);
357:   VecGetArray(yy,&y);
358:   idx  = a->j;
359:   v    = a->a;
360:   ii   = a->i;

362:   for (i=0; i<m; i++) {
363:     jrow  = ii[i];
364:     n     = ii[i+1] - jrow;
365:     sum1  = 0.0;
366:     sum2  = 0.0;
367:     sum3  = 0.0;

369:     nonzerorow += (n>0);
370:     for (j=0; j<n; j++) {
371:       sum1 += v[jrow]*x[3*idx[jrow]];
372:       sum2 += v[jrow]*x[3*idx[jrow]+1];
373:       sum3 += v[jrow]*x[3*idx[jrow]+2];
374:       jrow++;
375:      }
376:     y[3*i]   = sum1;
377:     y[3*i+1] = sum2;
378:     y[3*i+2] = sum3;
379:   }

381:   PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);
382:   VecRestoreArrayRead(xx,&x);
383:   VecRestoreArray(yy,&y);
384:   return(0);
385: }

387: PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
388: {
389:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
390:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
391:   const PetscScalar *x,*v;
392:   PetscScalar       *y,alpha1,alpha2,alpha3;
393:   PetscErrorCode    ierr;
394:   PetscInt          n,i;
395:   const PetscInt    m = b->AIJ->rmap->n,*idx;

398:   VecSet(yy,0.0);
399:   VecGetArrayRead(xx,&x);
400:   VecGetArray(yy,&y);

402:   for (i=0; i<m; i++) {
403:     idx    = a->j + a->i[i];
404:     v      = a->a + a->i[i];
405:     n      = a->i[i+1] - a->i[i];
406:     alpha1 = x[3*i];
407:     alpha2 = x[3*i+1];
408:     alpha3 = x[3*i+2];
409:     while (n-->0) {
410:       y[3*(*idx)]   += alpha1*(*v);
411:       y[3*(*idx)+1] += alpha2*(*v);
412:       y[3*(*idx)+2] += alpha3*(*v);
413:       idx++; v++;
414:     }
415:   }
416:   PetscLogFlops(6.0*a->nz);
417:   VecRestoreArrayRead(xx,&x);
418:   VecRestoreArray(yy,&y);
419:   return(0);
420: }

422: PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
423: {
424:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
425:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
426:   const PetscScalar *x,*v;
427:   PetscScalar       *y,sum1, sum2, sum3;
428:   PetscErrorCode    ierr;
429:   PetscInt          n,i,jrow,j;
430:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

433:   if (yy != zz) {VecCopy(yy,zz);}
434:   VecGetArrayRead(xx,&x);
435:   VecGetArray(zz,&y);
436:   idx  = a->j;
437:   v    = a->a;
438:   ii   = a->i;

440:   for (i=0; i<m; i++) {
441:     jrow = ii[i];
442:     n    = ii[i+1] - jrow;
443:     sum1 = 0.0;
444:     sum2 = 0.0;
445:     sum3 = 0.0;
446:     for (j=0; j<n; j++) {
447:       sum1 += v[jrow]*x[3*idx[jrow]];
448:       sum2 += v[jrow]*x[3*idx[jrow]+1];
449:       sum3 += v[jrow]*x[3*idx[jrow]+2];
450:       jrow++;
451:     }
452:     y[3*i]   += sum1;
453:     y[3*i+1] += sum2;
454:     y[3*i+2] += sum3;
455:   }

457:   PetscLogFlops(6.0*a->nz);
458:   VecRestoreArrayRead(xx,&x);
459:   VecRestoreArray(zz,&y);
460:   return(0);
461: }
462: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
463: {
464:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
465:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
466:   const PetscScalar *x,*v;
467:   PetscScalar       *y,alpha1,alpha2,alpha3;
468:   PetscErrorCode    ierr;
469:   PetscInt          n,i;
470:   const PetscInt    m = b->AIJ->rmap->n,*idx;

473:   if (yy != zz) {VecCopy(yy,zz);}
474:   VecGetArrayRead(xx,&x);
475:   VecGetArray(zz,&y);
476:   for (i=0; i<m; i++) {
477:     idx    = a->j + a->i[i];
478:     v      = a->a + a->i[i];
479:     n      = a->i[i+1] - a->i[i];
480:     alpha1 = x[3*i];
481:     alpha2 = x[3*i+1];
482:     alpha3 = x[3*i+2];
483:     while (n-->0) {
484:       y[3*(*idx)]   += alpha1*(*v);
485:       y[3*(*idx)+1] += alpha2*(*v);
486:       y[3*(*idx)+2] += alpha3*(*v);
487:       idx++; v++;
488:     }
489:   }
490:   PetscLogFlops(6.0*a->nz);
491:   VecRestoreArrayRead(xx,&x);
492:   VecRestoreArray(zz,&y);
493:   return(0);
494: }

496: /* ------------------------------------------------------------------------------*/
497: PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
498: {
499:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
500:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
501:   const PetscScalar *x,*v;
502:   PetscScalar       *y,sum1, sum2, sum3, sum4;
503:   PetscErrorCode    ierr;
504:   PetscInt          nonzerorow=0,n,i,jrow,j;
505:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;

508:   VecGetArrayRead(xx,&x);
509:   VecGetArray(yy,&y);
510:   idx  = a->j;
511:   v    = a->a;
512:   ii   = a->i;

514:   for (i=0; i<m; i++) {
515:     jrow        = ii[i];
516:     n           = ii[i+1] - jrow;
517:     sum1        = 0.0;
518:     sum2        = 0.0;
519:     sum3        = 0.0;
520:     sum4        = 0.0;
521:     nonzerorow += (n>0);
522:     for (j=0; j<n; j++) {
523:       sum1 += v[jrow]*x[4*idx[jrow]];
524:       sum2 += v[jrow]*x[4*idx[jrow]+1];
525:       sum3 += v[jrow]*x[4*idx[jrow]+2];
526:       sum4 += v[jrow]*x[4*idx[jrow]+3];
527:       jrow++;
528:     }
529:     y[4*i]   = sum1;
530:     y[4*i+1] = sum2;
531:     y[4*i+2] = sum3;
532:     y[4*i+3] = sum4;
533:   }

535:   PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);
536:   VecRestoreArrayRead(xx,&x);
537:   VecRestoreArray(yy,&y);
538:   return(0);
539: }

541: PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
542: {
543:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
544:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
545:   const PetscScalar *x,*v;
546:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
547:   PetscErrorCode    ierr;
548:   PetscInt          n,i;
549:   const PetscInt    m = b->AIJ->rmap->n,*idx;

552:   VecSet(yy,0.0);
553:   VecGetArrayRead(xx,&x);
554:   VecGetArray(yy,&y);
555:   for (i=0; i<m; i++) {
556:     idx    = a->j + a->i[i];
557:     v      = a->a + a->i[i];
558:     n      = a->i[i+1] - a->i[i];
559:     alpha1 = x[4*i];
560:     alpha2 = x[4*i+1];
561:     alpha3 = x[4*i+2];
562:     alpha4 = x[4*i+3];
563:     while (n-->0) {
564:       y[4*(*idx)]   += alpha1*(*v);
565:       y[4*(*idx)+1] += alpha2*(*v);
566:       y[4*(*idx)+2] += alpha3*(*v);
567:       y[4*(*idx)+3] += alpha4*(*v);
568:       idx++; v++;
569:     }
570:   }
571:   PetscLogFlops(8.0*a->nz);
572:   VecRestoreArrayRead(xx,&x);
573:   VecRestoreArray(yy,&y);
574:   return(0);
575: }

577: PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
578: {
579:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
580:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
581:   const PetscScalar *x,*v;
582:   PetscScalar       *y,sum1, sum2, sum3, sum4;
583:   PetscErrorCode    ierr;
584:   PetscInt          n,i,jrow,j;
585:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

588:   if (yy != zz) {VecCopy(yy,zz);}
589:   VecGetArrayRead(xx,&x);
590:   VecGetArray(zz,&y);
591:   idx  = a->j;
592:   v    = a->a;
593:   ii   = a->i;

595:   for (i=0; i<m; i++) {
596:     jrow = ii[i];
597:     n    = ii[i+1] - jrow;
598:     sum1 = 0.0;
599:     sum2 = 0.0;
600:     sum3 = 0.0;
601:     sum4 = 0.0;
602:     for (j=0; j<n; j++) {
603:       sum1 += v[jrow]*x[4*idx[jrow]];
604:       sum2 += v[jrow]*x[4*idx[jrow]+1];
605:       sum3 += v[jrow]*x[4*idx[jrow]+2];
606:       sum4 += v[jrow]*x[4*idx[jrow]+3];
607:       jrow++;
608:     }
609:     y[4*i]   += sum1;
610:     y[4*i+1] += sum2;
611:     y[4*i+2] += sum3;
612:     y[4*i+3] += sum4;
613:   }

615:   PetscLogFlops(8.0*a->nz);
616:   VecRestoreArrayRead(xx,&x);
617:   VecRestoreArray(zz,&y);
618:   return(0);
619: }
620: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
621: {
622:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
623:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
624:   const PetscScalar *x,*v;
625:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
626:   PetscErrorCode    ierr;
627:   PetscInt          n,i;
628:   const PetscInt    m = b->AIJ->rmap->n,*idx;

631:   if (yy != zz) {VecCopy(yy,zz);}
632:   VecGetArrayRead(xx,&x);
633:   VecGetArray(zz,&y);

635:   for (i=0; i<m; i++) {
636:     idx    = a->j + a->i[i];
637:     v      = a->a + a->i[i];
638:     n      = a->i[i+1] - a->i[i];
639:     alpha1 = x[4*i];
640:     alpha2 = x[4*i+1];
641:     alpha3 = x[4*i+2];
642:     alpha4 = x[4*i+3];
643:     while (n-->0) {
644:       y[4*(*idx)]   += alpha1*(*v);
645:       y[4*(*idx)+1] += alpha2*(*v);
646:       y[4*(*idx)+2] += alpha3*(*v);
647:       y[4*(*idx)+3] += alpha4*(*v);
648:       idx++; v++;
649:     }
650:   }
651:   PetscLogFlops(8.0*a->nz);
652:   VecRestoreArrayRead(xx,&x);
653:   VecRestoreArray(zz,&y);
654:   return(0);
655: }
656: /* ------------------------------------------------------------------------------*/

658: PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
659: {
660:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
661:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
662:   const PetscScalar *x,*v;
663:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
664:   PetscErrorCode    ierr;
665:   PetscInt          nonzerorow=0,n,i,jrow,j;
666:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;

669:   VecGetArrayRead(xx,&x);
670:   VecGetArray(yy,&y);
671:   idx  = a->j;
672:   v    = a->a;
673:   ii   = a->i;

675:   for (i=0; i<m; i++) {
676:     jrow  = ii[i];
677:     n     = ii[i+1] - jrow;
678:     sum1  = 0.0;
679:     sum2  = 0.0;
680:     sum3  = 0.0;
681:     sum4  = 0.0;
682:     sum5  = 0.0;

684:     nonzerorow += (n>0);
685:     for (j=0; j<n; j++) {
686:       sum1 += v[jrow]*x[5*idx[jrow]];
687:       sum2 += v[jrow]*x[5*idx[jrow]+1];
688:       sum3 += v[jrow]*x[5*idx[jrow]+2];
689:       sum4 += v[jrow]*x[5*idx[jrow]+3];
690:       sum5 += v[jrow]*x[5*idx[jrow]+4];
691:       jrow++;
692:     }
693:     y[5*i]   = sum1;
694:     y[5*i+1] = sum2;
695:     y[5*i+2] = sum3;
696:     y[5*i+3] = sum4;
697:     y[5*i+4] = sum5;
698:   }

700:   PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);
701:   VecRestoreArrayRead(xx,&x);
702:   VecRestoreArray(yy,&y);
703:   return(0);
704: }

706: PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
707: {
708:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
709:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
710:   const PetscScalar *x,*v;
711:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
712:   PetscErrorCode    ierr;
713:   PetscInt          n,i;
714:   const PetscInt    m = b->AIJ->rmap->n,*idx;

717:   VecSet(yy,0.0);
718:   VecGetArrayRead(xx,&x);
719:   VecGetArray(yy,&y);

721:   for (i=0; i<m; i++) {
722:     idx    = a->j + a->i[i];
723:     v      = a->a + a->i[i];
724:     n      = a->i[i+1] - a->i[i];
725:     alpha1 = x[5*i];
726:     alpha2 = x[5*i+1];
727:     alpha3 = x[5*i+2];
728:     alpha4 = x[5*i+3];
729:     alpha5 = x[5*i+4];
730:     while (n-->0) {
731:       y[5*(*idx)]   += alpha1*(*v);
732:       y[5*(*idx)+1] += alpha2*(*v);
733:       y[5*(*idx)+2] += alpha3*(*v);
734:       y[5*(*idx)+3] += alpha4*(*v);
735:       y[5*(*idx)+4] += alpha5*(*v);
736:       idx++; v++;
737:     }
738:   }
739:   PetscLogFlops(10.0*a->nz);
740:   VecRestoreArrayRead(xx,&x);
741:   VecRestoreArray(yy,&y);
742:   return(0);
743: }

745: PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
746: {
747:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
748:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
749:   const PetscScalar *x,*v;
750:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
751:   PetscErrorCode    ierr;
752:   PetscInt          n,i,jrow,j;
753:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

756:   if (yy != zz) {VecCopy(yy,zz);}
757:   VecGetArrayRead(xx,&x);
758:   VecGetArray(zz,&y);
759:   idx  = a->j;
760:   v    = a->a;
761:   ii   = a->i;

763:   for (i=0; i<m; i++) {
764:     jrow = ii[i];
765:     n    = ii[i+1] - jrow;
766:     sum1 = 0.0;
767:     sum2 = 0.0;
768:     sum3 = 0.0;
769:     sum4 = 0.0;
770:     sum5 = 0.0;
771:     for (j=0; j<n; j++) {
772:       sum1 += v[jrow]*x[5*idx[jrow]];
773:       sum2 += v[jrow]*x[5*idx[jrow]+1];
774:       sum3 += v[jrow]*x[5*idx[jrow]+2];
775:       sum4 += v[jrow]*x[5*idx[jrow]+3];
776:       sum5 += v[jrow]*x[5*idx[jrow]+4];
777:       jrow++;
778:     }
779:     y[5*i]   += sum1;
780:     y[5*i+1] += sum2;
781:     y[5*i+2] += sum3;
782:     y[5*i+3] += sum4;
783:     y[5*i+4] += sum5;
784:   }

786:   PetscLogFlops(10.0*a->nz);
787:   VecRestoreArrayRead(xx,&x);
788:   VecRestoreArray(zz,&y);
789:   return(0);
790: }

792: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
793: {
794:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
795:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
796:   const PetscScalar *x,*v;
797:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
798:   PetscErrorCode    ierr;
799:   PetscInt          n,i;
800:   const PetscInt    m = b->AIJ->rmap->n,*idx;

803:   if (yy != zz) {VecCopy(yy,zz);}
804:   VecGetArrayRead(xx,&x);
805:   VecGetArray(zz,&y);

807:   for (i=0; i<m; i++) {
808:     idx    = a->j + a->i[i];
809:     v      = a->a + a->i[i];
810:     n      = a->i[i+1] - a->i[i];
811:     alpha1 = x[5*i];
812:     alpha2 = x[5*i+1];
813:     alpha3 = x[5*i+2];
814:     alpha4 = x[5*i+3];
815:     alpha5 = x[5*i+4];
816:     while (n-->0) {
817:       y[5*(*idx)]   += alpha1*(*v);
818:       y[5*(*idx)+1] += alpha2*(*v);
819:       y[5*(*idx)+2] += alpha3*(*v);
820:       y[5*(*idx)+3] += alpha4*(*v);
821:       y[5*(*idx)+4] += alpha5*(*v);
822:       idx++; v++;
823:     }
824:   }
825:   PetscLogFlops(10.0*a->nz);
826:   VecRestoreArrayRead(xx,&x);
827:   VecRestoreArray(zz,&y);
828:   return(0);
829: }

831: /* ------------------------------------------------------------------------------*/
832: PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
833: {
834:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
835:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
836:   const PetscScalar *x,*v;
837:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
838:   PetscErrorCode    ierr;
839:   PetscInt          nonzerorow=0,n,i,jrow,j;
840:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;

843:   VecGetArrayRead(xx,&x);
844:   VecGetArray(yy,&y);
845:   idx  = a->j;
846:   v    = a->a;
847:   ii   = a->i;

849:   for (i=0; i<m; i++) {
850:     jrow  = ii[i];
851:     n     = ii[i+1] - jrow;
852:     sum1  = 0.0;
853:     sum2  = 0.0;
854:     sum3  = 0.0;
855:     sum4  = 0.0;
856:     sum5  = 0.0;
857:     sum6  = 0.0;

859:     nonzerorow += (n>0);
860:     for (j=0; j<n; j++) {
861:       sum1 += v[jrow]*x[6*idx[jrow]];
862:       sum2 += v[jrow]*x[6*idx[jrow]+1];
863:       sum3 += v[jrow]*x[6*idx[jrow]+2];
864:       sum4 += v[jrow]*x[6*idx[jrow]+3];
865:       sum5 += v[jrow]*x[6*idx[jrow]+4];
866:       sum6 += v[jrow]*x[6*idx[jrow]+5];
867:       jrow++;
868:     }
869:     y[6*i]   = sum1;
870:     y[6*i+1] = sum2;
871:     y[6*i+2] = sum3;
872:     y[6*i+3] = sum4;
873:     y[6*i+4] = sum5;
874:     y[6*i+5] = sum6;
875:   }

877:   PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);
878:   VecRestoreArrayRead(xx,&x);
879:   VecRestoreArray(yy,&y);
880:   return(0);
881: }

883: PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
884: {
885:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
886:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
887:   const PetscScalar *x,*v;
888:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
889:   PetscErrorCode    ierr;
890:   PetscInt          n,i;
891:   const PetscInt    m = b->AIJ->rmap->n,*idx;

894:   VecSet(yy,0.0);
895:   VecGetArrayRead(xx,&x);
896:   VecGetArray(yy,&y);

898:   for (i=0; i<m; i++) {
899:     idx    = a->j + a->i[i];
900:     v      = a->a + a->i[i];
901:     n      = a->i[i+1] - a->i[i];
902:     alpha1 = x[6*i];
903:     alpha2 = x[6*i+1];
904:     alpha3 = x[6*i+2];
905:     alpha4 = x[6*i+3];
906:     alpha5 = x[6*i+4];
907:     alpha6 = x[6*i+5];
908:     while (n-->0) {
909:       y[6*(*idx)]   += alpha1*(*v);
910:       y[6*(*idx)+1] += alpha2*(*v);
911:       y[6*(*idx)+2] += alpha3*(*v);
912:       y[6*(*idx)+3] += alpha4*(*v);
913:       y[6*(*idx)+4] += alpha5*(*v);
914:       y[6*(*idx)+5] += alpha6*(*v);
915:       idx++; v++;
916:     }
917:   }
918:   PetscLogFlops(12.0*a->nz);
919:   VecRestoreArrayRead(xx,&x);
920:   VecRestoreArray(yy,&y);
921:   return(0);
922: }

924: PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
925: {
926:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
927:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
928:   const PetscScalar *x,*v;
929:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
930:   PetscErrorCode    ierr;
931:   PetscInt          n,i,jrow,j;
932:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

935:   if (yy != zz) {VecCopy(yy,zz);}
936:   VecGetArrayRead(xx,&x);
937:   VecGetArray(zz,&y);
938:   idx  = a->j;
939:   v    = a->a;
940:   ii   = a->i;

942:   for (i=0; i<m; i++) {
943:     jrow = ii[i];
944:     n    = ii[i+1] - jrow;
945:     sum1 = 0.0;
946:     sum2 = 0.0;
947:     sum3 = 0.0;
948:     sum4 = 0.0;
949:     sum5 = 0.0;
950:     sum6 = 0.0;
951:     for (j=0; j<n; j++) {
952:       sum1 += v[jrow]*x[6*idx[jrow]];
953:       sum2 += v[jrow]*x[6*idx[jrow]+1];
954:       sum3 += v[jrow]*x[6*idx[jrow]+2];
955:       sum4 += v[jrow]*x[6*idx[jrow]+3];
956:       sum5 += v[jrow]*x[6*idx[jrow]+4];
957:       sum6 += v[jrow]*x[6*idx[jrow]+5];
958:       jrow++;
959:     }
960:     y[6*i]   += sum1;
961:     y[6*i+1] += sum2;
962:     y[6*i+2] += sum3;
963:     y[6*i+3] += sum4;
964:     y[6*i+4] += sum5;
965:     y[6*i+5] += sum6;
966:   }

968:   PetscLogFlops(12.0*a->nz);
969:   VecRestoreArrayRead(xx,&x);
970:   VecRestoreArray(zz,&y);
971:   return(0);
972: }

974: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
975: {
976:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
977:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
978:   const PetscScalar *x,*v;
979:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
980:   PetscErrorCode    ierr;
981:   PetscInt          n,i;
982:   const PetscInt    m = b->AIJ->rmap->n,*idx;

985:   if (yy != zz) {VecCopy(yy,zz);}
986:   VecGetArrayRead(xx,&x);
987:   VecGetArray(zz,&y);

989:   for (i=0; i<m; i++) {
990:     idx    = a->j + a->i[i];
991:     v      = a->a + a->i[i];
992:     n      = a->i[i+1] - a->i[i];
993:     alpha1 = x[6*i];
994:     alpha2 = x[6*i+1];
995:     alpha3 = x[6*i+2];
996:     alpha4 = x[6*i+3];
997:     alpha5 = x[6*i+4];
998:     alpha6 = x[6*i+5];
999:     while (n-->0) {
1000:       y[6*(*idx)]   += alpha1*(*v);
1001:       y[6*(*idx)+1] += alpha2*(*v);
1002:       y[6*(*idx)+2] += alpha3*(*v);
1003:       y[6*(*idx)+3] += alpha4*(*v);
1004:       y[6*(*idx)+4] += alpha5*(*v);
1005:       y[6*(*idx)+5] += alpha6*(*v);
1006:       idx++; v++;
1007:     }
1008:   }
1009:   PetscLogFlops(12.0*a->nz);
1010:   VecRestoreArrayRead(xx,&x);
1011:   VecRestoreArray(zz,&y);
1012:   return(0);
1013: }

1015: /* ------------------------------------------------------------------------------*/
1016: PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1017: {
1018:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1019:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1020:   const PetscScalar *x,*v;
1021:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1022:   PetscErrorCode    ierr;
1023:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1024:   PetscInt          nonzerorow=0,n,i,jrow,j;

1027:   VecGetArrayRead(xx,&x);
1028:   VecGetArray(yy,&y);
1029:   idx  = a->j;
1030:   v    = a->a;
1031:   ii   = a->i;

1033:   for (i=0; i<m; i++) {
1034:     jrow  = ii[i];
1035:     n     = ii[i+1] - jrow;
1036:     sum1  = 0.0;
1037:     sum2  = 0.0;
1038:     sum3  = 0.0;
1039:     sum4  = 0.0;
1040:     sum5  = 0.0;
1041:     sum6  = 0.0;
1042:     sum7  = 0.0;

1044:     nonzerorow += (n>0);
1045:     for (j=0; j<n; j++) {
1046:       sum1 += v[jrow]*x[7*idx[jrow]];
1047:       sum2 += v[jrow]*x[7*idx[jrow]+1];
1048:       sum3 += v[jrow]*x[7*idx[jrow]+2];
1049:       sum4 += v[jrow]*x[7*idx[jrow]+3];
1050:       sum5 += v[jrow]*x[7*idx[jrow]+4];
1051:       sum6 += v[jrow]*x[7*idx[jrow]+5];
1052:       sum7 += v[jrow]*x[7*idx[jrow]+6];
1053:       jrow++;
1054:     }
1055:     y[7*i]   = sum1;
1056:     y[7*i+1] = sum2;
1057:     y[7*i+2] = sum3;
1058:     y[7*i+3] = sum4;
1059:     y[7*i+4] = sum5;
1060:     y[7*i+5] = sum6;
1061:     y[7*i+6] = sum7;
1062:   }

1064:   PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);
1065:   VecRestoreArrayRead(xx,&x);
1066:   VecRestoreArray(yy,&y);
1067:   return(0);
1068: }

1070: PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1071: {
1072:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1073:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1074:   const PetscScalar *x,*v;
1075:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1076:   PetscErrorCode    ierr;
1077:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1078:   PetscInt          n,i;

1081:   VecSet(yy,0.0);
1082:   VecGetArrayRead(xx,&x);
1083:   VecGetArray(yy,&y);

1085:   for (i=0; i<m; i++) {
1086:     idx    = a->j + a->i[i];
1087:     v      = a->a + a->i[i];
1088:     n      = a->i[i+1] - a->i[i];
1089:     alpha1 = x[7*i];
1090:     alpha2 = x[7*i+1];
1091:     alpha3 = x[7*i+2];
1092:     alpha4 = x[7*i+3];
1093:     alpha5 = x[7*i+4];
1094:     alpha6 = x[7*i+5];
1095:     alpha7 = x[7*i+6];
1096:     while (n-->0) {
1097:       y[7*(*idx)]   += alpha1*(*v);
1098:       y[7*(*idx)+1] += alpha2*(*v);
1099:       y[7*(*idx)+2] += alpha3*(*v);
1100:       y[7*(*idx)+3] += alpha4*(*v);
1101:       y[7*(*idx)+4] += alpha5*(*v);
1102:       y[7*(*idx)+5] += alpha6*(*v);
1103:       y[7*(*idx)+6] += alpha7*(*v);
1104:       idx++; v++;
1105:     }
1106:   }
1107:   PetscLogFlops(14.0*a->nz);
1108:   VecRestoreArrayRead(xx,&x);
1109:   VecRestoreArray(yy,&y);
1110:   return(0);
1111: }

1113: PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1114: {
1115:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1116:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1117:   const PetscScalar *x,*v;
1118:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1119:   PetscErrorCode    ierr;
1120:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1121:   PetscInt          n,i,jrow,j;

1124:   if (yy != zz) {VecCopy(yy,zz);}
1125:   VecGetArrayRead(xx,&x);
1126:   VecGetArray(zz,&y);
1127:   idx  = a->j;
1128:   v    = a->a;
1129:   ii   = a->i;

1131:   for (i=0; i<m; i++) {
1132:     jrow = ii[i];
1133:     n    = ii[i+1] - jrow;
1134:     sum1 = 0.0;
1135:     sum2 = 0.0;
1136:     sum3 = 0.0;
1137:     sum4 = 0.0;
1138:     sum5 = 0.0;
1139:     sum6 = 0.0;
1140:     sum7 = 0.0;
1141:     for (j=0; j<n; j++) {
1142:       sum1 += v[jrow]*x[7*idx[jrow]];
1143:       sum2 += v[jrow]*x[7*idx[jrow]+1];
1144:       sum3 += v[jrow]*x[7*idx[jrow]+2];
1145:       sum4 += v[jrow]*x[7*idx[jrow]+3];
1146:       sum5 += v[jrow]*x[7*idx[jrow]+4];
1147:       sum6 += v[jrow]*x[7*idx[jrow]+5];
1148:       sum7 += v[jrow]*x[7*idx[jrow]+6];
1149:       jrow++;
1150:     }
1151:     y[7*i]   += sum1;
1152:     y[7*i+1] += sum2;
1153:     y[7*i+2] += sum3;
1154:     y[7*i+3] += sum4;
1155:     y[7*i+4] += sum5;
1156:     y[7*i+5] += sum6;
1157:     y[7*i+6] += sum7;
1158:   }

1160:   PetscLogFlops(14.0*a->nz);
1161:   VecRestoreArrayRead(xx,&x);
1162:   VecRestoreArray(zz,&y);
1163:   return(0);
1164: }

1166: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1167: {
1168:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1169:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1170:   const PetscScalar *x,*v;
1171:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1172:   PetscErrorCode    ierr;
1173:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1174:   PetscInt          n,i;

1177:   if (yy != zz) {VecCopy(yy,zz);}
1178:   VecGetArrayRead(xx,&x);
1179:   VecGetArray(zz,&y);
1180:   for (i=0; i<m; i++) {
1181:     idx    = a->j + a->i[i];
1182:     v      = a->a + a->i[i];
1183:     n      = a->i[i+1] - a->i[i];
1184:     alpha1 = x[7*i];
1185:     alpha2 = x[7*i+1];
1186:     alpha3 = x[7*i+2];
1187:     alpha4 = x[7*i+3];
1188:     alpha5 = x[7*i+4];
1189:     alpha6 = x[7*i+5];
1190:     alpha7 = x[7*i+6];
1191:     while (n-->0) {
1192:       y[7*(*idx)]   += alpha1*(*v);
1193:       y[7*(*idx)+1] += alpha2*(*v);
1194:       y[7*(*idx)+2] += alpha3*(*v);
1195:       y[7*(*idx)+3] += alpha4*(*v);
1196:       y[7*(*idx)+4] += alpha5*(*v);
1197:       y[7*(*idx)+5] += alpha6*(*v);
1198:       y[7*(*idx)+6] += alpha7*(*v);
1199:       idx++; v++;
1200:     }
1201:   }
1202:   PetscLogFlops(14.0*a->nz);
1203:   VecRestoreArrayRead(xx,&x);
1204:   VecRestoreArray(zz,&y);
1205:   return(0);
1206: }

1208: PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1209: {
1210:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1211:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1212:   const PetscScalar *x,*v;
1213:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1214:   PetscErrorCode    ierr;
1215:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1216:   PetscInt          nonzerorow=0,n,i,jrow,j;

1219:   VecGetArrayRead(xx,&x);
1220:   VecGetArray(yy,&y);
1221:   idx  = a->j;
1222:   v    = a->a;
1223:   ii   = a->i;

1225:   for (i=0; i<m; i++) {
1226:     jrow  = ii[i];
1227:     n     = ii[i+1] - jrow;
1228:     sum1  = 0.0;
1229:     sum2  = 0.0;
1230:     sum3  = 0.0;
1231:     sum4  = 0.0;
1232:     sum5  = 0.0;
1233:     sum6  = 0.0;
1234:     sum7  = 0.0;
1235:     sum8  = 0.0;

1237:     nonzerorow += (n>0);
1238:     for (j=0; j<n; j++) {
1239:       sum1 += v[jrow]*x[8*idx[jrow]];
1240:       sum2 += v[jrow]*x[8*idx[jrow]+1];
1241:       sum3 += v[jrow]*x[8*idx[jrow]+2];
1242:       sum4 += v[jrow]*x[8*idx[jrow]+3];
1243:       sum5 += v[jrow]*x[8*idx[jrow]+4];
1244:       sum6 += v[jrow]*x[8*idx[jrow]+5];
1245:       sum7 += v[jrow]*x[8*idx[jrow]+6];
1246:       sum8 += v[jrow]*x[8*idx[jrow]+7];
1247:       jrow++;
1248:     }
1249:     y[8*i]   = sum1;
1250:     y[8*i+1] = sum2;
1251:     y[8*i+2] = sum3;
1252:     y[8*i+3] = sum4;
1253:     y[8*i+4] = sum5;
1254:     y[8*i+5] = sum6;
1255:     y[8*i+6] = sum7;
1256:     y[8*i+7] = sum8;
1257:   }

1259:   PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);
1260:   VecRestoreArrayRead(xx,&x);
1261:   VecRestoreArray(yy,&y);
1262:   return(0);
1263: }

1265: PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1266: {
1267:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1268:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1269:   const PetscScalar *x,*v;
1270:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1271:   PetscErrorCode    ierr;
1272:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1273:   PetscInt          n,i;

1276:   VecSet(yy,0.0);
1277:   VecGetArrayRead(xx,&x);
1278:   VecGetArray(yy,&y);

1280:   for (i=0; i<m; i++) {
1281:     idx    = a->j + a->i[i];
1282:     v      = a->a + a->i[i];
1283:     n      = a->i[i+1] - a->i[i];
1284:     alpha1 = x[8*i];
1285:     alpha2 = x[8*i+1];
1286:     alpha3 = x[8*i+2];
1287:     alpha4 = x[8*i+3];
1288:     alpha5 = x[8*i+4];
1289:     alpha6 = x[8*i+5];
1290:     alpha7 = x[8*i+6];
1291:     alpha8 = x[8*i+7];
1292:     while (n-->0) {
1293:       y[8*(*idx)]   += alpha1*(*v);
1294:       y[8*(*idx)+1] += alpha2*(*v);
1295:       y[8*(*idx)+2] += alpha3*(*v);
1296:       y[8*(*idx)+3] += alpha4*(*v);
1297:       y[8*(*idx)+4] += alpha5*(*v);
1298:       y[8*(*idx)+5] += alpha6*(*v);
1299:       y[8*(*idx)+6] += alpha7*(*v);
1300:       y[8*(*idx)+7] += alpha8*(*v);
1301:       idx++; v++;
1302:     }
1303:   }
1304:   PetscLogFlops(16.0*a->nz);
1305:   VecRestoreArrayRead(xx,&x);
1306:   VecRestoreArray(yy,&y);
1307:   return(0);
1308: }

1310: PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1311: {
1312:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1313:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1314:   const PetscScalar *x,*v;
1315:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1316:   PetscErrorCode    ierr;
1317:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1318:   PetscInt          n,i,jrow,j;

1321:   if (yy != zz) {VecCopy(yy,zz);}
1322:   VecGetArrayRead(xx,&x);
1323:   VecGetArray(zz,&y);
1324:   idx  = a->j;
1325:   v    = a->a;
1326:   ii   = a->i;

1328:   for (i=0; i<m; i++) {
1329:     jrow = ii[i];
1330:     n    = ii[i+1] - jrow;
1331:     sum1 = 0.0;
1332:     sum2 = 0.0;
1333:     sum3 = 0.0;
1334:     sum4 = 0.0;
1335:     sum5 = 0.0;
1336:     sum6 = 0.0;
1337:     sum7 = 0.0;
1338:     sum8 = 0.0;
1339:     for (j=0; j<n; j++) {
1340:       sum1 += v[jrow]*x[8*idx[jrow]];
1341:       sum2 += v[jrow]*x[8*idx[jrow]+1];
1342:       sum3 += v[jrow]*x[8*idx[jrow]+2];
1343:       sum4 += v[jrow]*x[8*idx[jrow]+3];
1344:       sum5 += v[jrow]*x[8*idx[jrow]+4];
1345:       sum6 += v[jrow]*x[8*idx[jrow]+5];
1346:       sum7 += v[jrow]*x[8*idx[jrow]+6];
1347:       sum8 += v[jrow]*x[8*idx[jrow]+7];
1348:       jrow++;
1349:     }
1350:     y[8*i]   += sum1;
1351:     y[8*i+1] += sum2;
1352:     y[8*i+2] += sum3;
1353:     y[8*i+3] += sum4;
1354:     y[8*i+4] += sum5;
1355:     y[8*i+5] += sum6;
1356:     y[8*i+6] += sum7;
1357:     y[8*i+7] += sum8;
1358:   }

1360:   PetscLogFlops(16.0*a->nz);
1361:   VecRestoreArrayRead(xx,&x);
1362:   VecRestoreArray(zz,&y);
1363:   return(0);
1364: }

1366: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1367: {
1368:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1369:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1370:   const PetscScalar *x,*v;
1371:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1372:   PetscErrorCode    ierr;
1373:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1374:   PetscInt          n,i;

1377:   if (yy != zz) {VecCopy(yy,zz);}
1378:   VecGetArrayRead(xx,&x);
1379:   VecGetArray(zz,&y);
1380:   for (i=0; i<m; i++) {
1381:     idx    = a->j + a->i[i];
1382:     v      = a->a + a->i[i];
1383:     n      = a->i[i+1] - a->i[i];
1384:     alpha1 = x[8*i];
1385:     alpha2 = x[8*i+1];
1386:     alpha3 = x[8*i+2];
1387:     alpha4 = x[8*i+3];
1388:     alpha5 = x[8*i+4];
1389:     alpha6 = x[8*i+5];
1390:     alpha7 = x[8*i+6];
1391:     alpha8 = x[8*i+7];
1392:     while (n-->0) {
1393:       y[8*(*idx)]   += alpha1*(*v);
1394:       y[8*(*idx)+1] += alpha2*(*v);
1395:       y[8*(*idx)+2] += alpha3*(*v);
1396:       y[8*(*idx)+3] += alpha4*(*v);
1397:       y[8*(*idx)+4] += alpha5*(*v);
1398:       y[8*(*idx)+5] += alpha6*(*v);
1399:       y[8*(*idx)+6] += alpha7*(*v);
1400:       y[8*(*idx)+7] += alpha8*(*v);
1401:       idx++; v++;
1402:     }
1403:   }
1404:   PetscLogFlops(16.0*a->nz);
1405:   VecRestoreArrayRead(xx,&x);
1406:   VecRestoreArray(zz,&y);
1407:   return(0);
1408: }

1410: /* ------------------------------------------------------------------------------*/
1411: PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1412: {
1413:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1414:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1415:   const PetscScalar *x,*v;
1416:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1417:   PetscErrorCode    ierr;
1418:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1419:   PetscInt          nonzerorow=0,n,i,jrow,j;

1422:   VecGetArrayRead(xx,&x);
1423:   VecGetArray(yy,&y);
1424:   idx  = a->j;
1425:   v    = a->a;
1426:   ii   = a->i;

1428:   for (i=0; i<m; i++) {
1429:     jrow  = ii[i];
1430:     n     = ii[i+1] - jrow;
1431:     sum1  = 0.0;
1432:     sum2  = 0.0;
1433:     sum3  = 0.0;
1434:     sum4  = 0.0;
1435:     sum5  = 0.0;
1436:     sum6  = 0.0;
1437:     sum7  = 0.0;
1438:     sum8  = 0.0;
1439:     sum9  = 0.0;

1441:     nonzerorow += (n>0);
1442:     for (j=0; j<n; j++) {
1443:       sum1 += v[jrow]*x[9*idx[jrow]];
1444:       sum2 += v[jrow]*x[9*idx[jrow]+1];
1445:       sum3 += v[jrow]*x[9*idx[jrow]+2];
1446:       sum4 += v[jrow]*x[9*idx[jrow]+3];
1447:       sum5 += v[jrow]*x[9*idx[jrow]+4];
1448:       sum6 += v[jrow]*x[9*idx[jrow]+5];
1449:       sum7 += v[jrow]*x[9*idx[jrow]+6];
1450:       sum8 += v[jrow]*x[9*idx[jrow]+7];
1451:       sum9 += v[jrow]*x[9*idx[jrow]+8];
1452:       jrow++;
1453:     }
1454:     y[9*i]   = sum1;
1455:     y[9*i+1] = sum2;
1456:     y[9*i+2] = sum3;
1457:     y[9*i+3] = sum4;
1458:     y[9*i+4] = sum5;
1459:     y[9*i+5] = sum6;
1460:     y[9*i+6] = sum7;
1461:     y[9*i+7] = sum8;
1462:     y[9*i+8] = sum9;
1463:   }

1465:   PetscLogFlops(18.0*a->nz - 9*nonzerorow);
1466:   VecRestoreArrayRead(xx,&x);
1467:   VecRestoreArray(yy,&y);
1468:   return(0);
1469: }

1471: /* ------------------------------------------------------------------------------*/

1473: PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1474: {
1475:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1476:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1477:   const PetscScalar *x,*v;
1478:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1479:   PetscErrorCode    ierr;
1480:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1481:   PetscInt          n,i;

1484:   VecSet(yy,0.0);
1485:   VecGetArrayRead(xx,&x);
1486:   VecGetArray(yy,&y);

1488:   for (i=0; i<m; i++) {
1489:     idx    = a->j + a->i[i];
1490:     v      = a->a + a->i[i];
1491:     n      = a->i[i+1] - a->i[i];
1492:     alpha1 = x[9*i];
1493:     alpha2 = x[9*i+1];
1494:     alpha3 = x[9*i+2];
1495:     alpha4 = x[9*i+3];
1496:     alpha5 = x[9*i+4];
1497:     alpha6 = x[9*i+5];
1498:     alpha7 = x[9*i+6];
1499:     alpha8 = x[9*i+7];
1500:     alpha9 = x[9*i+8];
1501:     while (n-->0) {
1502:       y[9*(*idx)]   += alpha1*(*v);
1503:       y[9*(*idx)+1] += alpha2*(*v);
1504:       y[9*(*idx)+2] += alpha3*(*v);
1505:       y[9*(*idx)+3] += alpha4*(*v);
1506:       y[9*(*idx)+4] += alpha5*(*v);
1507:       y[9*(*idx)+5] += alpha6*(*v);
1508:       y[9*(*idx)+6] += alpha7*(*v);
1509:       y[9*(*idx)+7] += alpha8*(*v);
1510:       y[9*(*idx)+8] += alpha9*(*v);
1511:       idx++; v++;
1512:     }
1513:   }
1514:   PetscLogFlops(18.0*a->nz);
1515:   VecRestoreArrayRead(xx,&x);
1516:   VecRestoreArray(yy,&y);
1517:   return(0);
1518: }

1520: PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1521: {
1522:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1523:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1524:   const PetscScalar *x,*v;
1525:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1526:   PetscErrorCode    ierr;
1527:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1528:   PetscInt          n,i,jrow,j;

1531:   if (yy != zz) {VecCopy(yy,zz);}
1532:   VecGetArrayRead(xx,&x);
1533:   VecGetArray(zz,&y);
1534:   idx  = a->j;
1535:   v    = a->a;
1536:   ii   = a->i;

1538:   for (i=0; i<m; i++) {
1539:     jrow = ii[i];
1540:     n    = ii[i+1] - jrow;
1541:     sum1 = 0.0;
1542:     sum2 = 0.0;
1543:     sum3 = 0.0;
1544:     sum4 = 0.0;
1545:     sum5 = 0.0;
1546:     sum6 = 0.0;
1547:     sum7 = 0.0;
1548:     sum8 = 0.0;
1549:     sum9 = 0.0;
1550:     for (j=0; j<n; j++) {
1551:       sum1 += v[jrow]*x[9*idx[jrow]];
1552:       sum2 += v[jrow]*x[9*idx[jrow]+1];
1553:       sum3 += v[jrow]*x[9*idx[jrow]+2];
1554:       sum4 += v[jrow]*x[9*idx[jrow]+3];
1555:       sum5 += v[jrow]*x[9*idx[jrow]+4];
1556:       sum6 += v[jrow]*x[9*idx[jrow]+5];
1557:       sum7 += v[jrow]*x[9*idx[jrow]+6];
1558:       sum8 += v[jrow]*x[9*idx[jrow]+7];
1559:       sum9 += v[jrow]*x[9*idx[jrow]+8];
1560:       jrow++;
1561:     }
1562:     y[9*i]   += sum1;
1563:     y[9*i+1] += sum2;
1564:     y[9*i+2] += sum3;
1565:     y[9*i+3] += sum4;
1566:     y[9*i+4] += sum5;
1567:     y[9*i+5] += sum6;
1568:     y[9*i+6] += sum7;
1569:     y[9*i+7] += sum8;
1570:     y[9*i+8] += sum9;
1571:   }

1573:   PetscLogFlops(18*a->nz);
1574:   VecRestoreArrayRead(xx,&x);
1575:   VecRestoreArray(zz,&y);
1576:   return(0);
1577: }

1579: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1580: {
1581:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1582:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1583:   const PetscScalar *x,*v;
1584:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1585:   PetscErrorCode    ierr;
1586:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1587:   PetscInt          n,i;

1590:   if (yy != zz) {VecCopy(yy,zz);}
1591:   VecGetArrayRead(xx,&x);
1592:   VecGetArray(zz,&y);
1593:   for (i=0; i<m; i++) {
1594:     idx    = a->j + a->i[i];
1595:     v      = a->a + a->i[i];
1596:     n      = a->i[i+1] - a->i[i];
1597:     alpha1 = x[9*i];
1598:     alpha2 = x[9*i+1];
1599:     alpha3 = x[9*i+2];
1600:     alpha4 = x[9*i+3];
1601:     alpha5 = x[9*i+4];
1602:     alpha6 = x[9*i+5];
1603:     alpha7 = x[9*i+6];
1604:     alpha8 = x[9*i+7];
1605:     alpha9 = x[9*i+8];
1606:     while (n-->0) {
1607:       y[9*(*idx)]   += alpha1*(*v);
1608:       y[9*(*idx)+1] += alpha2*(*v);
1609:       y[9*(*idx)+2] += alpha3*(*v);
1610:       y[9*(*idx)+3] += alpha4*(*v);
1611:       y[9*(*idx)+4] += alpha5*(*v);
1612:       y[9*(*idx)+5] += alpha6*(*v);
1613:       y[9*(*idx)+6] += alpha7*(*v);
1614:       y[9*(*idx)+7] += alpha8*(*v);
1615:       y[9*(*idx)+8] += alpha9*(*v);
1616:       idx++; v++;
1617:     }
1618:   }
1619:   PetscLogFlops(18.0*a->nz);
1620:   VecRestoreArrayRead(xx,&x);
1621:   VecRestoreArray(zz,&y);
1622:   return(0);
1623: }
1624: PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1625: {
1626:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1627:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1628:   const PetscScalar *x,*v;
1629:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1630:   PetscErrorCode    ierr;
1631:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1632:   PetscInt          nonzerorow=0,n,i,jrow,j;

1635:   VecGetArrayRead(xx,&x);
1636:   VecGetArray(yy,&y);
1637:   idx  = a->j;
1638:   v    = a->a;
1639:   ii   = a->i;

1641:   for (i=0; i<m; i++) {
1642:     jrow  = ii[i];
1643:     n     = ii[i+1] - jrow;
1644:     sum1  = 0.0;
1645:     sum2  = 0.0;
1646:     sum3  = 0.0;
1647:     sum4  = 0.0;
1648:     sum5  = 0.0;
1649:     sum6  = 0.0;
1650:     sum7  = 0.0;
1651:     sum8  = 0.0;
1652:     sum9  = 0.0;
1653:     sum10 = 0.0;

1655:     nonzerorow += (n>0);
1656:     for (j=0; j<n; j++) {
1657:       sum1  += v[jrow]*x[10*idx[jrow]];
1658:       sum2  += v[jrow]*x[10*idx[jrow]+1];
1659:       sum3  += v[jrow]*x[10*idx[jrow]+2];
1660:       sum4  += v[jrow]*x[10*idx[jrow]+3];
1661:       sum5  += v[jrow]*x[10*idx[jrow]+4];
1662:       sum6  += v[jrow]*x[10*idx[jrow]+5];
1663:       sum7  += v[jrow]*x[10*idx[jrow]+6];
1664:       sum8  += v[jrow]*x[10*idx[jrow]+7];
1665:       sum9  += v[jrow]*x[10*idx[jrow]+8];
1666:       sum10 += v[jrow]*x[10*idx[jrow]+9];
1667:       jrow++;
1668:     }
1669:     y[10*i]   = sum1;
1670:     y[10*i+1] = sum2;
1671:     y[10*i+2] = sum3;
1672:     y[10*i+3] = sum4;
1673:     y[10*i+4] = sum5;
1674:     y[10*i+5] = sum6;
1675:     y[10*i+6] = sum7;
1676:     y[10*i+7] = sum8;
1677:     y[10*i+8] = sum9;
1678:     y[10*i+9] = sum10;
1679:   }

1681:   PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);
1682:   VecRestoreArrayRead(xx,&x);
1683:   VecRestoreArray(yy,&y);
1684:   return(0);
1685: }

1687: PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1688: {
1689:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1690:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1691:   const PetscScalar *x,*v;
1692:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1693:   PetscErrorCode    ierr;
1694:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1695:   PetscInt          n,i,jrow,j;

1698:   if (yy != zz) {VecCopy(yy,zz);}
1699:   VecGetArrayRead(xx,&x);
1700:   VecGetArray(zz,&y);
1701:   idx  = a->j;
1702:   v    = a->a;
1703:   ii   = a->i;

1705:   for (i=0; i<m; i++) {
1706:     jrow  = ii[i];
1707:     n     = ii[i+1] - jrow;
1708:     sum1  = 0.0;
1709:     sum2  = 0.0;
1710:     sum3  = 0.0;
1711:     sum4  = 0.0;
1712:     sum5  = 0.0;
1713:     sum6  = 0.0;
1714:     sum7  = 0.0;
1715:     sum8  = 0.0;
1716:     sum9  = 0.0;
1717:     sum10 = 0.0;
1718:     for (j=0; j<n; j++) {
1719:       sum1  += v[jrow]*x[10*idx[jrow]];
1720:       sum2  += v[jrow]*x[10*idx[jrow]+1];
1721:       sum3  += v[jrow]*x[10*idx[jrow]+2];
1722:       sum4  += v[jrow]*x[10*idx[jrow]+3];
1723:       sum5  += v[jrow]*x[10*idx[jrow]+4];
1724:       sum6  += v[jrow]*x[10*idx[jrow]+5];
1725:       sum7  += v[jrow]*x[10*idx[jrow]+6];
1726:       sum8  += v[jrow]*x[10*idx[jrow]+7];
1727:       sum9  += v[jrow]*x[10*idx[jrow]+8];
1728:       sum10 += v[jrow]*x[10*idx[jrow]+9];
1729:       jrow++;
1730:     }
1731:     y[10*i]   += sum1;
1732:     y[10*i+1] += sum2;
1733:     y[10*i+2] += sum3;
1734:     y[10*i+3] += sum4;
1735:     y[10*i+4] += sum5;
1736:     y[10*i+5] += sum6;
1737:     y[10*i+6] += sum7;
1738:     y[10*i+7] += sum8;
1739:     y[10*i+8] += sum9;
1740:     y[10*i+9] += sum10;
1741:   }

1743:   PetscLogFlops(20.0*a->nz);
1744:   VecRestoreArrayRead(xx,&x);
1745:   VecRestoreArray(yy,&y);
1746:   return(0);
1747: }

1749: PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1750: {
1751:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1752:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1753:   const PetscScalar *x,*v;
1754:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1755:   PetscErrorCode    ierr;
1756:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1757:   PetscInt          n,i;

1760:   VecSet(yy,0.0);
1761:   VecGetArrayRead(xx,&x);
1762:   VecGetArray(yy,&y);

1764:   for (i=0; i<m; i++) {
1765:     idx     = a->j + a->i[i];
1766:     v       = a->a + a->i[i];
1767:     n       = a->i[i+1] - a->i[i];
1768:     alpha1  = x[10*i];
1769:     alpha2  = x[10*i+1];
1770:     alpha3  = x[10*i+2];
1771:     alpha4  = x[10*i+3];
1772:     alpha5  = x[10*i+4];
1773:     alpha6  = x[10*i+5];
1774:     alpha7  = x[10*i+6];
1775:     alpha8  = x[10*i+7];
1776:     alpha9  = x[10*i+8];
1777:     alpha10 = x[10*i+9];
1778:     while (n-->0) {
1779:       y[10*(*idx)]   += alpha1*(*v);
1780:       y[10*(*idx)+1] += alpha2*(*v);
1781:       y[10*(*idx)+2] += alpha3*(*v);
1782:       y[10*(*idx)+3] += alpha4*(*v);
1783:       y[10*(*idx)+4] += alpha5*(*v);
1784:       y[10*(*idx)+5] += alpha6*(*v);
1785:       y[10*(*idx)+6] += alpha7*(*v);
1786:       y[10*(*idx)+7] += alpha8*(*v);
1787:       y[10*(*idx)+8] += alpha9*(*v);
1788:       y[10*(*idx)+9] += alpha10*(*v);
1789:       idx++; v++;
1790:     }
1791:   }
1792:   PetscLogFlops(20.0*a->nz);
1793:   VecRestoreArrayRead(xx,&x);
1794:   VecRestoreArray(yy,&y);
1795:   return(0);
1796: }

1798: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1799: {
1800:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1801:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1802:   const PetscScalar *x,*v;
1803:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1804:   PetscErrorCode    ierr;
1805:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1806:   PetscInt          n,i;

1809:   if (yy != zz) {VecCopy(yy,zz);}
1810:   VecGetArrayRead(xx,&x);
1811:   VecGetArray(zz,&y);
1812:   for (i=0; i<m; i++) {
1813:     idx     = a->j + a->i[i];
1814:     v       = a->a + a->i[i];
1815:     n       = a->i[i+1] - a->i[i];
1816:     alpha1  = x[10*i];
1817:     alpha2  = x[10*i+1];
1818:     alpha3  = x[10*i+2];
1819:     alpha4  = x[10*i+3];
1820:     alpha5  = x[10*i+4];
1821:     alpha6  = x[10*i+5];
1822:     alpha7  = x[10*i+6];
1823:     alpha8  = x[10*i+7];
1824:     alpha9  = x[10*i+8];
1825:     alpha10 = x[10*i+9];
1826:     while (n-->0) {
1827:       y[10*(*idx)]   += alpha1*(*v);
1828:       y[10*(*idx)+1] += alpha2*(*v);
1829:       y[10*(*idx)+2] += alpha3*(*v);
1830:       y[10*(*idx)+3] += alpha4*(*v);
1831:       y[10*(*idx)+4] += alpha5*(*v);
1832:       y[10*(*idx)+5] += alpha6*(*v);
1833:       y[10*(*idx)+6] += alpha7*(*v);
1834:       y[10*(*idx)+7] += alpha8*(*v);
1835:       y[10*(*idx)+8] += alpha9*(*v);
1836:       y[10*(*idx)+9] += alpha10*(*v);
1837:       idx++; v++;
1838:     }
1839:   }
1840:   PetscLogFlops(20.0*a->nz);
1841:   VecRestoreArrayRead(xx,&x);
1842:   VecRestoreArray(zz,&y);
1843:   return(0);
1844: }


1847: /*--------------------------------------------------------------------------------------------*/
1848: PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1849: {
1850:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1851:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1852:   const PetscScalar *x,*v;
1853:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1854:   PetscErrorCode    ierr;
1855:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
1856:   PetscInt          nonzerorow=0,n,i,jrow,j;

1859:   VecGetArrayRead(xx,&x);
1860:   VecGetArray(yy,&y);
1861:   idx  = a->j;
1862:   v    = a->a;
1863:   ii   = a->i;

1865:   for (i=0; i<m; i++) {
1866:     jrow  = ii[i];
1867:     n     = ii[i+1] - jrow;
1868:     sum1  = 0.0;
1869:     sum2  = 0.0;
1870:     sum3  = 0.0;
1871:     sum4  = 0.0;
1872:     sum5  = 0.0;
1873:     sum6  = 0.0;
1874:     sum7  = 0.0;
1875:     sum8  = 0.0;
1876:     sum9  = 0.0;
1877:     sum10 = 0.0;
1878:     sum11 = 0.0;

1880:     nonzerorow += (n>0);
1881:     for (j=0; j<n; j++) {
1882:       sum1  += v[jrow]*x[11*idx[jrow]];
1883:       sum2  += v[jrow]*x[11*idx[jrow]+1];
1884:       sum3  += v[jrow]*x[11*idx[jrow]+2];
1885:       sum4  += v[jrow]*x[11*idx[jrow]+3];
1886:       sum5  += v[jrow]*x[11*idx[jrow]+4];
1887:       sum6  += v[jrow]*x[11*idx[jrow]+5];
1888:       sum7  += v[jrow]*x[11*idx[jrow]+6];
1889:       sum8  += v[jrow]*x[11*idx[jrow]+7];
1890:       sum9  += v[jrow]*x[11*idx[jrow]+8];
1891:       sum10 += v[jrow]*x[11*idx[jrow]+9];
1892:       sum11 += v[jrow]*x[11*idx[jrow]+10];
1893:       jrow++;
1894:     }
1895:     y[11*i]    = sum1;
1896:     y[11*i+1]  = sum2;
1897:     y[11*i+2]  = sum3;
1898:     y[11*i+3]  = sum4;
1899:     y[11*i+4]  = sum5;
1900:     y[11*i+5]  = sum6;
1901:     y[11*i+6]  = sum7;
1902:     y[11*i+7]  = sum8;
1903:     y[11*i+8]  = sum9;
1904:     y[11*i+9]  = sum10;
1905:     y[11*i+10] = sum11;
1906:   }

1908:   PetscLogFlops(22*a->nz - 11*nonzerorow);
1909:   VecRestoreArrayRead(xx,&x);
1910:   VecRestoreArray(yy,&y);
1911:   return(0);
1912: }

1914: PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1915: {
1916:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1917:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1918:   const PetscScalar *x,*v;
1919:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1920:   PetscErrorCode    ierr;
1921:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1922:   PetscInt          n,i,jrow,j;

1925:   if (yy != zz) {VecCopy(yy,zz);}
1926:   VecGetArrayRead(xx,&x);
1927:   VecGetArray(zz,&y);
1928:   idx  = a->j;
1929:   v    = a->a;
1930:   ii   = a->i;

1932:   for (i=0; i<m; i++) {
1933:     jrow  = ii[i];
1934:     n     = ii[i+1] - jrow;
1935:     sum1  = 0.0;
1936:     sum2  = 0.0;
1937:     sum3  = 0.0;
1938:     sum4  = 0.0;
1939:     sum5  = 0.0;
1940:     sum6  = 0.0;
1941:     sum7  = 0.0;
1942:     sum8  = 0.0;
1943:     sum9  = 0.0;
1944:     sum10 = 0.0;
1945:     sum11 = 0.0;
1946:     for (j=0; j<n; j++) {
1947:       sum1  += v[jrow]*x[11*idx[jrow]];
1948:       sum2  += v[jrow]*x[11*idx[jrow]+1];
1949:       sum3  += v[jrow]*x[11*idx[jrow]+2];
1950:       sum4  += v[jrow]*x[11*idx[jrow]+3];
1951:       sum5  += v[jrow]*x[11*idx[jrow]+4];
1952:       sum6  += v[jrow]*x[11*idx[jrow]+5];
1953:       sum7  += v[jrow]*x[11*idx[jrow]+6];
1954:       sum8  += v[jrow]*x[11*idx[jrow]+7];
1955:       sum9  += v[jrow]*x[11*idx[jrow]+8];
1956:       sum10 += v[jrow]*x[11*idx[jrow]+9];
1957:       sum11 += v[jrow]*x[11*idx[jrow]+10];
1958:       jrow++;
1959:     }
1960:     y[11*i]    += sum1;
1961:     y[11*i+1]  += sum2;
1962:     y[11*i+2]  += sum3;
1963:     y[11*i+3]  += sum4;
1964:     y[11*i+4]  += sum5;
1965:     y[11*i+5]  += sum6;
1966:     y[11*i+6]  += sum7;
1967:     y[11*i+7]  += sum8;
1968:     y[11*i+8]  += sum9;
1969:     y[11*i+9]  += sum10;
1970:     y[11*i+10] += sum11;
1971:   }

1973:   PetscLogFlops(22*a->nz);
1974:   VecRestoreArrayRead(xx,&x);
1975:   VecRestoreArray(yy,&y);
1976:   return(0);
1977: }

1979: PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1980: {
1981:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1982:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1983:   const PetscScalar *x,*v;
1984:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
1985:   PetscErrorCode    ierr;
1986:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1987:   PetscInt          n,i;

1990:   VecSet(yy,0.0);
1991:   VecGetArrayRead(xx,&x);
1992:   VecGetArray(yy,&y);

1994:   for (i=0; i<m; i++) {
1995:     idx     = a->j + a->i[i];
1996:     v       = a->a + a->i[i];
1997:     n       = a->i[i+1] - a->i[i];
1998:     alpha1  = x[11*i];
1999:     alpha2  = x[11*i+1];
2000:     alpha3  = x[11*i+2];
2001:     alpha4  = x[11*i+3];
2002:     alpha5  = x[11*i+4];
2003:     alpha6  = x[11*i+5];
2004:     alpha7  = x[11*i+6];
2005:     alpha8  = x[11*i+7];
2006:     alpha9  = x[11*i+8];
2007:     alpha10 = x[11*i+9];
2008:     alpha11 = x[11*i+10];
2009:     while (n-->0) {
2010:       y[11*(*idx)]    += alpha1*(*v);
2011:       y[11*(*idx)+1]  += alpha2*(*v);
2012:       y[11*(*idx)+2]  += alpha3*(*v);
2013:       y[11*(*idx)+3]  += alpha4*(*v);
2014:       y[11*(*idx)+4]  += alpha5*(*v);
2015:       y[11*(*idx)+5]  += alpha6*(*v);
2016:       y[11*(*idx)+6]  += alpha7*(*v);
2017:       y[11*(*idx)+7]  += alpha8*(*v);
2018:       y[11*(*idx)+8]  += alpha9*(*v);
2019:       y[11*(*idx)+9]  += alpha10*(*v);
2020:       y[11*(*idx)+10] += alpha11*(*v);
2021:       idx++; v++;
2022:     }
2023:   }
2024:   PetscLogFlops(22*a->nz);
2025:   VecRestoreArrayRead(xx,&x);
2026:   VecRestoreArray(yy,&y);
2027:   return(0);
2028: }

2030: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2031: {
2032:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2033:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2034:   const PetscScalar *x,*v;
2035:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2036:   PetscErrorCode    ierr;
2037:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2038:   PetscInt          n,i;

2041:   if (yy != zz) {VecCopy(yy,zz);}
2042:   VecGetArrayRead(xx,&x);
2043:   VecGetArray(zz,&y);
2044:   for (i=0; i<m; i++) {
2045:     idx     = a->j + a->i[i];
2046:     v       = a->a + a->i[i];
2047:     n       = a->i[i+1] - a->i[i];
2048:     alpha1  = x[11*i];
2049:     alpha2  = x[11*i+1];
2050:     alpha3  = x[11*i+2];
2051:     alpha4  = x[11*i+3];
2052:     alpha5  = x[11*i+4];
2053:     alpha6  = x[11*i+5];
2054:     alpha7  = x[11*i+6];
2055:     alpha8  = x[11*i+7];
2056:     alpha9  = x[11*i+8];
2057:     alpha10 = x[11*i+9];
2058:     alpha11 = x[11*i+10];
2059:     while (n-->0) {
2060:       y[11*(*idx)]    += alpha1*(*v);
2061:       y[11*(*idx)+1]  += alpha2*(*v);
2062:       y[11*(*idx)+2]  += alpha3*(*v);
2063:       y[11*(*idx)+3]  += alpha4*(*v);
2064:       y[11*(*idx)+4]  += alpha5*(*v);
2065:       y[11*(*idx)+5]  += alpha6*(*v);
2066:       y[11*(*idx)+6]  += alpha7*(*v);
2067:       y[11*(*idx)+7]  += alpha8*(*v);
2068:       y[11*(*idx)+8]  += alpha9*(*v);
2069:       y[11*(*idx)+9]  += alpha10*(*v);
2070:       y[11*(*idx)+10] += alpha11*(*v);
2071:       idx++; v++;
2072:     }
2073:   }
2074:   PetscLogFlops(22*a->nz);
2075:   VecRestoreArrayRead(xx,&x);
2076:   VecRestoreArray(zz,&y);
2077:   return(0);
2078: }


2081: /*--------------------------------------------------------------------------------------------*/
2082: PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2083: {
2084:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2085:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2086:   const PetscScalar *x,*v;
2087:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2088:   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2089:   PetscErrorCode    ierr;
2090:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2091:   PetscInt          nonzerorow=0,n,i,jrow,j;

2094:   VecGetArrayRead(xx,&x);
2095:   VecGetArray(yy,&y);
2096:   idx  = a->j;
2097:   v    = a->a;
2098:   ii   = a->i;

2100:   for (i=0; i<m; i++) {
2101:     jrow  = ii[i];
2102:     n     = ii[i+1] - jrow;
2103:     sum1  = 0.0;
2104:     sum2  = 0.0;
2105:     sum3  = 0.0;
2106:     sum4  = 0.0;
2107:     sum5  = 0.0;
2108:     sum6  = 0.0;
2109:     sum7  = 0.0;
2110:     sum8  = 0.0;
2111:     sum9  = 0.0;
2112:     sum10 = 0.0;
2113:     sum11 = 0.0;
2114:     sum12 = 0.0;
2115:     sum13 = 0.0;
2116:     sum14 = 0.0;
2117:     sum15 = 0.0;
2118:     sum16 = 0.0;

2120:     nonzerorow += (n>0);
2121:     for (j=0; j<n; j++) {
2122:       sum1  += v[jrow]*x[16*idx[jrow]];
2123:       sum2  += v[jrow]*x[16*idx[jrow]+1];
2124:       sum3  += v[jrow]*x[16*idx[jrow]+2];
2125:       sum4  += v[jrow]*x[16*idx[jrow]+3];
2126:       sum5  += v[jrow]*x[16*idx[jrow]+4];
2127:       sum6  += v[jrow]*x[16*idx[jrow]+5];
2128:       sum7  += v[jrow]*x[16*idx[jrow]+6];
2129:       sum8  += v[jrow]*x[16*idx[jrow]+7];
2130:       sum9  += v[jrow]*x[16*idx[jrow]+8];
2131:       sum10 += v[jrow]*x[16*idx[jrow]+9];
2132:       sum11 += v[jrow]*x[16*idx[jrow]+10];
2133:       sum12 += v[jrow]*x[16*idx[jrow]+11];
2134:       sum13 += v[jrow]*x[16*idx[jrow]+12];
2135:       sum14 += v[jrow]*x[16*idx[jrow]+13];
2136:       sum15 += v[jrow]*x[16*idx[jrow]+14];
2137:       sum16 += v[jrow]*x[16*idx[jrow]+15];
2138:       jrow++;
2139:     }
2140:     y[16*i]    = sum1;
2141:     y[16*i+1]  = sum2;
2142:     y[16*i+2]  = sum3;
2143:     y[16*i+3]  = sum4;
2144:     y[16*i+4]  = sum5;
2145:     y[16*i+5]  = sum6;
2146:     y[16*i+6]  = sum7;
2147:     y[16*i+7]  = sum8;
2148:     y[16*i+8]  = sum9;
2149:     y[16*i+9]  = sum10;
2150:     y[16*i+10] = sum11;
2151:     y[16*i+11] = sum12;
2152:     y[16*i+12] = sum13;
2153:     y[16*i+13] = sum14;
2154:     y[16*i+14] = sum15;
2155:     y[16*i+15] = sum16;
2156:   }

2158:   PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);
2159:   VecRestoreArrayRead(xx,&x);
2160:   VecRestoreArray(yy,&y);
2161:   return(0);
2162: }

2164: PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2165: {
2166:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2167:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2168:   const PetscScalar *x,*v;
2169:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2170:   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2171:   PetscErrorCode    ierr;
2172:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2173:   PetscInt          n,i;

2176:   VecSet(yy,0.0);
2177:   VecGetArrayRead(xx,&x);
2178:   VecGetArray(yy,&y);

2180:   for (i=0; i<m; i++) {
2181:     idx     = a->j + a->i[i];
2182:     v       = a->a + a->i[i];
2183:     n       = a->i[i+1] - a->i[i];
2184:     alpha1  = x[16*i];
2185:     alpha2  = x[16*i+1];
2186:     alpha3  = x[16*i+2];
2187:     alpha4  = x[16*i+3];
2188:     alpha5  = x[16*i+4];
2189:     alpha6  = x[16*i+5];
2190:     alpha7  = x[16*i+6];
2191:     alpha8  = x[16*i+7];
2192:     alpha9  = x[16*i+8];
2193:     alpha10 = x[16*i+9];
2194:     alpha11 = x[16*i+10];
2195:     alpha12 = x[16*i+11];
2196:     alpha13 = x[16*i+12];
2197:     alpha14 = x[16*i+13];
2198:     alpha15 = x[16*i+14];
2199:     alpha16 = x[16*i+15];
2200:     while (n-->0) {
2201:       y[16*(*idx)]    += alpha1*(*v);
2202:       y[16*(*idx)+1]  += alpha2*(*v);
2203:       y[16*(*idx)+2]  += alpha3*(*v);
2204:       y[16*(*idx)+3]  += alpha4*(*v);
2205:       y[16*(*idx)+4]  += alpha5*(*v);
2206:       y[16*(*idx)+5]  += alpha6*(*v);
2207:       y[16*(*idx)+6]  += alpha7*(*v);
2208:       y[16*(*idx)+7]  += alpha8*(*v);
2209:       y[16*(*idx)+8]  += alpha9*(*v);
2210:       y[16*(*idx)+9]  += alpha10*(*v);
2211:       y[16*(*idx)+10] += alpha11*(*v);
2212:       y[16*(*idx)+11] += alpha12*(*v);
2213:       y[16*(*idx)+12] += alpha13*(*v);
2214:       y[16*(*idx)+13] += alpha14*(*v);
2215:       y[16*(*idx)+14] += alpha15*(*v);
2216:       y[16*(*idx)+15] += alpha16*(*v);
2217:       idx++; v++;
2218:     }
2219:   }
2220:   PetscLogFlops(32.0*a->nz);
2221:   VecRestoreArrayRead(xx,&x);
2222:   VecRestoreArray(yy,&y);
2223:   return(0);
2224: }

2226: PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2227: {
2228:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2229:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2230:   const PetscScalar *x,*v;
2231:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2232:   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2233:   PetscErrorCode    ierr;
2234:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2235:   PetscInt          n,i,jrow,j;

2238:   if (yy != zz) {VecCopy(yy,zz);}
2239:   VecGetArrayRead(xx,&x);
2240:   VecGetArray(zz,&y);
2241:   idx  = a->j;
2242:   v    = a->a;
2243:   ii   = a->i;

2245:   for (i=0; i<m; i++) {
2246:     jrow  = ii[i];
2247:     n     = ii[i+1] - jrow;
2248:     sum1  = 0.0;
2249:     sum2  = 0.0;
2250:     sum3  = 0.0;
2251:     sum4  = 0.0;
2252:     sum5  = 0.0;
2253:     sum6  = 0.0;
2254:     sum7  = 0.0;
2255:     sum8  = 0.0;
2256:     sum9  = 0.0;
2257:     sum10 = 0.0;
2258:     sum11 = 0.0;
2259:     sum12 = 0.0;
2260:     sum13 = 0.0;
2261:     sum14 = 0.0;
2262:     sum15 = 0.0;
2263:     sum16 = 0.0;
2264:     for (j=0; j<n; j++) {
2265:       sum1  += v[jrow]*x[16*idx[jrow]];
2266:       sum2  += v[jrow]*x[16*idx[jrow]+1];
2267:       sum3  += v[jrow]*x[16*idx[jrow]+2];
2268:       sum4  += v[jrow]*x[16*idx[jrow]+3];
2269:       sum5  += v[jrow]*x[16*idx[jrow]+4];
2270:       sum6  += v[jrow]*x[16*idx[jrow]+5];
2271:       sum7  += v[jrow]*x[16*idx[jrow]+6];
2272:       sum8  += v[jrow]*x[16*idx[jrow]+7];
2273:       sum9  += v[jrow]*x[16*idx[jrow]+8];
2274:       sum10 += v[jrow]*x[16*idx[jrow]+9];
2275:       sum11 += v[jrow]*x[16*idx[jrow]+10];
2276:       sum12 += v[jrow]*x[16*idx[jrow]+11];
2277:       sum13 += v[jrow]*x[16*idx[jrow]+12];
2278:       sum14 += v[jrow]*x[16*idx[jrow]+13];
2279:       sum15 += v[jrow]*x[16*idx[jrow]+14];
2280:       sum16 += v[jrow]*x[16*idx[jrow]+15];
2281:       jrow++;
2282:     }
2283:     y[16*i]    += sum1;
2284:     y[16*i+1]  += sum2;
2285:     y[16*i+2]  += sum3;
2286:     y[16*i+3]  += sum4;
2287:     y[16*i+4]  += sum5;
2288:     y[16*i+5]  += sum6;
2289:     y[16*i+6]  += sum7;
2290:     y[16*i+7]  += sum8;
2291:     y[16*i+8]  += sum9;
2292:     y[16*i+9]  += sum10;
2293:     y[16*i+10] += sum11;
2294:     y[16*i+11] += sum12;
2295:     y[16*i+12] += sum13;
2296:     y[16*i+13] += sum14;
2297:     y[16*i+14] += sum15;
2298:     y[16*i+15] += sum16;
2299:   }

2301:   PetscLogFlops(32.0*a->nz);
2302:   VecRestoreArrayRead(xx,&x);
2303:   VecRestoreArray(zz,&y);
2304:   return(0);
2305: }

2307: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2308: {
2309:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2310:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2311:   const PetscScalar *x,*v;
2312:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2313:   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2314:   PetscErrorCode    ierr;
2315:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2316:   PetscInt          n,i;

2319:   if (yy != zz) {VecCopy(yy,zz);}
2320:   VecGetArrayRead(xx,&x);
2321:   VecGetArray(zz,&y);
2322:   for (i=0; i<m; i++) {
2323:     idx     = a->j + a->i[i];
2324:     v       = a->a + a->i[i];
2325:     n       = a->i[i+1] - a->i[i];
2326:     alpha1  = x[16*i];
2327:     alpha2  = x[16*i+1];
2328:     alpha3  = x[16*i+2];
2329:     alpha4  = x[16*i+3];
2330:     alpha5  = x[16*i+4];
2331:     alpha6  = x[16*i+5];
2332:     alpha7  = x[16*i+6];
2333:     alpha8  = x[16*i+7];
2334:     alpha9  = x[16*i+8];
2335:     alpha10 = x[16*i+9];
2336:     alpha11 = x[16*i+10];
2337:     alpha12 = x[16*i+11];
2338:     alpha13 = x[16*i+12];
2339:     alpha14 = x[16*i+13];
2340:     alpha15 = x[16*i+14];
2341:     alpha16 = x[16*i+15];
2342:     while (n-->0) {
2343:       y[16*(*idx)]    += alpha1*(*v);
2344:       y[16*(*idx)+1]  += alpha2*(*v);
2345:       y[16*(*idx)+2]  += alpha3*(*v);
2346:       y[16*(*idx)+3]  += alpha4*(*v);
2347:       y[16*(*idx)+4]  += alpha5*(*v);
2348:       y[16*(*idx)+5]  += alpha6*(*v);
2349:       y[16*(*idx)+6]  += alpha7*(*v);
2350:       y[16*(*idx)+7]  += alpha8*(*v);
2351:       y[16*(*idx)+8]  += alpha9*(*v);
2352:       y[16*(*idx)+9]  += alpha10*(*v);
2353:       y[16*(*idx)+10] += alpha11*(*v);
2354:       y[16*(*idx)+11] += alpha12*(*v);
2355:       y[16*(*idx)+12] += alpha13*(*v);
2356:       y[16*(*idx)+13] += alpha14*(*v);
2357:       y[16*(*idx)+14] += alpha15*(*v);
2358:       y[16*(*idx)+15] += alpha16*(*v);
2359:       idx++; v++;
2360:     }
2361:   }
2362:   PetscLogFlops(32.0*a->nz);
2363:   VecRestoreArrayRead(xx,&x);
2364:   VecRestoreArray(zz,&y);
2365:   return(0);
2366: }

2368: /*--------------------------------------------------------------------------------------------*/
2369: PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2370: {
2371:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2372:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2373:   const PetscScalar *x,*v;
2374:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2375:   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2376:   PetscErrorCode    ierr;
2377:   const PetscInt    m         = b->AIJ->rmap->n,*idx,*ii;
2378:   PetscInt          nonzerorow=0,n,i,jrow,j;

2381:   VecGetArrayRead(xx,&x);
2382:   VecGetArray(yy,&y);
2383:   idx  = a->j;
2384:   v    = a->a;
2385:   ii   = a->i;

2387:   for (i=0; i<m; i++) {
2388:     jrow  = ii[i];
2389:     n     = ii[i+1] - jrow;
2390:     sum1  = 0.0;
2391:     sum2  = 0.0;
2392:     sum3  = 0.0;
2393:     sum4  = 0.0;
2394:     sum5  = 0.0;
2395:     sum6  = 0.0;
2396:     sum7  = 0.0;
2397:     sum8  = 0.0;
2398:     sum9  = 0.0;
2399:     sum10 = 0.0;
2400:     sum11 = 0.0;
2401:     sum12 = 0.0;
2402:     sum13 = 0.0;
2403:     sum14 = 0.0;
2404:     sum15 = 0.0;
2405:     sum16 = 0.0;
2406:     sum17 = 0.0;
2407:     sum18 = 0.0;

2409:     nonzerorow += (n>0);
2410:     for (j=0; j<n; j++) {
2411:       sum1  += v[jrow]*x[18*idx[jrow]];
2412:       sum2  += v[jrow]*x[18*idx[jrow]+1];
2413:       sum3  += v[jrow]*x[18*idx[jrow]+2];
2414:       sum4  += v[jrow]*x[18*idx[jrow]+3];
2415:       sum5  += v[jrow]*x[18*idx[jrow]+4];
2416:       sum6  += v[jrow]*x[18*idx[jrow]+5];
2417:       sum7  += v[jrow]*x[18*idx[jrow]+6];
2418:       sum8  += v[jrow]*x[18*idx[jrow]+7];
2419:       sum9  += v[jrow]*x[18*idx[jrow]+8];
2420:       sum10 += v[jrow]*x[18*idx[jrow]+9];
2421:       sum11 += v[jrow]*x[18*idx[jrow]+10];
2422:       sum12 += v[jrow]*x[18*idx[jrow]+11];
2423:       sum13 += v[jrow]*x[18*idx[jrow]+12];
2424:       sum14 += v[jrow]*x[18*idx[jrow]+13];
2425:       sum15 += v[jrow]*x[18*idx[jrow]+14];
2426:       sum16 += v[jrow]*x[18*idx[jrow]+15];
2427:       sum17 += v[jrow]*x[18*idx[jrow]+16];
2428:       sum18 += v[jrow]*x[18*idx[jrow]+17];
2429:       jrow++;
2430:     }
2431:     y[18*i]    = sum1;
2432:     y[18*i+1]  = sum2;
2433:     y[18*i+2]  = sum3;
2434:     y[18*i+3]  = sum4;
2435:     y[18*i+4]  = sum5;
2436:     y[18*i+5]  = sum6;
2437:     y[18*i+6]  = sum7;
2438:     y[18*i+7]  = sum8;
2439:     y[18*i+8]  = sum9;
2440:     y[18*i+9]  = sum10;
2441:     y[18*i+10] = sum11;
2442:     y[18*i+11] = sum12;
2443:     y[18*i+12] = sum13;
2444:     y[18*i+13] = sum14;
2445:     y[18*i+14] = sum15;
2446:     y[18*i+15] = sum16;
2447:     y[18*i+16] = sum17;
2448:     y[18*i+17] = sum18;
2449:   }

2451:   PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);
2452:   VecRestoreArrayRead(xx,&x);
2453:   VecRestoreArray(yy,&y);
2454:   return(0);
2455: }

2457: PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2458: {
2459:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2460:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2461:   const PetscScalar *x,*v;
2462:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2463:   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2464:   PetscErrorCode    ierr;
2465:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2466:   PetscInt          n,i;

2469:   VecSet(yy,0.0);
2470:   VecGetArrayRead(xx,&x);
2471:   VecGetArray(yy,&y);

2473:   for (i=0; i<m; i++) {
2474:     idx     = a->j + a->i[i];
2475:     v       = a->a + a->i[i];
2476:     n       = a->i[i+1] - a->i[i];
2477:     alpha1  = x[18*i];
2478:     alpha2  = x[18*i+1];
2479:     alpha3  = x[18*i+2];
2480:     alpha4  = x[18*i+3];
2481:     alpha5  = x[18*i+4];
2482:     alpha6  = x[18*i+5];
2483:     alpha7  = x[18*i+6];
2484:     alpha8  = x[18*i+7];
2485:     alpha9  = x[18*i+8];
2486:     alpha10 = x[18*i+9];
2487:     alpha11 = x[18*i+10];
2488:     alpha12 = x[18*i+11];
2489:     alpha13 = x[18*i+12];
2490:     alpha14 = x[18*i+13];
2491:     alpha15 = x[18*i+14];
2492:     alpha16 = x[18*i+15];
2493:     alpha17 = x[18*i+16];
2494:     alpha18 = x[18*i+17];
2495:     while (n-->0) {
2496:       y[18*(*idx)]    += alpha1*(*v);
2497:       y[18*(*idx)+1]  += alpha2*(*v);
2498:       y[18*(*idx)+2]  += alpha3*(*v);
2499:       y[18*(*idx)+3]  += alpha4*(*v);
2500:       y[18*(*idx)+4]  += alpha5*(*v);
2501:       y[18*(*idx)+5]  += alpha6*(*v);
2502:       y[18*(*idx)+6]  += alpha7*(*v);
2503:       y[18*(*idx)+7]  += alpha8*(*v);
2504:       y[18*(*idx)+8]  += alpha9*(*v);
2505:       y[18*(*idx)+9]  += alpha10*(*v);
2506:       y[18*(*idx)+10] += alpha11*(*v);
2507:       y[18*(*idx)+11] += alpha12*(*v);
2508:       y[18*(*idx)+12] += alpha13*(*v);
2509:       y[18*(*idx)+13] += alpha14*(*v);
2510:       y[18*(*idx)+14] += alpha15*(*v);
2511:       y[18*(*idx)+15] += alpha16*(*v);
2512:       y[18*(*idx)+16] += alpha17*(*v);
2513:       y[18*(*idx)+17] += alpha18*(*v);
2514:       idx++; v++;
2515:     }
2516:   }
2517:   PetscLogFlops(36.0*a->nz);
2518:   VecRestoreArrayRead(xx,&x);
2519:   VecRestoreArray(yy,&y);
2520:   return(0);
2521: }

2523: PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2524: {
2525:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2526:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2527:   const PetscScalar *x,*v;
2528:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2529:   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2530:   PetscErrorCode    ierr;
2531:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2532:   PetscInt          n,i,jrow,j;

2535:   if (yy != zz) {VecCopy(yy,zz);}
2536:   VecGetArrayRead(xx,&x);
2537:   VecGetArray(zz,&y);
2538:   idx  = a->j;
2539:   v    = a->a;
2540:   ii   = a->i;

2542:   for (i=0; i<m; i++) {
2543:     jrow  = ii[i];
2544:     n     = ii[i+1] - jrow;
2545:     sum1  = 0.0;
2546:     sum2  = 0.0;
2547:     sum3  = 0.0;
2548:     sum4  = 0.0;
2549:     sum5  = 0.0;
2550:     sum6  = 0.0;
2551:     sum7  = 0.0;
2552:     sum8  = 0.0;
2553:     sum9  = 0.0;
2554:     sum10 = 0.0;
2555:     sum11 = 0.0;
2556:     sum12 = 0.0;
2557:     sum13 = 0.0;
2558:     sum14 = 0.0;
2559:     sum15 = 0.0;
2560:     sum16 = 0.0;
2561:     sum17 = 0.0;
2562:     sum18 = 0.0;
2563:     for (j=0; j<n; j++) {
2564:       sum1  += v[jrow]*x[18*idx[jrow]];
2565:       sum2  += v[jrow]*x[18*idx[jrow]+1];
2566:       sum3  += v[jrow]*x[18*idx[jrow]+2];
2567:       sum4  += v[jrow]*x[18*idx[jrow]+3];
2568:       sum5  += v[jrow]*x[18*idx[jrow]+4];
2569:       sum6  += v[jrow]*x[18*idx[jrow]+5];
2570:       sum7  += v[jrow]*x[18*idx[jrow]+6];
2571:       sum8  += v[jrow]*x[18*idx[jrow]+7];
2572:       sum9  += v[jrow]*x[18*idx[jrow]+8];
2573:       sum10 += v[jrow]*x[18*idx[jrow]+9];
2574:       sum11 += v[jrow]*x[18*idx[jrow]+10];
2575:       sum12 += v[jrow]*x[18*idx[jrow]+11];
2576:       sum13 += v[jrow]*x[18*idx[jrow]+12];
2577:       sum14 += v[jrow]*x[18*idx[jrow]+13];
2578:       sum15 += v[jrow]*x[18*idx[jrow]+14];
2579:       sum16 += v[jrow]*x[18*idx[jrow]+15];
2580:       sum17 += v[jrow]*x[18*idx[jrow]+16];
2581:       sum18 += v[jrow]*x[18*idx[jrow]+17];
2582:       jrow++;
2583:     }
2584:     y[18*i]    += sum1;
2585:     y[18*i+1]  += sum2;
2586:     y[18*i+2]  += sum3;
2587:     y[18*i+3]  += sum4;
2588:     y[18*i+4]  += sum5;
2589:     y[18*i+5]  += sum6;
2590:     y[18*i+6]  += sum7;
2591:     y[18*i+7]  += sum8;
2592:     y[18*i+8]  += sum9;
2593:     y[18*i+9]  += sum10;
2594:     y[18*i+10] += sum11;
2595:     y[18*i+11] += sum12;
2596:     y[18*i+12] += sum13;
2597:     y[18*i+13] += sum14;
2598:     y[18*i+14] += sum15;
2599:     y[18*i+15] += sum16;
2600:     y[18*i+16] += sum17;
2601:     y[18*i+17] += sum18;
2602:   }

2604:   PetscLogFlops(36.0*a->nz);
2605:   VecRestoreArrayRead(xx,&x);
2606:   VecRestoreArray(zz,&y);
2607:   return(0);
2608: }

2610: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2611: {
2612:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2613:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2614:   const PetscScalar *x,*v;
2615:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2616:   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2617:   PetscErrorCode    ierr;
2618:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2619:   PetscInt          n,i;

2622:   if (yy != zz) {VecCopy(yy,zz);}
2623:   VecGetArrayRead(xx,&x);
2624:   VecGetArray(zz,&y);
2625:   for (i=0; i<m; i++) {
2626:     idx     = a->j + a->i[i];
2627:     v       = a->a + a->i[i];
2628:     n       = a->i[i+1] - a->i[i];
2629:     alpha1  = x[18*i];
2630:     alpha2  = x[18*i+1];
2631:     alpha3  = x[18*i+2];
2632:     alpha4  = x[18*i+3];
2633:     alpha5  = x[18*i+4];
2634:     alpha6  = x[18*i+5];
2635:     alpha7  = x[18*i+6];
2636:     alpha8  = x[18*i+7];
2637:     alpha9  = x[18*i+8];
2638:     alpha10 = x[18*i+9];
2639:     alpha11 = x[18*i+10];
2640:     alpha12 = x[18*i+11];
2641:     alpha13 = x[18*i+12];
2642:     alpha14 = x[18*i+13];
2643:     alpha15 = x[18*i+14];
2644:     alpha16 = x[18*i+15];
2645:     alpha17 = x[18*i+16];
2646:     alpha18 = x[18*i+17];
2647:     while (n-->0) {
2648:       y[18*(*idx)]    += alpha1*(*v);
2649:       y[18*(*idx)+1]  += alpha2*(*v);
2650:       y[18*(*idx)+2]  += alpha3*(*v);
2651:       y[18*(*idx)+3]  += alpha4*(*v);
2652:       y[18*(*idx)+4]  += alpha5*(*v);
2653:       y[18*(*idx)+5]  += alpha6*(*v);
2654:       y[18*(*idx)+6]  += alpha7*(*v);
2655:       y[18*(*idx)+7]  += alpha8*(*v);
2656:       y[18*(*idx)+8]  += alpha9*(*v);
2657:       y[18*(*idx)+9]  += alpha10*(*v);
2658:       y[18*(*idx)+10] += alpha11*(*v);
2659:       y[18*(*idx)+11] += alpha12*(*v);
2660:       y[18*(*idx)+12] += alpha13*(*v);
2661:       y[18*(*idx)+13] += alpha14*(*v);
2662:       y[18*(*idx)+14] += alpha15*(*v);
2663:       y[18*(*idx)+15] += alpha16*(*v);
2664:       y[18*(*idx)+16] += alpha17*(*v);
2665:       y[18*(*idx)+17] += alpha18*(*v);
2666:       idx++; v++;
2667:     }
2668:   }
2669:   PetscLogFlops(36.0*a->nz);
2670:   VecRestoreArrayRead(xx,&x);
2671:   VecRestoreArray(zz,&y);
2672:   return(0);
2673: }

2675: PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2676: {
2677:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2678:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2679:   const PetscScalar *x,*v;
2680:   PetscScalar       *y,*sums;
2681:   PetscErrorCode    ierr;
2682:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2683:   PetscInt          n,i,jrow,j,dof = b->dof,k;

2686:   VecGetArrayRead(xx,&x);
2687:   VecSet(yy,0.0);
2688:   VecGetArray(yy,&y);
2689:   idx  = a->j;
2690:   v    = a->a;
2691:   ii   = a->i;

2693:   for (i=0; i<m; i++) {
2694:     jrow = ii[i];
2695:     n    = ii[i+1] - jrow;
2696:     sums = y + dof*i;
2697:     for (j=0; j<n; j++) {
2698:       for (k=0; k<dof; k++) {
2699:         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2700:       }
2701:       jrow++;
2702:     }
2703:   }

2705:   PetscLogFlops(2.0*dof*a->nz);
2706:   VecRestoreArrayRead(xx,&x);
2707:   VecRestoreArray(yy,&y);
2708:   return(0);
2709: }

2711: PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2712: {
2713:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2714:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2715:   const PetscScalar *x,*v;
2716:   PetscScalar       *y,*sums;
2717:   PetscErrorCode    ierr;
2718:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2719:   PetscInt          n,i,jrow,j,dof = b->dof,k;

2722:   if (yy != zz) {VecCopy(yy,zz);}
2723:   VecGetArrayRead(xx,&x);
2724:   VecGetArray(zz,&y);
2725:   idx  = a->j;
2726:   v    = a->a;
2727:   ii   = a->i;

2729:   for (i=0; i<m; i++) {
2730:     jrow = ii[i];
2731:     n    = ii[i+1] - jrow;
2732:     sums = y + dof*i;
2733:     for (j=0; j<n; j++) {
2734:       for (k=0; k<dof; k++) {
2735:         sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2736:       }
2737:       jrow++;
2738:     }
2739:   }

2741:   PetscLogFlops(2.0*dof*a->nz);
2742:   VecRestoreArrayRead(xx,&x);
2743:   VecRestoreArray(zz,&y);
2744:   return(0);
2745: }

2747: PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2748: {
2749:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2750:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2751:   const PetscScalar *x,*v,*alpha;
2752:   PetscScalar       *y;
2753:   PetscErrorCode    ierr;
2754:   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2755:   PetscInt          n,i,k;

2758:   VecGetArrayRead(xx,&x);
2759:   VecSet(yy,0.0);
2760:   VecGetArray(yy,&y);
2761:   for (i=0; i<m; i++) {
2762:     idx   = a->j + a->i[i];
2763:     v     = a->a + a->i[i];
2764:     n     = a->i[i+1] - a->i[i];
2765:     alpha = x + dof*i;
2766:     while (n-->0) {
2767:       for (k=0; k<dof; k++) {
2768:         y[dof*(*idx)+k] += alpha[k]*(*v);
2769:       }
2770:       idx++; v++;
2771:     }
2772:   }
2773:   PetscLogFlops(2.0*dof*a->nz);
2774:   VecRestoreArrayRead(xx,&x);
2775:   VecRestoreArray(yy,&y);
2776:   return(0);
2777: }

2779: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2780: {
2781:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2782:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2783:   const PetscScalar *x,*v,*alpha;
2784:   PetscScalar       *y;
2785:   PetscErrorCode    ierr;
2786:   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2787:   PetscInt          n,i,k;

2790:   if (yy != zz) {VecCopy(yy,zz);}
2791:   VecGetArrayRead(xx,&x);
2792:   VecGetArray(zz,&y);
2793:   for (i=0; i<m; i++) {
2794:     idx   = a->j + a->i[i];
2795:     v     = a->a + a->i[i];
2796:     n     = a->i[i+1] - a->i[i];
2797:     alpha = x + dof*i;
2798:     while (n-->0) {
2799:       for (k=0; k<dof; k++) {
2800:         y[dof*(*idx)+k] += alpha[k]*(*v);
2801:       }
2802:       idx++; v++;
2803:     }
2804:   }
2805:   PetscLogFlops(2.0*dof*a->nz);
2806:   VecRestoreArrayRead(xx,&x);
2807:   VecRestoreArray(zz,&y);
2808:   return(0);
2809: }

2811: /*===================================================================================*/
2812: PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2813: {
2814:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2818:   /* start the scatter */
2819:   VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2820:   (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
2821:   VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2822:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
2823:   return(0);
2824: }

2826: PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2827: {
2828:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2832:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2833:   (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
2834:   VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2835:   VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2836:   return(0);
2837: }

2839: PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2840: {
2841:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2845:   /* start the scatter */
2846:   VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2847:   (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
2848:   VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2849:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);
2850:   return(0);
2851: }

2853: PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2854: {
2855:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2859:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2860:   (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
2861:   VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2862:   VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2863:   return(0);
2864: }

2866: /* ----------------------------------------------------------------*/
2867: PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2868: {
2869:   PetscErrorCode     ierr;
2870:   PetscFreeSpaceList free_space=NULL,current_space=NULL;
2871:   Mat_SeqMAIJ        *pp       =(Mat_SeqMAIJ*)PP->data;
2872:   Mat                P         =pp->AIJ;
2873:   Mat_SeqAIJ         *a        =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2874:   PetscInt           *pti,*ptj,*ptJ;
2875:   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2876:   const PetscInt     an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
2877:   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
2878:   MatScalar          *ca;
2879:   const PetscInt     *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;

2882:   /* Get ij structure of P^T */
2883:   MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);

2885:   cn = pn*ppdof;
2886:   /* Allocate ci array, arrays for fill computation and */
2887:   /* free space for accumulating nonzero column info */
2888:   PetscMalloc1(cn+1,&ci);
2889:   ci[0] = 0;

2891:   /* Work arrays for rows of P^T*A */
2892:   PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow);
2893:   PetscArrayzero(ptadenserow,an);
2894:   PetscArrayzero(denserow,cn);

2896:   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2897:   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2898:   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2899:   PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space);
2900:   current_space = free_space;

2902:   /* Determine symbolic info for each row of C: */
2903:   for (i=0; i<pn; i++) {
2904:     ptnzi = pti[i+1] - pti[i];
2905:     ptJ   = ptj + pti[i];
2906:     for (dof=0; dof<ppdof; dof++) {
2907:       ptanzi = 0;
2908:       /* Determine symbolic row of PtA: */
2909:       for (j=0; j<ptnzi; j++) {
2910:         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2911:         arow = ptJ[j]*ppdof + dof;
2912:         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2913:         anzj = ai[arow+1] - ai[arow];
2914:         ajj  = aj + ai[arow];
2915:         for (k=0; k<anzj; k++) {
2916:           if (!ptadenserow[ajj[k]]) {
2917:             ptadenserow[ajj[k]]    = -1;
2918:             ptasparserow[ptanzi++] = ajj[k];
2919:           }
2920:         }
2921:       }
2922:       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2923:       ptaj = ptasparserow;
2924:       cnzi = 0;
2925:       for (j=0; j<ptanzi; j++) {
2926:         /* Get offset within block of P */
2927:         pshift = *ptaj%ppdof;
2928:         /* Get block row of P */
2929:         prow = (*ptaj++)/ppdof; /* integer division */
2930:         /* P has same number of nonzeros per row as the compressed form */
2931:         pnzj = pi[prow+1] - pi[prow];
2932:         pjj  = pj + pi[prow];
2933:         for (k=0;k<pnzj;k++) {
2934:           /* Locations in C are shifted by the offset within the block */
2935:           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
2936:           if (!denserow[pjj[k]*ppdof+pshift]) {
2937:             denserow[pjj[k]*ppdof+pshift] = -1;
2938:             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
2939:           }
2940:         }
2941:       }

2943:       /* sort sparserow */
2944:       PetscSortInt(cnzi,sparserow);

2946:       /* If free space is not available, make more free space */
2947:       /* Double the amount of total space in the list */
2948:       if (current_space->local_remaining<cnzi) {
2949:         PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),&current_space);
2950:       }

2952:       /* Copy data into free space, and zero out denserows */
2953:       PetscArraycpy(current_space->array,sparserow,cnzi);

2955:       current_space->array           += cnzi;
2956:       current_space->local_used      += cnzi;
2957:       current_space->local_remaining -= cnzi;

2959:       for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
2960:       for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0;

2962:       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
2963:       /*        For now, we will recompute what is needed. */
2964:       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
2965:     }
2966:   }
2967:   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
2968:   /* Allocate space for cj, initialize cj, and */
2969:   /* destroy list of free space and other temporary array(s) */
2970:   PetscMalloc1(ci[cn]+1,&cj);
2971:   PetscFreeSpaceContiguous(&free_space,cj);
2972:   PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);

2974:   /* Allocate space for ca */
2975:   PetscCalloc1(ci[cn]+1,&ca);

2977:   /* put together the new matrix */
2978:   MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,C);
2979:   MatSetBlockSize(*C,pp->dof);

2981:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
2982:   /* Since these are PETSc arrays, change flags to free them as necessary. */
2983:   c          = (Mat_SeqAIJ*)((*C)->data);
2984:   c->free_a  = PETSC_TRUE;
2985:   c->free_ij = PETSC_TRUE;
2986:   c->nonew   = 0;

2988:   (*C)->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ;

2990:   /* Clean up. */
2991:   MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
2992:   return(0);
2993: }

2995: PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
2996: {
2997:   /* This routine requires testing -- first draft only */
2998:   PetscErrorCode  ierr;
2999:   Mat_SeqMAIJ     *pp=(Mat_SeqMAIJ*)PP->data;
3000:   Mat             P  =pp->AIJ;
3001:   Mat_SeqAIJ      *a = (Mat_SeqAIJ*) A->data;
3002:   Mat_SeqAIJ      *p = (Mat_SeqAIJ*) P->data;
3003:   Mat_SeqAIJ      *c = (Mat_SeqAIJ*) C->data;
3004:   const PetscInt  *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj;
3005:   const PetscInt  *ci=c->i,*cj=c->j,*cjj;
3006:   const PetscInt  am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3007:   PetscInt        i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3008:   const MatScalar *aa=a->a,*pa=p->a,*pA,*paj;
3009:   MatScalar       *ca=c->a,*caj,*apa;

3012:   /* Allocate temporary array for storage of one row of A*P */
3013:   PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense);

3015:   /* Clear old values in C */
3016:   PetscArrayzero(ca,ci[cm]);

3018:   for (i=0; i<am; i++) {
3019:     /* Form sparse row of A*P */
3020:     anzi  = ai[i+1] - ai[i];
3021:     apnzj = 0;
3022:     for (j=0; j<anzi; j++) {
3023:       /* Get offset within block of P */
3024:       pshift = *aj%ppdof;
3025:       /* Get block row of P */
3026:       prow = *aj++/ppdof;   /* integer division */
3027:       pnzj = pi[prow+1] - pi[prow];
3028:       pjj  = pj + pi[prow];
3029:       paj  = pa + pi[prow];
3030:       for (k=0; k<pnzj; k++) {
3031:         poffset = pjj[k]*ppdof+pshift;
3032:         if (!apjdense[poffset]) {
3033:           apjdense[poffset] = -1;
3034:           apj[apnzj++]      = poffset;
3035:         }
3036:         apa[poffset] += (*aa)*paj[k];
3037:       }
3038:       PetscLogFlops(2.0*pnzj);
3039:       aa++;
3040:     }

3042:     /* Sort the j index array for quick sparse axpy. */
3043:     /* Note: a array does not need sorting as it is in dense storage locations. */
3044:     PetscSortInt(apnzj,apj);

3046:     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
3047:     prow    = i/ppdof; /* integer division */
3048:     pshift  = i%ppdof;
3049:     poffset = pi[prow];
3050:     pnzi    = pi[prow+1] - poffset;
3051:     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
3052:     pJ = pj+poffset;
3053:     pA = pa+poffset;
3054:     for (j=0; j<pnzi; j++) {
3055:       crow = (*pJ)*ppdof+pshift;
3056:       cjj  = cj + ci[crow];
3057:       caj  = ca + ci[crow];
3058:       pJ++;
3059:       /* Perform sparse axpy operation.  Note cjj includes apj. */
3060:       for (k=0,nextap=0; nextap<apnzj; k++) {
3061:         if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]];
3062:       }
3063:       PetscLogFlops(2.0*apnzj);
3064:       pA++;
3065:     }

3067:     /* Zero the current row info for A*P */
3068:     for (j=0; j<apnzj; j++) {
3069:       apa[apj[j]]      = 0.;
3070:       apjdense[apj[j]] = 0;
3071:     }
3072:   }

3074:   /* Assemble the final matrix and clean up */
3075:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
3076:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
3077:   PetscFree3(apa,apj,apjdense);
3078:   return(0);
3079: }

3081: PETSC_INTERN PetscErrorCode MatPtAP_SeqAIJ_SeqMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
3082: {

3086:   if (scall == MAT_INITIAL_MATRIX) {
3087:     PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);
3088:     MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,fill,C);
3089:     PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);
3090:   }
3091:   PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);
3092:   MatPtAPNumeric_SeqAIJ_SeqMAIJ(A,P,*C);
3093:   PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);
3094:   return(0);
3095: }

3097: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
3098: {
3100:   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPSymbolic is not implemented for MPIMAIJ matrix yet");
3101:   return(0);
3102: }

3104: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
3105: {
3107:   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
3108:   return(0);
3109: }

3111: PETSC_INTERN PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);

3113: PETSC_INTERN PetscErrorCode MatPtAP_MPIAIJ_MPIMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
3114: {

3118:   if (scall == MAT_INITIAL_MATRIX) {
3119:     PetscInfo((PetscObject)A,"Converting from MAIJ to AIJ matrix since implementation not available for MAIJ");
3120:     MatConvert(P,MATMPIAIJ,MAT_INPLACE_MATRIX,&P);
3121:   }
3122:   MatPtAP_MPIAIJ_MPIAIJ(A,P,scall,fill,C);
3123:   return(0);
3124: }

3126: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,Mat);

3128: PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,Mat C)
3129: {
3130:   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3131:   PetscErrorCode  ierr;


3135:   MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,C);
3136:   return(0);
3137: }

3139: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,PetscReal,Mat*);

3141: PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,PetscReal fill,Mat *C)
3142: {
3143:   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3144:   PetscErrorCode  ierr;


3148:   MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,fill,C);
3149:   (*C)->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce;
3150:   return(0);
3151: }

3153: PETSC_INTERN PetscErrorCode MatPtAP_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
3154: {
3155:   PetscErrorCode   ierr;
3156:   Mat_MPIAIJ       *c;
3157:   Mat_APMPI        *ap;
3159:   if (scall == MAT_INITIAL_MATRIX) {
3160:     PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);
3161:     MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A,P,fill,C);
3162:     PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);
3163:   }

3165:   c  = (Mat_MPIAIJ*)(*C)->data;
3166:   ap = c->ap;
3167:   PetscOptionsBegin(PetscObjectComm((PetscObject)(*C)),((PetscObject)(*C))->prefix,"MatFreeIntermediateDataStructures","Mat");
3168:   ap->freestruct = PETSC_FALSE;
3169:   PetscOptionsBool("-mat_freeintermediatedatastructures","Free intermediate data structures", "MatFreeIntermediateDataStructures",ap->freestruct,&ap->freestruct, NULL);
3170:   PetscOptionsEnd();

3172:   PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);
3173:   (*(*C)->ops->ptapnumeric)(A,P,*C);
3174:   PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);
3175:   return(0);
3176: }

3178: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,Mat);

3180: PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,Mat C)
3181: {
3182:   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3183:   PetscErrorCode  ierr;


3187:   MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,C);
3188:   return(0);
3189: }

3191: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,PetscReal,Mat*);

3193: PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,PetscReal fill,Mat *C)
3194: {
3195:   Mat_MPIMAIJ     *maij = (Mat_MPIMAIJ*)P->data;
3196:   PetscErrorCode  ierr;


3200:   MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,fill,C);
3201:   (*C)->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged;
3202:   return(0);
3203: }

3205: PETSC_INTERN PetscErrorCode MatPtAP_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
3206: {
3208:   Mat_MPIAIJ     *c;
3209:   Mat_APMPI      *ap;

3212:   if (scall == MAT_INITIAL_MATRIX) {
3213:     PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);
3214:     MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A,P,fill,C);
3215:     PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);
3216:   }

3218:   c  = (Mat_MPIAIJ*)(*C)->data;
3219:   ap = c->ap;
3220:   PetscOptionsBegin(PetscObjectComm((PetscObject)(*C)),((PetscObject)(*C))->prefix,"MatFreeIntermediateDataStructures","Mat");
3221:   ap->freestruct = PETSC_FALSE;
3222:   PetscOptionsBool("-mat_freeintermediatedatastructures","Free intermediate data structures", "MatFreeIntermediateDataStructures",ap->freestruct,&ap->freestruct, NULL);
3223:   PetscOptionsEnd();

3225:   PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);
3226:   (*(*C)->ops->ptapnumeric)(A,P,*C);
3227:   PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);
3228:   return(0);
3229: }

3231: PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3232: {
3233:   Mat_SeqMAIJ    *b   = (Mat_SeqMAIJ*)A->data;
3234:   Mat            a    = b->AIJ,B;
3235:   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)a->data;
3237:   PetscInt       m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3238:   PetscInt       *cols;
3239:   PetscScalar    *vals;

3242:   MatGetSize(a,&m,&n);
3243:   PetscMalloc1(dof*m,&ilen);
3244:   for (i=0; i<m; i++) {
3245:     nmax = PetscMax(nmax,aij->ilen[i]);
3246:     for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i];
3247:   }
3248:   MatCreate(PETSC_COMM_SELF,&B);
3249:   MatSetSizes(B,dof*m,dof*n,dof*m,dof*n);
3250:   MatSetType(B,newtype);
3251:   MatSeqAIJSetPreallocation(B,0,ilen);
3252:   PetscFree(ilen);
3253:   PetscMalloc1(nmax,&icols);
3254:   ii   = 0;
3255:   for (i=0; i<m; i++) {
3256:     MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3257:     for (j=0; j<dof; j++) {
3258:       for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j;
3259:       MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3260:       ii++;
3261:     }
3262:     MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3263:   }
3264:   PetscFree(icols);
3265:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3266:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

3268:   if (reuse == MAT_INPLACE_MATRIX) {
3269:     MatHeaderReplace(A,&B);
3270:   } else {
3271:     *newmat = B;
3272:   }
3273:   return(0);
3274: }

3276:  #include <../src/mat/impls/aij/mpi/mpiaij.h>

3278: PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3279: {
3280:   Mat_MPIMAIJ    *maij   = (Mat_MPIMAIJ*)A->data;
3281:   Mat            MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
3282:   Mat            MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
3283:   Mat_SeqAIJ     *AIJ    = (Mat_SeqAIJ*) MatAIJ->data;
3284:   Mat_SeqAIJ     *OAIJ   =(Mat_SeqAIJ*) MatOAIJ->data;
3285:   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*) maij->A->data;
3286:   PetscInt       dof     = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0;
3287:   PetscInt       *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL;
3288:   PetscInt       rstart,cstart,*garray,ii,k;
3290:   PetscScalar    *vals,*ovals;

3293:   PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz);
3294:   for (i=0; i<A->rmap->n/dof; i++) {
3295:     nmax  = PetscMax(nmax,AIJ->ilen[i]);
3296:     onmax = PetscMax(onmax,OAIJ->ilen[i]);
3297:     for (j=0; j<dof; j++) {
3298:       dnz[dof*i+j] = AIJ->ilen[i];
3299:       onz[dof*i+j] = OAIJ->ilen[i];
3300:     }
3301:   }
3302:   MatCreate(PetscObjectComm((PetscObject)A),&B);
3303:   MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
3304:   MatSetType(B,newtype);
3305:   MatMPIAIJSetPreallocation(B,0,dnz,0,onz);
3306:   MatSetBlockSize(B,dof);
3307:   PetscFree2(dnz,onz);

3309:   PetscMalloc2(nmax,&icols,onmax,&oicols);
3310:   rstart = dof*maij->A->rmap->rstart;
3311:   cstart = dof*maij->A->cmap->rstart;
3312:   garray = mpiaij->garray;

3314:   ii = rstart;
3315:   for (i=0; i<A->rmap->n/dof; i++) {
3316:     MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3317:     MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3318:     for (j=0; j<dof; j++) {
3319:       for (k=0; k<ncols; k++) {
3320:         icols[k] = cstart + dof*cols[k]+j;
3321:       }
3322:       for (k=0; k<oncols; k++) {
3323:         oicols[k] = dof*garray[ocols[k]]+j;
3324:       }
3325:       MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3326:       MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);
3327:       ii++;
3328:     }
3329:     MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3330:     MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3331:   }
3332:   PetscFree2(icols,oicols);

3334:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3335:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

3337:   if (reuse == MAT_INPLACE_MATRIX) {
3338:     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
3339:     ((PetscObject)A)->refct = 1;

3341:     MatHeaderReplace(A,&B);

3343:     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3344:   } else {
3345:     *newmat = B;
3346:   }
3347:   return(0);
3348: }

3350: PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
3351: {
3353:   Mat            A;

3356:   MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);
3357:   MatCreateSubMatrix(A,isrow,iscol,cll,newmat);
3358:   MatDestroy(&A);
3359:   return(0);
3360: }

3362: PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *submat[])
3363: {
3365:   Mat            A;

3368:   MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);
3369:   MatCreateSubMatrices(A,n,irow,icol,scall,submat);
3370:   MatDestroy(&A);
3371:   return(0);
3372: }

3374: PetscErrorCode MatSetFromOptions_MAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
3375: {
3376:   PetscErrorCode       ierr;
3377:   /* By default, we convert MAIJ to MPIAIJ and then do PtAP.
3378:    * If we convert it to MPIAIJ, it is better to use MPIAIJ at the first place.
3379:    * However, not sure other PETSc developers want to change it or not. So I
3380:    * still leave it as is.
3381:    */
3382:   const char          *algTypes[3] = {"mpiaij","allatonce","allatonce_merged"};
3383:   PetscInt             nalg=3,alg=0;
3384:   PetscMPIInt          size;

3387:   MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
3388:   if (size <= 1) return(0);
3389:   PetscOptionsHead(PetscOptionsObject,"MPIMAIJ options");
3390:   PetscOptionsEList("-matmaijptap_via","Algorithmic approach","MatPtAP",algTypes,nalg,algTypes[alg],&alg,NULL);
3391:   switch (alg) {
3392:   case 0: /* Do not do anything */
3393:     break;
3394:   case 1:
3395:     PetscObjectComposeFunction((PetscObject)A,"MatPtAP_mpiaij_mpimaij_C",MatPtAP_MPIAIJ_MPIMAIJ_allatonce);
3396:     break;
3397:   case 2:
3398:     PetscObjectComposeFunction((PetscObject)A,"MatPtAP_mpiaij_mpimaij_C",MatPtAP_MPIAIJ_MPIMAIJ_allatonce_merged);
3399:     break;
3400:   }
3401:   PetscOptionsTail();
3402:   return(0);
3403: }

3405: PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);


3408: /* ---------------------------------------------------------------------------------- */
3409: /*@
3410:   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
3411:   operations for multicomponent problems.  It interpolates each component the same
3412:   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
3413:   and MATMPIAIJ for distributed matrices.

3415:   Collective

3417:   Input Parameters:
3418: + A - the AIJ matrix describing the action on blocks
3419: - dof - the block size (number of components per node)

3421:   Output Parameter:
3422: . maij - the new MAIJ matrix

3424:   Operations provided:
3425: + MatMult
3426: . MatMultTranspose
3427: . MatMultAdd
3428: . MatMultTransposeAdd
3429: - MatView

3431:   Level: advanced

3433: .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ
3434: @*/
3435: PetscErrorCode  MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
3436: {
3438:   PetscMPIInt    size;
3439:   PetscInt       n;
3440:   Mat            B;
3441: #if defined(PETSC_HAVE_CUDA)
3442:   /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */
3443:   PetscBool      convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE;
3444: #endif

3447:   dof  = PetscAbs(dof);
3448:   PetscObjectReference((PetscObject)A);

3450:   if (dof == 1) *maij = A;
3451:   else {
3452:     MatCreate(PetscObjectComm((PetscObject)A),&B);
3453:     MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);
3454:     PetscLayoutSetBlockSize(B->rmap,dof);
3455:     PetscLayoutSetBlockSize(B->cmap,dof);
3456:     PetscLayoutSetUp(B->rmap);
3457:     PetscLayoutSetUp(B->cmap);

3459:     B->assembled = PETSC_TRUE;

3461:     MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
3462:     if (size == 1) {
3463:       Mat_SeqMAIJ *b;

3465:       MatSetType(B,MATSEQMAIJ);

3467:       B->ops->setup   = NULL;
3468:       B->ops->destroy = MatDestroy_SeqMAIJ;
3469:       B->ops->view    = MatView_SeqMAIJ;
3470:       b               = (Mat_SeqMAIJ*)B->data;
3471:       b->dof          = dof;
3472:       b->AIJ          = A;

3474:       if (dof == 2) {
3475:         B->ops->mult             = MatMult_SeqMAIJ_2;
3476:         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3477:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3478:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3479:       } else if (dof == 3) {
3480:         B->ops->mult             = MatMult_SeqMAIJ_3;
3481:         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3482:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3483:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3484:       } else if (dof == 4) {
3485:         B->ops->mult             = MatMult_SeqMAIJ_4;
3486:         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3487:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3488:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3489:       } else if (dof == 5) {
3490:         B->ops->mult             = MatMult_SeqMAIJ_5;
3491:         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3492:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3493:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
3494:       } else if (dof == 6) {
3495:         B->ops->mult             = MatMult_SeqMAIJ_6;
3496:         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
3497:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
3498:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3499:       } else if (dof == 7) {
3500:         B->ops->mult             = MatMult_SeqMAIJ_7;
3501:         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3502:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3503:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
3504:       } else if (dof == 8) {
3505:         B->ops->mult             = MatMult_SeqMAIJ_8;
3506:         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
3507:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
3508:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
3509:       } else if (dof == 9) {
3510:         B->ops->mult             = MatMult_SeqMAIJ_9;
3511:         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
3512:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
3513:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3514:       } else if (dof == 10) {
3515:         B->ops->mult             = MatMult_SeqMAIJ_10;
3516:         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3517:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3518:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3519:       } else if (dof == 11) {
3520:         B->ops->mult             = MatMult_SeqMAIJ_11;
3521:         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3522:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3523:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
3524:       } else if (dof == 16) {
3525:         B->ops->mult             = MatMult_SeqMAIJ_16;
3526:         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
3527:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
3528:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3529:       } else if (dof == 18) {
3530:         B->ops->mult             = MatMult_SeqMAIJ_18;
3531:         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3532:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3533:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
3534:       } else {
3535:         B->ops->mult             = MatMult_SeqMAIJ_N;
3536:         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
3537:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
3538:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
3539:       }
3540: #if defined(PETSC_HAVE_CUDA)
3541:       PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaijcusparse_C",MatConvert_SeqMAIJ_SeqAIJ);
3542: #endif
3543:       PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ);
3544:       PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaij_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);
3545:       PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaijperm_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);
3546:       PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaijmkl_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);
3547:       PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqmaij_C",MatPtAP_IS_XAIJ);
3548:     } else {
3549:       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ*)A->data;
3550:       Mat_MPIMAIJ *b;
3551:       IS          from,to;
3552:       Vec         gvec;

3554:       MatSetType(B,MATMPIMAIJ);

3556:       B->ops->setup            = NULL;
3557:       B->ops->destroy          = MatDestroy_MPIMAIJ;
3558:       B->ops->view             = MatView_MPIMAIJ;

3560:       b      = (Mat_MPIMAIJ*)B->data;
3561:       b->dof = dof;
3562:       b->A   = A;

3564:       MatCreateMAIJ(mpiaij->A,-dof,&b->AIJ);
3565:       MatCreateMAIJ(mpiaij->B,-dof,&b->OAIJ);

3567:       VecGetSize(mpiaij->lvec,&n);
3568:       VecCreate(PETSC_COMM_SELF,&b->w);
3569:       VecSetSizes(b->w,n*dof,n*dof);
3570:       VecSetBlockSize(b->w,dof);
3571:       VecSetType(b->w,VECSEQ);

3573:       /* create two temporary Index sets for build scatter gather */
3574:       ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);
3575:       ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);

3577:       /* create temporary global vector to generate scatter context */
3578:       VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);

3580:       /* generate the scatter context */
3581:       VecScatterCreate(gvec,from,b->w,to,&b->ctx);

3583:       ISDestroy(&from);
3584:       ISDestroy(&to);
3585:       VecDestroy(&gvec);

3587:       B->ops->mult             = MatMult_MPIMAIJ_dof;
3588:       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
3589:       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
3590:       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;

3592: #if defined(PETSC_HAVE_CUDA)
3593:       PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaijcusparse_C",MatConvert_MPIMAIJ_MPIAIJ);
3594: #endif
3595:       PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ);
3596:       PetscObjectComposeFunction((PetscObject)B,"MatPtAP_mpiaij_mpimaij_C",MatPtAP_MPIAIJ_MPIMAIJ);
3597:       PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpimaij_C",MatPtAP_IS_XAIJ);
3598:     }
3599:     B->ops->createsubmatrix   = MatCreateSubMatrix_MAIJ;
3600:     B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ;
3601:     B->ops->setfromoptions    = MatSetFromOptions_MAIJ;
3602:     MatSetFromOptions(B);
3603:     MatSetUp(B);
3604: #if defined(PETSC_HAVE_CUDA)
3605:     /* temporary until we have CUDA implementation of MAIJ */
3606:     {
3607:       PetscBool flg;
3608:       if (convert) {
3609:         PetscObjectTypeCompareAny((PetscObject)A,&flg,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,MATAIJCUSPARSE,"");
3610:         if (flg) {
3611:           MatConvert(B,((PetscObject)A)->type_name,MAT_INPLACE_MATRIX,&B);
3612:         }
3613:       }
3614:     }
3615: #endif
3616:     *maij = B;
3617:     MatViewFromOptions(B,NULL,"-mat_view");
3618:   }

3620:   return(0);
3621: }