Actual source code: maij.c
petsc-3.6.1 2015-08-06
2: /*
3: Defines the basic matrix operations for the MAIJ matrix storage format.
4: This format is used for restriction and interpolation operations for
5: multicomponent problems. It interpolates each component the same way
6: independently.
8: We provide:
9: MatMult()
10: MatMultTranspose()
11: MatMultTransposeAdd()
12: MatMultAdd()
13: and
14: MatCreateMAIJ(Mat,dof,Mat*)
16: This single directory handles both the sequential and parallel codes
17: */
19: #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
20: #include <../src/mat/utils/freespace.h>
21: #include <petsc/private/vecimpl.h>
25: /*@
26: MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix
28: Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel
30: Input Parameter:
31: . A - the MAIJ matrix
33: Output Parameter:
34: . B - the AIJ matrix
36: Level: advanced
38: Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
40: .seealso: MatCreateMAIJ()
41: @*/
42: PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B)
43: {
45: PetscBool ismpimaij,isseqmaij;
48: PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
49: PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
50: if (ismpimaij) {
51: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
53: *B = b->A;
54: } else if (isseqmaij) {
55: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
57: *B = b->AIJ;
58: } else {
59: *B = A;
60: }
61: return(0);
62: }
66: /*@C
67: MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size
69: Logically Collective
71: Input Parameter:
72: + A - the MAIJ matrix
73: - dof - the block size for the new matrix
75: Output Parameter:
76: . B - the new MAIJ matrix
78: Level: advanced
80: .seealso: MatCreateMAIJ()
81: @*/
82: PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
83: {
85: Mat Aij = NULL;
89: MatMAIJGetAIJ(A,&Aij);
90: MatCreateMAIJ(Aij,dof,B);
91: return(0);
92: }
96: PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
97: {
99: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
102: MatDestroy(&b->AIJ);
103: PetscFree(A->data);
104: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL);
105: PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaij_seqmaij_C",NULL);
106: return(0);
107: }
111: PetscErrorCode MatSetUp_MAIJ(Mat A)
112: {
114: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices");
115: return(0);
116: }
120: PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
121: {
123: Mat B;
126: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
127: MatView(B,viewer);
128: MatDestroy(&B);
129: return(0);
130: }
134: PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
135: {
137: Mat B;
140: MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
141: MatView(B,viewer);
142: MatDestroy(&B);
143: return(0);
144: }
148: PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
149: {
151: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
154: MatDestroy(&b->AIJ);
155: MatDestroy(&b->OAIJ);
156: MatDestroy(&b->A);
157: VecScatterDestroy(&b->ctx);
158: VecDestroy(&b->w);
159: PetscFree(A->data);
160: PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL);
161: PetscObjectComposeFunction((PetscObject)A,"MatPtAP_mpiaij_mpimaij_C",NULL);
162: PetscObjectChangeTypeName((PetscObject)A,0);
163: return(0);
164: }
166: /*MC
167: MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
168: multicomponent problems, interpolating or restricting each component the same way independently.
169: The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
171: Operations provided:
172: . MatMult
173: . MatMultTranspose
174: . MatMultAdd
175: . MatMultTransposeAdd
177: Level: advanced
179: .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ()
180: M*/
184: PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
185: {
187: Mat_MPIMAIJ *b;
188: PetscMPIInt size;
191: PetscNewLog(A,&b);
192: A->data = (void*)b;
194: PetscMemzero(A->ops,sizeof(struct _MatOps));
196: A->ops->setup = MatSetUp_MAIJ;
198: b->AIJ = 0;
199: b->dof = 0;
200: b->OAIJ = 0;
201: b->ctx = 0;
202: b->w = 0;
203: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
204: if (size == 1) {
205: PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);
206: } else {
207: PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);
208: }
209: A->preallocated = PETSC_TRUE;
210: A->assembled = PETSC_TRUE;
211: return(0);
212: }
214: /* --------------------------------------------------------------------------------------*/
217: PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
218: {
219: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
220: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
221: const PetscScalar *x,*v;
222: PetscScalar *y, sum1, sum2;
223: PetscErrorCode ierr;
224: PetscInt nonzerorow=0,n,i,jrow,j;
225: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
228: VecGetArrayRead(xx,&x);
229: VecGetArray(yy,&y);
230: idx = a->j;
231: v = a->a;
232: ii = a->i;
234: for (i=0; i<m; i++) {
235: jrow = ii[i];
236: n = ii[i+1] - jrow;
237: sum1 = 0.0;
238: sum2 = 0.0;
240: nonzerorow += (n>0);
241: for (j=0; j<n; j++) {
242: sum1 += v[jrow]*x[2*idx[jrow]];
243: sum2 += v[jrow]*x[2*idx[jrow]+1];
244: jrow++;
245: }
246: y[2*i] = sum1;
247: y[2*i+1] = sum2;
248: }
250: PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);
251: VecRestoreArrayRead(xx,&x);
252: VecRestoreArray(yy,&y);
253: return(0);
254: }
258: PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
259: {
260: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
261: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
262: const PetscScalar *x,*v;
263: PetscScalar *y,alpha1,alpha2;
264: PetscErrorCode ierr;
265: PetscInt n,i;
266: const PetscInt m = b->AIJ->rmap->n,*idx;
269: VecSet(yy,0.0);
270: VecGetArrayRead(xx,&x);
271: VecGetArray(yy,&y);
273: for (i=0; i<m; i++) {
274: idx = a->j + a->i[i];
275: v = a->a + a->i[i];
276: n = a->i[i+1] - a->i[i];
277: alpha1 = x[2*i];
278: alpha2 = x[2*i+1];
279: while (n-->0) {
280: y[2*(*idx)] += alpha1*(*v);
281: y[2*(*idx)+1] += alpha2*(*v);
282: idx++; v++;
283: }
284: }
285: PetscLogFlops(4.0*a->nz);
286: VecRestoreArrayRead(xx,&x);
287: VecRestoreArray(yy,&y);
288: return(0);
289: }
293: PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
294: {
295: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
296: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
297: const PetscScalar *x,*v;
298: PetscScalar *y,sum1, sum2;
299: PetscErrorCode ierr;
300: PetscInt n,i,jrow,j;
301: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
304: if (yy != zz) {VecCopy(yy,zz);}
305: VecGetArrayRead(xx,&x);
306: VecGetArray(zz,&y);
307: idx = a->j;
308: v = a->a;
309: ii = a->i;
311: for (i=0; i<m; i++) {
312: jrow = ii[i];
313: n = ii[i+1] - jrow;
314: sum1 = 0.0;
315: sum2 = 0.0;
316: for (j=0; j<n; j++) {
317: sum1 += v[jrow]*x[2*idx[jrow]];
318: sum2 += v[jrow]*x[2*idx[jrow]+1];
319: jrow++;
320: }
321: y[2*i] += sum1;
322: y[2*i+1] += sum2;
323: }
325: PetscLogFlops(4.0*a->nz);
326: VecRestoreArrayRead(xx,&x);
327: VecRestoreArray(zz,&y);
328: return(0);
329: }
332: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
333: {
334: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
335: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
336: const PetscScalar *x,*v;
337: PetscScalar *y,alpha1,alpha2;
338: PetscErrorCode ierr;
339: PetscInt n,i;
340: const PetscInt m = b->AIJ->rmap->n,*idx;
343: if (yy != zz) {VecCopy(yy,zz);}
344: VecGetArrayRead(xx,&x);
345: VecGetArray(zz,&y);
347: for (i=0; i<m; i++) {
348: idx = a->j + a->i[i];
349: v = a->a + a->i[i];
350: n = a->i[i+1] - a->i[i];
351: alpha1 = x[2*i];
352: alpha2 = x[2*i+1];
353: while (n-->0) {
354: y[2*(*idx)] += alpha1*(*v);
355: y[2*(*idx)+1] += alpha2*(*v);
356: idx++; v++;
357: }
358: }
359: PetscLogFlops(4.0*a->nz);
360: VecRestoreArrayRead(xx,&x);
361: VecRestoreArray(zz,&y);
362: return(0);
363: }
364: /* --------------------------------------------------------------------------------------*/
367: PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
368: {
369: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
370: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
371: const PetscScalar *x,*v;
372: PetscScalar *y,sum1, sum2, sum3;
373: PetscErrorCode ierr;
374: PetscInt nonzerorow=0,n,i,jrow,j;
375: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
378: VecGetArrayRead(xx,&x);
379: VecGetArray(yy,&y);
380: idx = a->j;
381: v = a->a;
382: ii = a->i;
384: for (i=0; i<m; i++) {
385: jrow = ii[i];
386: n = ii[i+1] - jrow;
387: sum1 = 0.0;
388: sum2 = 0.0;
389: sum3 = 0.0;
391: nonzerorow += (n>0);
392: for (j=0; j<n; j++) {
393: sum1 += v[jrow]*x[3*idx[jrow]];
394: sum2 += v[jrow]*x[3*idx[jrow]+1];
395: sum3 += v[jrow]*x[3*idx[jrow]+2];
396: jrow++;
397: }
398: y[3*i] = sum1;
399: y[3*i+1] = sum2;
400: y[3*i+2] = sum3;
401: }
403: PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);
404: VecRestoreArrayRead(xx,&x);
405: VecRestoreArray(yy,&y);
406: return(0);
407: }
411: PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
412: {
413: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
414: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
415: const PetscScalar *x,*v;
416: PetscScalar *y,alpha1,alpha2,alpha3;
417: PetscErrorCode ierr;
418: PetscInt n,i;
419: const PetscInt m = b->AIJ->rmap->n,*idx;
422: VecSet(yy,0.0);
423: VecGetArrayRead(xx,&x);
424: VecGetArray(yy,&y);
426: for (i=0; i<m; i++) {
427: idx = a->j + a->i[i];
428: v = a->a + a->i[i];
429: n = a->i[i+1] - a->i[i];
430: alpha1 = x[3*i];
431: alpha2 = x[3*i+1];
432: alpha3 = x[3*i+2];
433: while (n-->0) {
434: y[3*(*idx)] += alpha1*(*v);
435: y[3*(*idx)+1] += alpha2*(*v);
436: y[3*(*idx)+2] += alpha3*(*v);
437: idx++; v++;
438: }
439: }
440: PetscLogFlops(6.0*a->nz);
441: VecRestoreArrayRead(xx,&x);
442: VecRestoreArray(yy,&y);
443: return(0);
444: }
448: PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
449: {
450: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
451: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
452: const PetscScalar *x,*v;
453: PetscScalar *y,sum1, sum2, sum3;
454: PetscErrorCode ierr;
455: PetscInt n,i,jrow,j;
456: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
459: if (yy != zz) {VecCopy(yy,zz);}
460: VecGetArrayRead(xx,&x);
461: VecGetArray(zz,&y);
462: idx = a->j;
463: v = a->a;
464: ii = a->i;
466: for (i=0; i<m; i++) {
467: jrow = ii[i];
468: n = ii[i+1] - jrow;
469: sum1 = 0.0;
470: sum2 = 0.0;
471: sum3 = 0.0;
472: for (j=0; j<n; j++) {
473: sum1 += v[jrow]*x[3*idx[jrow]];
474: sum2 += v[jrow]*x[3*idx[jrow]+1];
475: sum3 += v[jrow]*x[3*idx[jrow]+2];
476: jrow++;
477: }
478: y[3*i] += sum1;
479: y[3*i+1] += sum2;
480: y[3*i+2] += sum3;
481: }
483: PetscLogFlops(6.0*a->nz);
484: VecRestoreArrayRead(xx,&x);
485: VecRestoreArray(zz,&y);
486: return(0);
487: }
490: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
491: {
492: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
493: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
494: const PetscScalar *x,*v;
495: PetscScalar *y,alpha1,alpha2,alpha3;
496: PetscErrorCode ierr;
497: PetscInt n,i;
498: const PetscInt m = b->AIJ->rmap->n,*idx;
501: if (yy != zz) {VecCopy(yy,zz);}
502: VecGetArrayRead(xx,&x);
503: VecGetArray(zz,&y);
504: for (i=0; i<m; i++) {
505: idx = a->j + a->i[i];
506: v = a->a + a->i[i];
507: n = a->i[i+1] - a->i[i];
508: alpha1 = x[3*i];
509: alpha2 = x[3*i+1];
510: alpha3 = x[3*i+2];
511: while (n-->0) {
512: y[3*(*idx)] += alpha1*(*v);
513: y[3*(*idx)+1] += alpha2*(*v);
514: y[3*(*idx)+2] += alpha3*(*v);
515: idx++; v++;
516: }
517: }
518: PetscLogFlops(6.0*a->nz);
519: VecRestoreArrayRead(xx,&x);
520: VecRestoreArray(zz,&y);
521: return(0);
522: }
524: /* ------------------------------------------------------------------------------*/
527: PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
528: {
529: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
530: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
531: const PetscScalar *x,*v;
532: PetscScalar *y,sum1, sum2, sum3, sum4;
533: PetscErrorCode ierr;
534: PetscInt nonzerorow=0,n,i,jrow,j;
535: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
538: VecGetArrayRead(xx,&x);
539: VecGetArray(yy,&y);
540: idx = a->j;
541: v = a->a;
542: ii = a->i;
544: for (i=0; i<m; i++) {
545: jrow = ii[i];
546: n = ii[i+1] - jrow;
547: sum1 = 0.0;
548: sum2 = 0.0;
549: sum3 = 0.0;
550: sum4 = 0.0;
551: nonzerorow += (n>0);
552: for (j=0; j<n; j++) {
553: sum1 += v[jrow]*x[4*idx[jrow]];
554: sum2 += v[jrow]*x[4*idx[jrow]+1];
555: sum3 += v[jrow]*x[4*idx[jrow]+2];
556: sum4 += v[jrow]*x[4*idx[jrow]+3];
557: jrow++;
558: }
559: y[4*i] = sum1;
560: y[4*i+1] = sum2;
561: y[4*i+2] = sum3;
562: y[4*i+3] = sum4;
563: }
565: PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);
566: VecRestoreArrayRead(xx,&x);
567: VecRestoreArray(yy,&y);
568: return(0);
569: }
573: PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
574: {
575: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
576: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
577: const PetscScalar *x,*v;
578: PetscScalar *y,alpha1,alpha2,alpha3,alpha4;
579: PetscErrorCode ierr;
580: PetscInt n,i;
581: const PetscInt m = b->AIJ->rmap->n,*idx;
584: VecSet(yy,0.0);
585: VecGetArrayRead(xx,&x);
586: VecGetArray(yy,&y);
587: for (i=0; i<m; i++) {
588: idx = a->j + a->i[i];
589: v = a->a + a->i[i];
590: n = a->i[i+1] - a->i[i];
591: alpha1 = x[4*i];
592: alpha2 = x[4*i+1];
593: alpha3 = x[4*i+2];
594: alpha4 = x[4*i+3];
595: while (n-->0) {
596: y[4*(*idx)] += alpha1*(*v);
597: y[4*(*idx)+1] += alpha2*(*v);
598: y[4*(*idx)+2] += alpha3*(*v);
599: y[4*(*idx)+3] += alpha4*(*v);
600: idx++; v++;
601: }
602: }
603: PetscLogFlops(8.0*a->nz);
604: VecRestoreArrayRead(xx,&x);
605: VecRestoreArray(yy,&y);
606: return(0);
607: }
611: PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
612: {
613: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
614: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
615: const PetscScalar *x,*v;
616: PetscScalar *y,sum1, sum2, sum3, sum4;
617: PetscErrorCode ierr;
618: PetscInt n,i,jrow,j;
619: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
622: if (yy != zz) {VecCopy(yy,zz);}
623: VecGetArrayRead(xx,&x);
624: VecGetArray(zz,&y);
625: idx = a->j;
626: v = a->a;
627: ii = a->i;
629: for (i=0; i<m; i++) {
630: jrow = ii[i];
631: n = ii[i+1] - jrow;
632: sum1 = 0.0;
633: sum2 = 0.0;
634: sum3 = 0.0;
635: sum4 = 0.0;
636: for (j=0; j<n; j++) {
637: sum1 += v[jrow]*x[4*idx[jrow]];
638: sum2 += v[jrow]*x[4*idx[jrow]+1];
639: sum3 += v[jrow]*x[4*idx[jrow]+2];
640: sum4 += v[jrow]*x[4*idx[jrow]+3];
641: jrow++;
642: }
643: y[4*i] += sum1;
644: y[4*i+1] += sum2;
645: y[4*i+2] += sum3;
646: y[4*i+3] += sum4;
647: }
649: PetscLogFlops(8.0*a->nz);
650: VecRestoreArrayRead(xx,&x);
651: VecRestoreArray(zz,&y);
652: return(0);
653: }
656: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
657: {
658: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
659: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
660: const PetscScalar *x,*v;
661: PetscScalar *y,alpha1,alpha2,alpha3,alpha4;
662: PetscErrorCode ierr;
663: PetscInt n,i;
664: const PetscInt m = b->AIJ->rmap->n,*idx;
667: if (yy != zz) {VecCopy(yy,zz);}
668: VecGetArrayRead(xx,&x);
669: VecGetArray(zz,&y);
671: for (i=0; i<m; i++) {
672: idx = a->j + a->i[i];
673: v = a->a + a->i[i];
674: n = a->i[i+1] - a->i[i];
675: alpha1 = x[4*i];
676: alpha2 = x[4*i+1];
677: alpha3 = x[4*i+2];
678: alpha4 = x[4*i+3];
679: while (n-->0) {
680: y[4*(*idx)] += alpha1*(*v);
681: y[4*(*idx)+1] += alpha2*(*v);
682: y[4*(*idx)+2] += alpha3*(*v);
683: y[4*(*idx)+3] += alpha4*(*v);
684: idx++; v++;
685: }
686: }
687: PetscLogFlops(8.0*a->nz);
688: VecRestoreArrayRead(xx,&x);
689: VecRestoreArray(zz,&y);
690: return(0);
691: }
692: /* ------------------------------------------------------------------------------*/
696: PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
697: {
698: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
699: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
700: const PetscScalar *x,*v;
701: PetscScalar *y,sum1, sum2, sum3, sum4, sum5;
702: PetscErrorCode ierr;
703: PetscInt nonzerorow=0,n,i,jrow,j;
704: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
707: VecGetArrayRead(xx,&x);
708: VecGetArray(yy,&y);
709: idx = a->j;
710: v = a->a;
711: ii = a->i;
713: for (i=0; i<m; i++) {
714: jrow = ii[i];
715: n = ii[i+1] - jrow;
716: sum1 = 0.0;
717: sum2 = 0.0;
718: sum3 = 0.0;
719: sum4 = 0.0;
720: sum5 = 0.0;
722: nonzerorow += (n>0);
723: for (j=0; j<n; j++) {
724: sum1 += v[jrow]*x[5*idx[jrow]];
725: sum2 += v[jrow]*x[5*idx[jrow]+1];
726: sum3 += v[jrow]*x[5*idx[jrow]+2];
727: sum4 += v[jrow]*x[5*idx[jrow]+3];
728: sum5 += v[jrow]*x[5*idx[jrow]+4];
729: jrow++;
730: }
731: y[5*i] = sum1;
732: y[5*i+1] = sum2;
733: y[5*i+2] = sum3;
734: y[5*i+3] = sum4;
735: y[5*i+4] = sum5;
736: }
738: PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);
739: VecRestoreArrayRead(xx,&x);
740: VecRestoreArray(yy,&y);
741: return(0);
742: }
746: PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
747: {
748: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
749: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
750: const PetscScalar *x,*v;
751: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5;
752: PetscErrorCode ierr;
753: PetscInt n,i;
754: const PetscInt m = b->AIJ->rmap->n,*idx;
757: VecSet(yy,0.0);
758: VecGetArrayRead(xx,&x);
759: VecGetArray(yy,&y);
761: for (i=0; i<m; i++) {
762: idx = a->j + a->i[i];
763: v = a->a + a->i[i];
764: n = a->i[i+1] - a->i[i];
765: alpha1 = x[5*i];
766: alpha2 = x[5*i+1];
767: alpha3 = x[5*i+2];
768: alpha4 = x[5*i+3];
769: alpha5 = x[5*i+4];
770: while (n-->0) {
771: y[5*(*idx)] += alpha1*(*v);
772: y[5*(*idx)+1] += alpha2*(*v);
773: y[5*(*idx)+2] += alpha3*(*v);
774: y[5*(*idx)+3] += alpha4*(*v);
775: y[5*(*idx)+4] += alpha5*(*v);
776: idx++; v++;
777: }
778: }
779: PetscLogFlops(10.0*a->nz);
780: VecRestoreArrayRead(xx,&x);
781: VecRestoreArray(yy,&y);
782: return(0);
783: }
787: PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
788: {
789: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
790: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
791: const PetscScalar *x,*v;
792: PetscScalar *y,sum1, sum2, sum3, sum4, sum5;
793: PetscErrorCode ierr;
794: PetscInt n,i,jrow,j;
795: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
798: if (yy != zz) {VecCopy(yy,zz);}
799: VecGetArrayRead(xx,&x);
800: VecGetArray(zz,&y);
801: idx = a->j;
802: v = a->a;
803: ii = a->i;
805: for (i=0; i<m; i++) {
806: jrow = ii[i];
807: n = ii[i+1] - jrow;
808: sum1 = 0.0;
809: sum2 = 0.0;
810: sum3 = 0.0;
811: sum4 = 0.0;
812: sum5 = 0.0;
813: for (j=0; j<n; j++) {
814: sum1 += v[jrow]*x[5*idx[jrow]];
815: sum2 += v[jrow]*x[5*idx[jrow]+1];
816: sum3 += v[jrow]*x[5*idx[jrow]+2];
817: sum4 += v[jrow]*x[5*idx[jrow]+3];
818: sum5 += v[jrow]*x[5*idx[jrow]+4];
819: jrow++;
820: }
821: y[5*i] += sum1;
822: y[5*i+1] += sum2;
823: y[5*i+2] += sum3;
824: y[5*i+3] += sum4;
825: y[5*i+4] += sum5;
826: }
828: PetscLogFlops(10.0*a->nz);
829: VecRestoreArrayRead(xx,&x);
830: VecRestoreArray(zz,&y);
831: return(0);
832: }
836: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
837: {
838: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
839: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
840: const PetscScalar *x,*v;
841: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5;
842: PetscErrorCode ierr;
843: PetscInt n,i;
844: const PetscInt m = b->AIJ->rmap->n,*idx;
847: if (yy != zz) {VecCopy(yy,zz);}
848: VecGetArrayRead(xx,&x);
849: VecGetArray(zz,&y);
851: for (i=0; i<m; i++) {
852: idx = a->j + a->i[i];
853: v = a->a + a->i[i];
854: n = a->i[i+1] - a->i[i];
855: alpha1 = x[5*i];
856: alpha2 = x[5*i+1];
857: alpha3 = x[5*i+2];
858: alpha4 = x[5*i+3];
859: alpha5 = x[5*i+4];
860: while (n-->0) {
861: y[5*(*idx)] += alpha1*(*v);
862: y[5*(*idx)+1] += alpha2*(*v);
863: y[5*(*idx)+2] += alpha3*(*v);
864: y[5*(*idx)+3] += alpha4*(*v);
865: y[5*(*idx)+4] += alpha5*(*v);
866: idx++; v++;
867: }
868: }
869: PetscLogFlops(10.0*a->nz);
870: VecRestoreArrayRead(xx,&x);
871: VecRestoreArray(zz,&y);
872: return(0);
873: }
875: /* ------------------------------------------------------------------------------*/
878: PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
879: {
880: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
881: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
882: const PetscScalar *x,*v;
883: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6;
884: PetscErrorCode ierr;
885: PetscInt nonzerorow=0,n,i,jrow,j;
886: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
889: VecGetArrayRead(xx,&x);
890: VecGetArray(yy,&y);
891: idx = a->j;
892: v = a->a;
893: ii = a->i;
895: for (i=0; i<m; i++) {
896: jrow = ii[i];
897: n = ii[i+1] - jrow;
898: sum1 = 0.0;
899: sum2 = 0.0;
900: sum3 = 0.0;
901: sum4 = 0.0;
902: sum5 = 0.0;
903: sum6 = 0.0;
905: nonzerorow += (n>0);
906: for (j=0; j<n; j++) {
907: sum1 += v[jrow]*x[6*idx[jrow]];
908: sum2 += v[jrow]*x[6*idx[jrow]+1];
909: sum3 += v[jrow]*x[6*idx[jrow]+2];
910: sum4 += v[jrow]*x[6*idx[jrow]+3];
911: sum5 += v[jrow]*x[6*idx[jrow]+4];
912: sum6 += v[jrow]*x[6*idx[jrow]+5];
913: jrow++;
914: }
915: y[6*i] = sum1;
916: y[6*i+1] = sum2;
917: y[6*i+2] = sum3;
918: y[6*i+3] = sum4;
919: y[6*i+4] = sum5;
920: y[6*i+5] = sum6;
921: }
923: PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);
924: VecRestoreArrayRead(xx,&x);
925: VecRestoreArray(yy,&y);
926: return(0);
927: }
931: PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
932: {
933: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
934: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
935: const PetscScalar *x,*v;
936: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
937: PetscErrorCode ierr;
938: PetscInt n,i;
939: const PetscInt m = b->AIJ->rmap->n,*idx;
942: VecSet(yy,0.0);
943: VecGetArrayRead(xx,&x);
944: VecGetArray(yy,&y);
946: for (i=0; i<m; i++) {
947: idx = a->j + a->i[i];
948: v = a->a + a->i[i];
949: n = a->i[i+1] - a->i[i];
950: alpha1 = x[6*i];
951: alpha2 = x[6*i+1];
952: alpha3 = x[6*i+2];
953: alpha4 = x[6*i+3];
954: alpha5 = x[6*i+4];
955: alpha6 = x[6*i+5];
956: while (n-->0) {
957: y[6*(*idx)] += alpha1*(*v);
958: y[6*(*idx)+1] += alpha2*(*v);
959: y[6*(*idx)+2] += alpha3*(*v);
960: y[6*(*idx)+3] += alpha4*(*v);
961: y[6*(*idx)+4] += alpha5*(*v);
962: y[6*(*idx)+5] += alpha6*(*v);
963: idx++; v++;
964: }
965: }
966: PetscLogFlops(12.0*a->nz);
967: VecRestoreArrayRead(xx,&x);
968: VecRestoreArray(yy,&y);
969: return(0);
970: }
974: PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
975: {
976: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
977: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
978: const PetscScalar *x,*v;
979: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6;
980: PetscErrorCode ierr;
981: PetscInt n,i,jrow,j;
982: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
985: if (yy != zz) {VecCopy(yy,zz);}
986: VecGetArrayRead(xx,&x);
987: VecGetArray(zz,&y);
988: idx = a->j;
989: v = a->a;
990: ii = a->i;
992: for (i=0; i<m; i++) {
993: jrow = ii[i];
994: n = ii[i+1] - jrow;
995: sum1 = 0.0;
996: sum2 = 0.0;
997: sum3 = 0.0;
998: sum4 = 0.0;
999: sum5 = 0.0;
1000: sum6 = 0.0;
1001: for (j=0; j<n; j++) {
1002: sum1 += v[jrow]*x[6*idx[jrow]];
1003: sum2 += v[jrow]*x[6*idx[jrow]+1];
1004: sum3 += v[jrow]*x[6*idx[jrow]+2];
1005: sum4 += v[jrow]*x[6*idx[jrow]+3];
1006: sum5 += v[jrow]*x[6*idx[jrow]+4];
1007: sum6 += v[jrow]*x[6*idx[jrow]+5];
1008: jrow++;
1009: }
1010: y[6*i] += sum1;
1011: y[6*i+1] += sum2;
1012: y[6*i+2] += sum3;
1013: y[6*i+3] += sum4;
1014: y[6*i+4] += sum5;
1015: y[6*i+5] += sum6;
1016: }
1018: PetscLogFlops(12.0*a->nz);
1019: VecRestoreArrayRead(xx,&x);
1020: VecRestoreArray(zz,&y);
1021: return(0);
1022: }
1026: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
1027: {
1028: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1029: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1030: const PetscScalar *x,*v;
1031: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
1032: PetscErrorCode ierr;
1033: PetscInt n,i;
1034: const PetscInt m = b->AIJ->rmap->n,*idx;
1037: if (yy != zz) {VecCopy(yy,zz);}
1038: VecGetArrayRead(xx,&x);
1039: VecGetArray(zz,&y);
1041: for (i=0; i<m; i++) {
1042: idx = a->j + a->i[i];
1043: v = a->a + a->i[i];
1044: n = a->i[i+1] - a->i[i];
1045: alpha1 = x[6*i];
1046: alpha2 = x[6*i+1];
1047: alpha3 = x[6*i+2];
1048: alpha4 = x[6*i+3];
1049: alpha5 = x[6*i+4];
1050: alpha6 = x[6*i+5];
1051: while (n-->0) {
1052: y[6*(*idx)] += alpha1*(*v);
1053: y[6*(*idx)+1] += alpha2*(*v);
1054: y[6*(*idx)+2] += alpha3*(*v);
1055: y[6*(*idx)+3] += alpha4*(*v);
1056: y[6*(*idx)+4] += alpha5*(*v);
1057: y[6*(*idx)+5] += alpha6*(*v);
1058: idx++; v++;
1059: }
1060: }
1061: PetscLogFlops(12.0*a->nz);
1062: VecRestoreArrayRead(xx,&x);
1063: VecRestoreArray(zz,&y);
1064: return(0);
1065: }
1067: /* ------------------------------------------------------------------------------*/
1070: PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1071: {
1072: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1073: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1074: const PetscScalar *x,*v;
1075: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1076: PetscErrorCode ierr;
1077: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1078: PetscInt nonzerorow=0,n,i,jrow,j;
1081: VecGetArrayRead(xx,&x);
1082: VecGetArray(yy,&y);
1083: idx = a->j;
1084: v = a->a;
1085: ii = a->i;
1087: for (i=0; i<m; i++) {
1088: jrow = ii[i];
1089: n = ii[i+1] - jrow;
1090: sum1 = 0.0;
1091: sum2 = 0.0;
1092: sum3 = 0.0;
1093: sum4 = 0.0;
1094: sum5 = 0.0;
1095: sum6 = 0.0;
1096: sum7 = 0.0;
1098: nonzerorow += (n>0);
1099: for (j=0; j<n; j++) {
1100: sum1 += v[jrow]*x[7*idx[jrow]];
1101: sum2 += v[jrow]*x[7*idx[jrow]+1];
1102: sum3 += v[jrow]*x[7*idx[jrow]+2];
1103: sum4 += v[jrow]*x[7*idx[jrow]+3];
1104: sum5 += v[jrow]*x[7*idx[jrow]+4];
1105: sum6 += v[jrow]*x[7*idx[jrow]+5];
1106: sum7 += v[jrow]*x[7*idx[jrow]+6];
1107: jrow++;
1108: }
1109: y[7*i] = sum1;
1110: y[7*i+1] = sum2;
1111: y[7*i+2] = sum3;
1112: y[7*i+3] = sum4;
1113: y[7*i+4] = sum5;
1114: y[7*i+5] = sum6;
1115: y[7*i+6] = sum7;
1116: }
1118: PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);
1119: VecRestoreArrayRead(xx,&x);
1120: VecRestoreArray(yy,&y);
1121: return(0);
1122: }
1126: PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1127: {
1128: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1129: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1130: const PetscScalar *x,*v;
1131: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1132: PetscErrorCode ierr;
1133: const PetscInt m = b->AIJ->rmap->n,*idx;
1134: PetscInt n,i;
1137: VecSet(yy,0.0);
1138: VecGetArrayRead(xx,&x);
1139: VecGetArray(yy,&y);
1141: for (i=0; i<m; i++) {
1142: idx = a->j + a->i[i];
1143: v = a->a + a->i[i];
1144: n = a->i[i+1] - a->i[i];
1145: alpha1 = x[7*i];
1146: alpha2 = x[7*i+1];
1147: alpha3 = x[7*i+2];
1148: alpha4 = x[7*i+3];
1149: alpha5 = x[7*i+4];
1150: alpha6 = x[7*i+5];
1151: alpha7 = x[7*i+6];
1152: while (n-->0) {
1153: y[7*(*idx)] += alpha1*(*v);
1154: y[7*(*idx)+1] += alpha2*(*v);
1155: y[7*(*idx)+2] += alpha3*(*v);
1156: y[7*(*idx)+3] += alpha4*(*v);
1157: y[7*(*idx)+4] += alpha5*(*v);
1158: y[7*(*idx)+5] += alpha6*(*v);
1159: y[7*(*idx)+6] += alpha7*(*v);
1160: idx++; v++;
1161: }
1162: }
1163: PetscLogFlops(14.0*a->nz);
1164: VecRestoreArrayRead(xx,&x);
1165: VecRestoreArray(yy,&y);
1166: return(0);
1167: }
1171: PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1172: {
1173: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1174: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1175: const PetscScalar *x,*v;
1176: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1177: PetscErrorCode ierr;
1178: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1179: PetscInt n,i,jrow,j;
1182: if (yy != zz) {VecCopy(yy,zz);}
1183: VecGetArrayRead(xx,&x);
1184: VecGetArray(zz,&y);
1185: idx = a->j;
1186: v = a->a;
1187: ii = a->i;
1189: for (i=0; i<m; i++) {
1190: jrow = ii[i];
1191: n = ii[i+1] - jrow;
1192: sum1 = 0.0;
1193: sum2 = 0.0;
1194: sum3 = 0.0;
1195: sum4 = 0.0;
1196: sum5 = 0.0;
1197: sum6 = 0.0;
1198: sum7 = 0.0;
1199: for (j=0; j<n; j++) {
1200: sum1 += v[jrow]*x[7*idx[jrow]];
1201: sum2 += v[jrow]*x[7*idx[jrow]+1];
1202: sum3 += v[jrow]*x[7*idx[jrow]+2];
1203: sum4 += v[jrow]*x[7*idx[jrow]+3];
1204: sum5 += v[jrow]*x[7*idx[jrow]+4];
1205: sum6 += v[jrow]*x[7*idx[jrow]+5];
1206: sum7 += v[jrow]*x[7*idx[jrow]+6];
1207: jrow++;
1208: }
1209: y[7*i] += sum1;
1210: y[7*i+1] += sum2;
1211: y[7*i+2] += sum3;
1212: y[7*i+3] += sum4;
1213: y[7*i+4] += sum5;
1214: y[7*i+5] += sum6;
1215: y[7*i+6] += sum7;
1216: }
1218: PetscLogFlops(14.0*a->nz);
1219: VecRestoreArrayRead(xx,&x);
1220: VecRestoreArray(zz,&y);
1221: return(0);
1222: }
1226: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1227: {
1228: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1229: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1230: const PetscScalar *x,*v;
1231: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1232: PetscErrorCode ierr;
1233: const PetscInt m = b->AIJ->rmap->n,*idx;
1234: PetscInt n,i;
1237: if (yy != zz) {VecCopy(yy,zz);}
1238: VecGetArrayRead(xx,&x);
1239: VecGetArray(zz,&y);
1240: for (i=0; i<m; i++) {
1241: idx = a->j + a->i[i];
1242: v = a->a + a->i[i];
1243: n = a->i[i+1] - a->i[i];
1244: alpha1 = x[7*i];
1245: alpha2 = x[7*i+1];
1246: alpha3 = x[7*i+2];
1247: alpha4 = x[7*i+3];
1248: alpha5 = x[7*i+4];
1249: alpha6 = x[7*i+5];
1250: alpha7 = x[7*i+6];
1251: while (n-->0) {
1252: y[7*(*idx)] += alpha1*(*v);
1253: y[7*(*idx)+1] += alpha2*(*v);
1254: y[7*(*idx)+2] += alpha3*(*v);
1255: y[7*(*idx)+3] += alpha4*(*v);
1256: y[7*(*idx)+4] += alpha5*(*v);
1257: y[7*(*idx)+5] += alpha6*(*v);
1258: y[7*(*idx)+6] += alpha7*(*v);
1259: idx++; v++;
1260: }
1261: }
1262: PetscLogFlops(14.0*a->nz);
1263: VecRestoreArrayRead(xx,&x);
1264: VecRestoreArray(zz,&y);
1265: return(0);
1266: }
1270: PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1271: {
1272: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1273: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1274: const PetscScalar *x,*v;
1275: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1276: PetscErrorCode ierr;
1277: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1278: PetscInt nonzerorow=0,n,i,jrow,j;
1281: VecGetArrayRead(xx,&x);
1282: VecGetArray(yy,&y);
1283: idx = a->j;
1284: v = a->a;
1285: ii = a->i;
1287: for (i=0; i<m; i++) {
1288: jrow = ii[i];
1289: n = ii[i+1] - jrow;
1290: sum1 = 0.0;
1291: sum2 = 0.0;
1292: sum3 = 0.0;
1293: sum4 = 0.0;
1294: sum5 = 0.0;
1295: sum6 = 0.0;
1296: sum7 = 0.0;
1297: sum8 = 0.0;
1299: nonzerorow += (n>0);
1300: for (j=0; j<n; j++) {
1301: sum1 += v[jrow]*x[8*idx[jrow]];
1302: sum2 += v[jrow]*x[8*idx[jrow]+1];
1303: sum3 += v[jrow]*x[8*idx[jrow]+2];
1304: sum4 += v[jrow]*x[8*idx[jrow]+3];
1305: sum5 += v[jrow]*x[8*idx[jrow]+4];
1306: sum6 += v[jrow]*x[8*idx[jrow]+5];
1307: sum7 += v[jrow]*x[8*idx[jrow]+6];
1308: sum8 += v[jrow]*x[8*idx[jrow]+7];
1309: jrow++;
1310: }
1311: y[8*i] = sum1;
1312: y[8*i+1] = sum2;
1313: y[8*i+2] = sum3;
1314: y[8*i+3] = sum4;
1315: y[8*i+4] = sum5;
1316: y[8*i+5] = sum6;
1317: y[8*i+6] = sum7;
1318: y[8*i+7] = sum8;
1319: }
1321: PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);
1322: VecRestoreArrayRead(xx,&x);
1323: VecRestoreArray(yy,&y);
1324: return(0);
1325: }
1329: PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1330: {
1331: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1332: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1333: const PetscScalar *x,*v;
1334: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1335: PetscErrorCode ierr;
1336: const PetscInt m = b->AIJ->rmap->n,*idx;
1337: PetscInt n,i;
1340: VecSet(yy,0.0);
1341: VecGetArrayRead(xx,&x);
1342: VecGetArray(yy,&y);
1344: for (i=0; i<m; i++) {
1345: idx = a->j + a->i[i];
1346: v = a->a + a->i[i];
1347: n = a->i[i+1] - a->i[i];
1348: alpha1 = x[8*i];
1349: alpha2 = x[8*i+1];
1350: alpha3 = x[8*i+2];
1351: alpha4 = x[8*i+3];
1352: alpha5 = x[8*i+4];
1353: alpha6 = x[8*i+5];
1354: alpha7 = x[8*i+6];
1355: alpha8 = x[8*i+7];
1356: while (n-->0) {
1357: y[8*(*idx)] += alpha1*(*v);
1358: y[8*(*idx)+1] += alpha2*(*v);
1359: y[8*(*idx)+2] += alpha3*(*v);
1360: y[8*(*idx)+3] += alpha4*(*v);
1361: y[8*(*idx)+4] += alpha5*(*v);
1362: y[8*(*idx)+5] += alpha6*(*v);
1363: y[8*(*idx)+6] += alpha7*(*v);
1364: y[8*(*idx)+7] += alpha8*(*v);
1365: idx++; v++;
1366: }
1367: }
1368: PetscLogFlops(16.0*a->nz);
1369: VecRestoreArrayRead(xx,&x);
1370: VecRestoreArray(yy,&y);
1371: return(0);
1372: }
1376: PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1377: {
1378: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1379: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1380: const PetscScalar *x,*v;
1381: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1382: PetscErrorCode ierr;
1383: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1384: PetscInt n,i,jrow,j;
1387: if (yy != zz) {VecCopy(yy,zz);}
1388: VecGetArrayRead(xx,&x);
1389: VecGetArray(zz,&y);
1390: idx = a->j;
1391: v = a->a;
1392: ii = a->i;
1394: for (i=0; i<m; i++) {
1395: jrow = ii[i];
1396: n = ii[i+1] - jrow;
1397: sum1 = 0.0;
1398: sum2 = 0.0;
1399: sum3 = 0.0;
1400: sum4 = 0.0;
1401: sum5 = 0.0;
1402: sum6 = 0.0;
1403: sum7 = 0.0;
1404: sum8 = 0.0;
1405: for (j=0; j<n; j++) {
1406: sum1 += v[jrow]*x[8*idx[jrow]];
1407: sum2 += v[jrow]*x[8*idx[jrow]+1];
1408: sum3 += v[jrow]*x[8*idx[jrow]+2];
1409: sum4 += v[jrow]*x[8*idx[jrow]+3];
1410: sum5 += v[jrow]*x[8*idx[jrow]+4];
1411: sum6 += v[jrow]*x[8*idx[jrow]+5];
1412: sum7 += v[jrow]*x[8*idx[jrow]+6];
1413: sum8 += v[jrow]*x[8*idx[jrow]+7];
1414: jrow++;
1415: }
1416: y[8*i] += sum1;
1417: y[8*i+1] += sum2;
1418: y[8*i+2] += sum3;
1419: y[8*i+3] += sum4;
1420: y[8*i+4] += sum5;
1421: y[8*i+5] += sum6;
1422: y[8*i+6] += sum7;
1423: y[8*i+7] += sum8;
1424: }
1426: PetscLogFlops(16.0*a->nz);
1427: VecRestoreArrayRead(xx,&x);
1428: VecRestoreArray(zz,&y);
1429: return(0);
1430: }
1434: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1435: {
1436: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1437: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1438: const PetscScalar *x,*v;
1439: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1440: PetscErrorCode ierr;
1441: const PetscInt m = b->AIJ->rmap->n,*idx;
1442: PetscInt n,i;
1445: if (yy != zz) {VecCopy(yy,zz);}
1446: VecGetArrayRead(xx,&x);
1447: VecGetArray(zz,&y);
1448: for (i=0; i<m; i++) {
1449: idx = a->j + a->i[i];
1450: v = a->a + a->i[i];
1451: n = a->i[i+1] - a->i[i];
1452: alpha1 = x[8*i];
1453: alpha2 = x[8*i+1];
1454: alpha3 = x[8*i+2];
1455: alpha4 = x[8*i+3];
1456: alpha5 = x[8*i+4];
1457: alpha6 = x[8*i+5];
1458: alpha7 = x[8*i+6];
1459: alpha8 = x[8*i+7];
1460: while (n-->0) {
1461: y[8*(*idx)] += alpha1*(*v);
1462: y[8*(*idx)+1] += alpha2*(*v);
1463: y[8*(*idx)+2] += alpha3*(*v);
1464: y[8*(*idx)+3] += alpha4*(*v);
1465: y[8*(*idx)+4] += alpha5*(*v);
1466: y[8*(*idx)+5] += alpha6*(*v);
1467: y[8*(*idx)+6] += alpha7*(*v);
1468: y[8*(*idx)+7] += alpha8*(*v);
1469: idx++; v++;
1470: }
1471: }
1472: PetscLogFlops(16.0*a->nz);
1473: VecRestoreArrayRead(xx,&x);
1474: VecRestoreArray(zz,&y);
1475: return(0);
1476: }
1478: /* ------------------------------------------------------------------------------*/
1481: PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1482: {
1483: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1484: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1485: const PetscScalar *x,*v;
1486: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1487: PetscErrorCode ierr;
1488: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1489: PetscInt nonzerorow=0,n,i,jrow,j;
1492: VecGetArrayRead(xx,&x);
1493: VecGetArray(yy,&y);
1494: idx = a->j;
1495: v = a->a;
1496: ii = a->i;
1498: for (i=0; i<m; i++) {
1499: jrow = ii[i];
1500: n = ii[i+1] - jrow;
1501: sum1 = 0.0;
1502: sum2 = 0.0;
1503: sum3 = 0.0;
1504: sum4 = 0.0;
1505: sum5 = 0.0;
1506: sum6 = 0.0;
1507: sum7 = 0.0;
1508: sum8 = 0.0;
1509: sum9 = 0.0;
1511: nonzerorow += (n>0);
1512: for (j=0; j<n; j++) {
1513: sum1 += v[jrow]*x[9*idx[jrow]];
1514: sum2 += v[jrow]*x[9*idx[jrow]+1];
1515: sum3 += v[jrow]*x[9*idx[jrow]+2];
1516: sum4 += v[jrow]*x[9*idx[jrow]+3];
1517: sum5 += v[jrow]*x[9*idx[jrow]+4];
1518: sum6 += v[jrow]*x[9*idx[jrow]+5];
1519: sum7 += v[jrow]*x[9*idx[jrow]+6];
1520: sum8 += v[jrow]*x[9*idx[jrow]+7];
1521: sum9 += v[jrow]*x[9*idx[jrow]+8];
1522: jrow++;
1523: }
1524: y[9*i] = sum1;
1525: y[9*i+1] = sum2;
1526: y[9*i+2] = sum3;
1527: y[9*i+3] = sum4;
1528: y[9*i+4] = sum5;
1529: y[9*i+5] = sum6;
1530: y[9*i+6] = sum7;
1531: y[9*i+7] = sum8;
1532: y[9*i+8] = sum9;
1533: }
1535: PetscLogFlops(18.0*a->nz - 9*nonzerorow);
1536: VecRestoreArrayRead(xx,&x);
1537: VecRestoreArray(yy,&y);
1538: return(0);
1539: }
1541: /* ------------------------------------------------------------------------------*/
1545: PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1546: {
1547: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1548: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1549: const PetscScalar *x,*v;
1550: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1551: PetscErrorCode ierr;
1552: const PetscInt m = b->AIJ->rmap->n,*idx;
1553: PetscInt n,i;
1556: VecSet(yy,0.0);
1557: VecGetArrayRead(xx,&x);
1558: VecGetArray(yy,&y);
1560: for (i=0; i<m; i++) {
1561: idx = a->j + a->i[i];
1562: v = a->a + a->i[i];
1563: n = a->i[i+1] - a->i[i];
1564: alpha1 = x[9*i];
1565: alpha2 = x[9*i+1];
1566: alpha3 = x[9*i+2];
1567: alpha4 = x[9*i+3];
1568: alpha5 = x[9*i+4];
1569: alpha6 = x[9*i+5];
1570: alpha7 = x[9*i+6];
1571: alpha8 = x[9*i+7];
1572: alpha9 = x[9*i+8];
1573: while (n-->0) {
1574: y[9*(*idx)] += alpha1*(*v);
1575: y[9*(*idx)+1] += alpha2*(*v);
1576: y[9*(*idx)+2] += alpha3*(*v);
1577: y[9*(*idx)+3] += alpha4*(*v);
1578: y[9*(*idx)+4] += alpha5*(*v);
1579: y[9*(*idx)+5] += alpha6*(*v);
1580: y[9*(*idx)+6] += alpha7*(*v);
1581: y[9*(*idx)+7] += alpha8*(*v);
1582: y[9*(*idx)+8] += alpha9*(*v);
1583: idx++; v++;
1584: }
1585: }
1586: PetscLogFlops(18.0*a->nz);
1587: VecRestoreArrayRead(xx,&x);
1588: VecRestoreArray(yy,&y);
1589: return(0);
1590: }
1594: PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1595: {
1596: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1597: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1598: const PetscScalar *x,*v;
1599: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1600: PetscErrorCode ierr;
1601: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1602: PetscInt n,i,jrow,j;
1605: if (yy != zz) {VecCopy(yy,zz);}
1606: VecGetArrayRead(xx,&x);
1607: VecGetArray(zz,&y);
1608: idx = a->j;
1609: v = a->a;
1610: ii = a->i;
1612: for (i=0; i<m; i++) {
1613: jrow = ii[i];
1614: n = ii[i+1] - jrow;
1615: sum1 = 0.0;
1616: sum2 = 0.0;
1617: sum3 = 0.0;
1618: sum4 = 0.0;
1619: sum5 = 0.0;
1620: sum6 = 0.0;
1621: sum7 = 0.0;
1622: sum8 = 0.0;
1623: sum9 = 0.0;
1624: for (j=0; j<n; j++) {
1625: sum1 += v[jrow]*x[9*idx[jrow]];
1626: sum2 += v[jrow]*x[9*idx[jrow]+1];
1627: sum3 += v[jrow]*x[9*idx[jrow]+2];
1628: sum4 += v[jrow]*x[9*idx[jrow]+3];
1629: sum5 += v[jrow]*x[9*idx[jrow]+4];
1630: sum6 += v[jrow]*x[9*idx[jrow]+5];
1631: sum7 += v[jrow]*x[9*idx[jrow]+6];
1632: sum8 += v[jrow]*x[9*idx[jrow]+7];
1633: sum9 += v[jrow]*x[9*idx[jrow]+8];
1634: jrow++;
1635: }
1636: y[9*i] += sum1;
1637: y[9*i+1] += sum2;
1638: y[9*i+2] += sum3;
1639: y[9*i+3] += sum4;
1640: y[9*i+4] += sum5;
1641: y[9*i+5] += sum6;
1642: y[9*i+6] += sum7;
1643: y[9*i+7] += sum8;
1644: y[9*i+8] += sum9;
1645: }
1647: PetscLogFlops(18*a->nz);
1648: VecRestoreArrayRead(xx,&x);
1649: VecRestoreArray(zz,&y);
1650: return(0);
1651: }
1655: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1656: {
1657: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1658: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1659: const PetscScalar *x,*v;
1660: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1661: PetscErrorCode ierr;
1662: const PetscInt m = b->AIJ->rmap->n,*idx;
1663: PetscInt n,i;
1666: if (yy != zz) {VecCopy(yy,zz);}
1667: VecGetArrayRead(xx,&x);
1668: VecGetArray(zz,&y);
1669: for (i=0; i<m; i++) {
1670: idx = a->j + a->i[i];
1671: v = a->a + a->i[i];
1672: n = a->i[i+1] - a->i[i];
1673: alpha1 = x[9*i];
1674: alpha2 = x[9*i+1];
1675: alpha3 = x[9*i+2];
1676: alpha4 = x[9*i+3];
1677: alpha5 = x[9*i+4];
1678: alpha6 = x[9*i+5];
1679: alpha7 = x[9*i+6];
1680: alpha8 = x[9*i+7];
1681: alpha9 = x[9*i+8];
1682: while (n-->0) {
1683: y[9*(*idx)] += alpha1*(*v);
1684: y[9*(*idx)+1] += alpha2*(*v);
1685: y[9*(*idx)+2] += alpha3*(*v);
1686: y[9*(*idx)+3] += alpha4*(*v);
1687: y[9*(*idx)+4] += alpha5*(*v);
1688: y[9*(*idx)+5] += alpha6*(*v);
1689: y[9*(*idx)+6] += alpha7*(*v);
1690: y[9*(*idx)+7] += alpha8*(*v);
1691: y[9*(*idx)+8] += alpha9*(*v);
1692: idx++; v++;
1693: }
1694: }
1695: PetscLogFlops(18.0*a->nz);
1696: VecRestoreArrayRead(xx,&x);
1697: VecRestoreArray(zz,&y);
1698: return(0);
1699: }
1702: PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1703: {
1704: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1705: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1706: const PetscScalar *x,*v;
1707: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1708: PetscErrorCode ierr;
1709: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1710: PetscInt nonzerorow=0,n,i,jrow,j;
1713: VecGetArrayRead(xx,&x);
1714: VecGetArray(yy,&y);
1715: idx = a->j;
1716: v = a->a;
1717: ii = a->i;
1719: for (i=0; i<m; i++) {
1720: jrow = ii[i];
1721: n = ii[i+1] - jrow;
1722: sum1 = 0.0;
1723: sum2 = 0.0;
1724: sum3 = 0.0;
1725: sum4 = 0.0;
1726: sum5 = 0.0;
1727: sum6 = 0.0;
1728: sum7 = 0.0;
1729: sum8 = 0.0;
1730: sum9 = 0.0;
1731: sum10 = 0.0;
1733: nonzerorow += (n>0);
1734: for (j=0; j<n; j++) {
1735: sum1 += v[jrow]*x[10*idx[jrow]];
1736: sum2 += v[jrow]*x[10*idx[jrow]+1];
1737: sum3 += v[jrow]*x[10*idx[jrow]+2];
1738: sum4 += v[jrow]*x[10*idx[jrow]+3];
1739: sum5 += v[jrow]*x[10*idx[jrow]+4];
1740: sum6 += v[jrow]*x[10*idx[jrow]+5];
1741: sum7 += v[jrow]*x[10*idx[jrow]+6];
1742: sum8 += v[jrow]*x[10*idx[jrow]+7];
1743: sum9 += v[jrow]*x[10*idx[jrow]+8];
1744: sum10 += v[jrow]*x[10*idx[jrow]+9];
1745: jrow++;
1746: }
1747: y[10*i] = sum1;
1748: y[10*i+1] = sum2;
1749: y[10*i+2] = sum3;
1750: y[10*i+3] = sum4;
1751: y[10*i+4] = sum5;
1752: y[10*i+5] = sum6;
1753: y[10*i+6] = sum7;
1754: y[10*i+7] = sum8;
1755: y[10*i+8] = sum9;
1756: y[10*i+9] = sum10;
1757: }
1759: PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);
1760: VecRestoreArrayRead(xx,&x);
1761: VecRestoreArray(yy,&y);
1762: return(0);
1763: }
1767: PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1768: {
1769: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1770: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1771: const PetscScalar *x,*v;
1772: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1773: PetscErrorCode ierr;
1774: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1775: PetscInt n,i,jrow,j;
1778: if (yy != zz) {VecCopy(yy,zz);}
1779: VecGetArrayRead(xx,&x);
1780: VecGetArray(zz,&y);
1781: idx = a->j;
1782: v = a->a;
1783: ii = a->i;
1785: for (i=0; i<m; i++) {
1786: jrow = ii[i];
1787: n = ii[i+1] - jrow;
1788: sum1 = 0.0;
1789: sum2 = 0.0;
1790: sum3 = 0.0;
1791: sum4 = 0.0;
1792: sum5 = 0.0;
1793: sum6 = 0.0;
1794: sum7 = 0.0;
1795: sum8 = 0.0;
1796: sum9 = 0.0;
1797: sum10 = 0.0;
1798: for (j=0; j<n; j++) {
1799: sum1 += v[jrow]*x[10*idx[jrow]];
1800: sum2 += v[jrow]*x[10*idx[jrow]+1];
1801: sum3 += v[jrow]*x[10*idx[jrow]+2];
1802: sum4 += v[jrow]*x[10*idx[jrow]+3];
1803: sum5 += v[jrow]*x[10*idx[jrow]+4];
1804: sum6 += v[jrow]*x[10*idx[jrow]+5];
1805: sum7 += v[jrow]*x[10*idx[jrow]+6];
1806: sum8 += v[jrow]*x[10*idx[jrow]+7];
1807: sum9 += v[jrow]*x[10*idx[jrow]+8];
1808: sum10 += v[jrow]*x[10*idx[jrow]+9];
1809: jrow++;
1810: }
1811: y[10*i] += sum1;
1812: y[10*i+1] += sum2;
1813: y[10*i+2] += sum3;
1814: y[10*i+3] += sum4;
1815: y[10*i+4] += sum5;
1816: y[10*i+5] += sum6;
1817: y[10*i+6] += sum7;
1818: y[10*i+7] += sum8;
1819: y[10*i+8] += sum9;
1820: y[10*i+9] += sum10;
1821: }
1823: PetscLogFlops(20.0*a->nz);
1824: VecRestoreArrayRead(xx,&x);
1825: VecRestoreArray(yy,&y);
1826: return(0);
1827: }
1831: PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1832: {
1833: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1834: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1835: const PetscScalar *x,*v;
1836: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1837: PetscErrorCode ierr;
1838: const PetscInt m = b->AIJ->rmap->n,*idx;
1839: PetscInt n,i;
1842: VecSet(yy,0.0);
1843: VecGetArrayRead(xx,&x);
1844: VecGetArray(yy,&y);
1846: for (i=0; i<m; i++) {
1847: idx = a->j + a->i[i];
1848: v = a->a + a->i[i];
1849: n = a->i[i+1] - a->i[i];
1850: alpha1 = x[10*i];
1851: alpha2 = x[10*i+1];
1852: alpha3 = x[10*i+2];
1853: alpha4 = x[10*i+3];
1854: alpha5 = x[10*i+4];
1855: alpha6 = x[10*i+5];
1856: alpha7 = x[10*i+6];
1857: alpha8 = x[10*i+7];
1858: alpha9 = x[10*i+8];
1859: alpha10 = x[10*i+9];
1860: while (n-->0) {
1861: y[10*(*idx)] += alpha1*(*v);
1862: y[10*(*idx)+1] += alpha2*(*v);
1863: y[10*(*idx)+2] += alpha3*(*v);
1864: y[10*(*idx)+3] += alpha4*(*v);
1865: y[10*(*idx)+4] += alpha5*(*v);
1866: y[10*(*idx)+5] += alpha6*(*v);
1867: y[10*(*idx)+6] += alpha7*(*v);
1868: y[10*(*idx)+7] += alpha8*(*v);
1869: y[10*(*idx)+8] += alpha9*(*v);
1870: y[10*(*idx)+9] += alpha10*(*v);
1871: idx++; v++;
1872: }
1873: }
1874: PetscLogFlops(20.0*a->nz);
1875: VecRestoreArrayRead(xx,&x);
1876: VecRestoreArray(yy,&y);
1877: return(0);
1878: }
1882: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1883: {
1884: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1885: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1886: const PetscScalar *x,*v;
1887: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1888: PetscErrorCode ierr;
1889: const PetscInt m = b->AIJ->rmap->n,*idx;
1890: PetscInt n,i;
1893: if (yy != zz) {VecCopy(yy,zz);}
1894: VecGetArrayRead(xx,&x);
1895: VecGetArray(zz,&y);
1896: for (i=0; i<m; i++) {
1897: idx = a->j + a->i[i];
1898: v = a->a + a->i[i];
1899: n = a->i[i+1] - a->i[i];
1900: alpha1 = x[10*i];
1901: alpha2 = x[10*i+1];
1902: alpha3 = x[10*i+2];
1903: alpha4 = x[10*i+3];
1904: alpha5 = x[10*i+4];
1905: alpha6 = x[10*i+5];
1906: alpha7 = x[10*i+6];
1907: alpha8 = x[10*i+7];
1908: alpha9 = x[10*i+8];
1909: alpha10 = x[10*i+9];
1910: while (n-->0) {
1911: y[10*(*idx)] += alpha1*(*v);
1912: y[10*(*idx)+1] += alpha2*(*v);
1913: y[10*(*idx)+2] += alpha3*(*v);
1914: y[10*(*idx)+3] += alpha4*(*v);
1915: y[10*(*idx)+4] += alpha5*(*v);
1916: y[10*(*idx)+5] += alpha6*(*v);
1917: y[10*(*idx)+6] += alpha7*(*v);
1918: y[10*(*idx)+7] += alpha8*(*v);
1919: y[10*(*idx)+8] += alpha9*(*v);
1920: y[10*(*idx)+9] += alpha10*(*v);
1921: idx++; v++;
1922: }
1923: }
1924: PetscLogFlops(20.0*a->nz);
1925: VecRestoreArrayRead(xx,&x);
1926: VecRestoreArray(zz,&y);
1927: return(0);
1928: }
1931: /*--------------------------------------------------------------------------------------------*/
1934: PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1935: {
1936: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1937: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1938: const PetscScalar *x,*v;
1939: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1940: PetscErrorCode ierr;
1941: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1942: PetscInt nonzerorow=0,n,i,jrow,j;
1945: VecGetArrayRead(xx,&x);
1946: VecGetArray(yy,&y);
1947: idx = a->j;
1948: v = a->a;
1949: ii = a->i;
1951: for (i=0; i<m; i++) {
1952: jrow = ii[i];
1953: n = ii[i+1] - jrow;
1954: sum1 = 0.0;
1955: sum2 = 0.0;
1956: sum3 = 0.0;
1957: sum4 = 0.0;
1958: sum5 = 0.0;
1959: sum6 = 0.0;
1960: sum7 = 0.0;
1961: sum8 = 0.0;
1962: sum9 = 0.0;
1963: sum10 = 0.0;
1964: sum11 = 0.0;
1966: nonzerorow += (n>0);
1967: for (j=0; j<n; j++) {
1968: sum1 += v[jrow]*x[11*idx[jrow]];
1969: sum2 += v[jrow]*x[11*idx[jrow]+1];
1970: sum3 += v[jrow]*x[11*idx[jrow]+2];
1971: sum4 += v[jrow]*x[11*idx[jrow]+3];
1972: sum5 += v[jrow]*x[11*idx[jrow]+4];
1973: sum6 += v[jrow]*x[11*idx[jrow]+5];
1974: sum7 += v[jrow]*x[11*idx[jrow]+6];
1975: sum8 += v[jrow]*x[11*idx[jrow]+7];
1976: sum9 += v[jrow]*x[11*idx[jrow]+8];
1977: sum10 += v[jrow]*x[11*idx[jrow]+9];
1978: sum11 += v[jrow]*x[11*idx[jrow]+10];
1979: jrow++;
1980: }
1981: y[11*i] = sum1;
1982: y[11*i+1] = sum2;
1983: y[11*i+2] = sum3;
1984: y[11*i+3] = sum4;
1985: y[11*i+4] = sum5;
1986: y[11*i+5] = sum6;
1987: y[11*i+6] = sum7;
1988: y[11*i+7] = sum8;
1989: y[11*i+8] = sum9;
1990: y[11*i+9] = sum10;
1991: y[11*i+10] = sum11;
1992: }
1994: PetscLogFlops(22*a->nz - 11*nonzerorow);
1995: VecRestoreArrayRead(xx,&x);
1996: VecRestoreArray(yy,&y);
1997: return(0);
1998: }
2002: PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2003: {
2004: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2005: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2006: const PetscScalar *x,*v;
2007: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
2008: PetscErrorCode ierr;
2009: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2010: PetscInt n,i,jrow,j;
2013: if (yy != zz) {VecCopy(yy,zz);}
2014: VecGetArrayRead(xx,&x);
2015: VecGetArray(zz,&y);
2016: idx = a->j;
2017: v = a->a;
2018: ii = a->i;
2020: for (i=0; i<m; i++) {
2021: jrow = ii[i];
2022: n = ii[i+1] - jrow;
2023: sum1 = 0.0;
2024: sum2 = 0.0;
2025: sum3 = 0.0;
2026: sum4 = 0.0;
2027: sum5 = 0.0;
2028: sum6 = 0.0;
2029: sum7 = 0.0;
2030: sum8 = 0.0;
2031: sum9 = 0.0;
2032: sum10 = 0.0;
2033: sum11 = 0.0;
2034: for (j=0; j<n; j++) {
2035: sum1 += v[jrow]*x[11*idx[jrow]];
2036: sum2 += v[jrow]*x[11*idx[jrow]+1];
2037: sum3 += v[jrow]*x[11*idx[jrow]+2];
2038: sum4 += v[jrow]*x[11*idx[jrow]+3];
2039: sum5 += v[jrow]*x[11*idx[jrow]+4];
2040: sum6 += v[jrow]*x[11*idx[jrow]+5];
2041: sum7 += v[jrow]*x[11*idx[jrow]+6];
2042: sum8 += v[jrow]*x[11*idx[jrow]+7];
2043: sum9 += v[jrow]*x[11*idx[jrow]+8];
2044: sum10 += v[jrow]*x[11*idx[jrow]+9];
2045: sum11 += v[jrow]*x[11*idx[jrow]+10];
2046: jrow++;
2047: }
2048: y[11*i] += sum1;
2049: y[11*i+1] += sum2;
2050: y[11*i+2] += sum3;
2051: y[11*i+3] += sum4;
2052: y[11*i+4] += sum5;
2053: y[11*i+5] += sum6;
2054: y[11*i+6] += sum7;
2055: y[11*i+7] += sum8;
2056: y[11*i+8] += sum9;
2057: y[11*i+9] += sum10;
2058: y[11*i+10] += sum11;
2059: }
2061: PetscLogFlops(22*a->nz);
2062: VecRestoreArrayRead(xx,&x);
2063: VecRestoreArray(yy,&y);
2064: return(0);
2065: }
2069: PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
2070: {
2071: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2072: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2073: const PetscScalar *x,*v;
2074: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2075: PetscErrorCode ierr;
2076: const PetscInt m = b->AIJ->rmap->n,*idx;
2077: PetscInt n,i;
2080: VecSet(yy,0.0);
2081: VecGetArrayRead(xx,&x);
2082: VecGetArray(yy,&y);
2084: for (i=0; i<m; i++) {
2085: idx = a->j + a->i[i];
2086: v = a->a + a->i[i];
2087: n = a->i[i+1] - a->i[i];
2088: alpha1 = x[11*i];
2089: alpha2 = x[11*i+1];
2090: alpha3 = x[11*i+2];
2091: alpha4 = x[11*i+3];
2092: alpha5 = x[11*i+4];
2093: alpha6 = x[11*i+5];
2094: alpha7 = x[11*i+6];
2095: alpha8 = x[11*i+7];
2096: alpha9 = x[11*i+8];
2097: alpha10 = x[11*i+9];
2098: alpha11 = x[11*i+10];
2099: while (n-->0) {
2100: y[11*(*idx)] += alpha1*(*v);
2101: y[11*(*idx)+1] += alpha2*(*v);
2102: y[11*(*idx)+2] += alpha3*(*v);
2103: y[11*(*idx)+3] += alpha4*(*v);
2104: y[11*(*idx)+4] += alpha5*(*v);
2105: y[11*(*idx)+5] += alpha6*(*v);
2106: y[11*(*idx)+6] += alpha7*(*v);
2107: y[11*(*idx)+7] += alpha8*(*v);
2108: y[11*(*idx)+8] += alpha9*(*v);
2109: y[11*(*idx)+9] += alpha10*(*v);
2110: y[11*(*idx)+10] += alpha11*(*v);
2111: idx++; v++;
2112: }
2113: }
2114: PetscLogFlops(22*a->nz);
2115: VecRestoreArrayRead(xx,&x);
2116: VecRestoreArray(yy,&y);
2117: return(0);
2118: }
2122: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2123: {
2124: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2125: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2126: const PetscScalar *x,*v;
2127: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2128: PetscErrorCode ierr;
2129: const PetscInt m = b->AIJ->rmap->n,*idx;
2130: PetscInt n,i;
2133: if (yy != zz) {VecCopy(yy,zz);}
2134: VecGetArrayRead(xx,&x);
2135: VecGetArray(zz,&y);
2136: for (i=0; i<m; i++) {
2137: idx = a->j + a->i[i];
2138: v = a->a + a->i[i];
2139: n = a->i[i+1] - a->i[i];
2140: alpha1 = x[11*i];
2141: alpha2 = x[11*i+1];
2142: alpha3 = x[11*i+2];
2143: alpha4 = x[11*i+3];
2144: alpha5 = x[11*i+4];
2145: alpha6 = x[11*i+5];
2146: alpha7 = x[11*i+6];
2147: alpha8 = x[11*i+7];
2148: alpha9 = x[11*i+8];
2149: alpha10 = x[11*i+9];
2150: alpha11 = x[11*i+10];
2151: while (n-->0) {
2152: y[11*(*idx)] += alpha1*(*v);
2153: y[11*(*idx)+1] += alpha2*(*v);
2154: y[11*(*idx)+2] += alpha3*(*v);
2155: y[11*(*idx)+3] += alpha4*(*v);
2156: y[11*(*idx)+4] += alpha5*(*v);
2157: y[11*(*idx)+5] += alpha6*(*v);
2158: y[11*(*idx)+6] += alpha7*(*v);
2159: y[11*(*idx)+7] += alpha8*(*v);
2160: y[11*(*idx)+8] += alpha9*(*v);
2161: y[11*(*idx)+9] += alpha10*(*v);
2162: y[11*(*idx)+10] += alpha11*(*v);
2163: idx++; v++;
2164: }
2165: }
2166: PetscLogFlops(22*a->nz);
2167: VecRestoreArrayRead(xx,&x);
2168: VecRestoreArray(zz,&y);
2169: return(0);
2170: }
2173: /*--------------------------------------------------------------------------------------------*/
2176: PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2177: {
2178: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2179: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2180: const PetscScalar *x,*v;
2181: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2182: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2183: PetscErrorCode ierr;
2184: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2185: PetscInt nonzerorow=0,n,i,jrow,j;
2188: VecGetArrayRead(xx,&x);
2189: VecGetArray(yy,&y);
2190: idx = a->j;
2191: v = a->a;
2192: ii = a->i;
2194: for (i=0; i<m; i++) {
2195: jrow = ii[i];
2196: n = ii[i+1] - jrow;
2197: sum1 = 0.0;
2198: sum2 = 0.0;
2199: sum3 = 0.0;
2200: sum4 = 0.0;
2201: sum5 = 0.0;
2202: sum6 = 0.0;
2203: sum7 = 0.0;
2204: sum8 = 0.0;
2205: sum9 = 0.0;
2206: sum10 = 0.0;
2207: sum11 = 0.0;
2208: sum12 = 0.0;
2209: sum13 = 0.0;
2210: sum14 = 0.0;
2211: sum15 = 0.0;
2212: sum16 = 0.0;
2214: nonzerorow += (n>0);
2215: for (j=0; j<n; j++) {
2216: sum1 += v[jrow]*x[16*idx[jrow]];
2217: sum2 += v[jrow]*x[16*idx[jrow]+1];
2218: sum3 += v[jrow]*x[16*idx[jrow]+2];
2219: sum4 += v[jrow]*x[16*idx[jrow]+3];
2220: sum5 += v[jrow]*x[16*idx[jrow]+4];
2221: sum6 += v[jrow]*x[16*idx[jrow]+5];
2222: sum7 += v[jrow]*x[16*idx[jrow]+6];
2223: sum8 += v[jrow]*x[16*idx[jrow]+7];
2224: sum9 += v[jrow]*x[16*idx[jrow]+8];
2225: sum10 += v[jrow]*x[16*idx[jrow]+9];
2226: sum11 += v[jrow]*x[16*idx[jrow]+10];
2227: sum12 += v[jrow]*x[16*idx[jrow]+11];
2228: sum13 += v[jrow]*x[16*idx[jrow]+12];
2229: sum14 += v[jrow]*x[16*idx[jrow]+13];
2230: sum15 += v[jrow]*x[16*idx[jrow]+14];
2231: sum16 += v[jrow]*x[16*idx[jrow]+15];
2232: jrow++;
2233: }
2234: y[16*i] = sum1;
2235: y[16*i+1] = sum2;
2236: y[16*i+2] = sum3;
2237: y[16*i+3] = sum4;
2238: y[16*i+4] = sum5;
2239: y[16*i+5] = sum6;
2240: y[16*i+6] = sum7;
2241: y[16*i+7] = sum8;
2242: y[16*i+8] = sum9;
2243: y[16*i+9] = sum10;
2244: y[16*i+10] = sum11;
2245: y[16*i+11] = sum12;
2246: y[16*i+12] = sum13;
2247: y[16*i+13] = sum14;
2248: y[16*i+14] = sum15;
2249: y[16*i+15] = sum16;
2250: }
2252: PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);
2253: VecRestoreArrayRead(xx,&x);
2254: VecRestoreArray(yy,&y);
2255: return(0);
2256: }
2260: PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2261: {
2262: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2263: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2264: const PetscScalar *x,*v;
2265: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2266: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2267: PetscErrorCode ierr;
2268: const PetscInt m = b->AIJ->rmap->n,*idx;
2269: PetscInt n,i;
2272: VecSet(yy,0.0);
2273: VecGetArrayRead(xx,&x);
2274: VecGetArray(yy,&y);
2276: for (i=0; i<m; i++) {
2277: idx = a->j + a->i[i];
2278: v = a->a + a->i[i];
2279: n = a->i[i+1] - a->i[i];
2280: alpha1 = x[16*i];
2281: alpha2 = x[16*i+1];
2282: alpha3 = x[16*i+2];
2283: alpha4 = x[16*i+3];
2284: alpha5 = x[16*i+4];
2285: alpha6 = x[16*i+5];
2286: alpha7 = x[16*i+6];
2287: alpha8 = x[16*i+7];
2288: alpha9 = x[16*i+8];
2289: alpha10 = x[16*i+9];
2290: alpha11 = x[16*i+10];
2291: alpha12 = x[16*i+11];
2292: alpha13 = x[16*i+12];
2293: alpha14 = x[16*i+13];
2294: alpha15 = x[16*i+14];
2295: alpha16 = x[16*i+15];
2296: while (n-->0) {
2297: y[16*(*idx)] += alpha1*(*v);
2298: y[16*(*idx)+1] += alpha2*(*v);
2299: y[16*(*idx)+2] += alpha3*(*v);
2300: y[16*(*idx)+3] += alpha4*(*v);
2301: y[16*(*idx)+4] += alpha5*(*v);
2302: y[16*(*idx)+5] += alpha6*(*v);
2303: y[16*(*idx)+6] += alpha7*(*v);
2304: y[16*(*idx)+7] += alpha8*(*v);
2305: y[16*(*idx)+8] += alpha9*(*v);
2306: y[16*(*idx)+9] += alpha10*(*v);
2307: y[16*(*idx)+10] += alpha11*(*v);
2308: y[16*(*idx)+11] += alpha12*(*v);
2309: y[16*(*idx)+12] += alpha13*(*v);
2310: y[16*(*idx)+13] += alpha14*(*v);
2311: y[16*(*idx)+14] += alpha15*(*v);
2312: y[16*(*idx)+15] += alpha16*(*v);
2313: idx++; v++;
2314: }
2315: }
2316: PetscLogFlops(32.0*a->nz);
2317: VecRestoreArrayRead(xx,&x);
2318: VecRestoreArray(yy,&y);
2319: return(0);
2320: }
2324: PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2325: {
2326: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2327: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2328: const PetscScalar *x,*v;
2329: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2330: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2331: PetscErrorCode ierr;
2332: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2333: PetscInt n,i,jrow,j;
2336: if (yy != zz) {VecCopy(yy,zz);}
2337: VecGetArrayRead(xx,&x);
2338: VecGetArray(zz,&y);
2339: idx = a->j;
2340: v = a->a;
2341: ii = a->i;
2343: for (i=0; i<m; i++) {
2344: jrow = ii[i];
2345: n = ii[i+1] - jrow;
2346: sum1 = 0.0;
2347: sum2 = 0.0;
2348: sum3 = 0.0;
2349: sum4 = 0.0;
2350: sum5 = 0.0;
2351: sum6 = 0.0;
2352: sum7 = 0.0;
2353: sum8 = 0.0;
2354: sum9 = 0.0;
2355: sum10 = 0.0;
2356: sum11 = 0.0;
2357: sum12 = 0.0;
2358: sum13 = 0.0;
2359: sum14 = 0.0;
2360: sum15 = 0.0;
2361: sum16 = 0.0;
2362: for (j=0; j<n; j++) {
2363: sum1 += v[jrow]*x[16*idx[jrow]];
2364: sum2 += v[jrow]*x[16*idx[jrow]+1];
2365: sum3 += v[jrow]*x[16*idx[jrow]+2];
2366: sum4 += v[jrow]*x[16*idx[jrow]+3];
2367: sum5 += v[jrow]*x[16*idx[jrow]+4];
2368: sum6 += v[jrow]*x[16*idx[jrow]+5];
2369: sum7 += v[jrow]*x[16*idx[jrow]+6];
2370: sum8 += v[jrow]*x[16*idx[jrow]+7];
2371: sum9 += v[jrow]*x[16*idx[jrow]+8];
2372: sum10 += v[jrow]*x[16*idx[jrow]+9];
2373: sum11 += v[jrow]*x[16*idx[jrow]+10];
2374: sum12 += v[jrow]*x[16*idx[jrow]+11];
2375: sum13 += v[jrow]*x[16*idx[jrow]+12];
2376: sum14 += v[jrow]*x[16*idx[jrow]+13];
2377: sum15 += v[jrow]*x[16*idx[jrow]+14];
2378: sum16 += v[jrow]*x[16*idx[jrow]+15];
2379: jrow++;
2380: }
2381: y[16*i] += sum1;
2382: y[16*i+1] += sum2;
2383: y[16*i+2] += sum3;
2384: y[16*i+3] += sum4;
2385: y[16*i+4] += sum5;
2386: y[16*i+5] += sum6;
2387: y[16*i+6] += sum7;
2388: y[16*i+7] += sum8;
2389: y[16*i+8] += sum9;
2390: y[16*i+9] += sum10;
2391: y[16*i+10] += sum11;
2392: y[16*i+11] += sum12;
2393: y[16*i+12] += sum13;
2394: y[16*i+13] += sum14;
2395: y[16*i+14] += sum15;
2396: y[16*i+15] += sum16;
2397: }
2399: PetscLogFlops(32.0*a->nz);
2400: VecRestoreArrayRead(xx,&x);
2401: VecRestoreArray(zz,&y);
2402: return(0);
2403: }
2407: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2408: {
2409: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2410: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2411: const PetscScalar *x,*v;
2412: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2413: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2414: PetscErrorCode ierr;
2415: const PetscInt m = b->AIJ->rmap->n,*idx;
2416: PetscInt n,i;
2419: if (yy != zz) {VecCopy(yy,zz);}
2420: VecGetArrayRead(xx,&x);
2421: VecGetArray(zz,&y);
2422: for (i=0; i<m; i++) {
2423: idx = a->j + a->i[i];
2424: v = a->a + a->i[i];
2425: n = a->i[i+1] - a->i[i];
2426: alpha1 = x[16*i];
2427: alpha2 = x[16*i+1];
2428: alpha3 = x[16*i+2];
2429: alpha4 = x[16*i+3];
2430: alpha5 = x[16*i+4];
2431: alpha6 = x[16*i+5];
2432: alpha7 = x[16*i+6];
2433: alpha8 = x[16*i+7];
2434: alpha9 = x[16*i+8];
2435: alpha10 = x[16*i+9];
2436: alpha11 = x[16*i+10];
2437: alpha12 = x[16*i+11];
2438: alpha13 = x[16*i+12];
2439: alpha14 = x[16*i+13];
2440: alpha15 = x[16*i+14];
2441: alpha16 = x[16*i+15];
2442: while (n-->0) {
2443: y[16*(*idx)] += alpha1*(*v);
2444: y[16*(*idx)+1] += alpha2*(*v);
2445: y[16*(*idx)+2] += alpha3*(*v);
2446: y[16*(*idx)+3] += alpha4*(*v);
2447: y[16*(*idx)+4] += alpha5*(*v);
2448: y[16*(*idx)+5] += alpha6*(*v);
2449: y[16*(*idx)+6] += alpha7*(*v);
2450: y[16*(*idx)+7] += alpha8*(*v);
2451: y[16*(*idx)+8] += alpha9*(*v);
2452: y[16*(*idx)+9] += alpha10*(*v);
2453: y[16*(*idx)+10] += alpha11*(*v);
2454: y[16*(*idx)+11] += alpha12*(*v);
2455: y[16*(*idx)+12] += alpha13*(*v);
2456: y[16*(*idx)+13] += alpha14*(*v);
2457: y[16*(*idx)+14] += alpha15*(*v);
2458: y[16*(*idx)+15] += alpha16*(*v);
2459: idx++; v++;
2460: }
2461: }
2462: PetscLogFlops(32.0*a->nz);
2463: VecRestoreArrayRead(xx,&x);
2464: VecRestoreArray(zz,&y);
2465: return(0);
2466: }
2468: /*--------------------------------------------------------------------------------------------*/
2471: PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2472: {
2473: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2474: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2475: const PetscScalar *x,*v;
2476: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2477: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2478: PetscErrorCode ierr;
2479: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2480: PetscInt nonzerorow=0,n,i,jrow,j;
2483: VecGetArrayRead(xx,&x);
2484: VecGetArray(yy,&y);
2485: idx = a->j;
2486: v = a->a;
2487: ii = a->i;
2489: for (i=0; i<m; i++) {
2490: jrow = ii[i];
2491: n = ii[i+1] - jrow;
2492: sum1 = 0.0;
2493: sum2 = 0.0;
2494: sum3 = 0.0;
2495: sum4 = 0.0;
2496: sum5 = 0.0;
2497: sum6 = 0.0;
2498: sum7 = 0.0;
2499: sum8 = 0.0;
2500: sum9 = 0.0;
2501: sum10 = 0.0;
2502: sum11 = 0.0;
2503: sum12 = 0.0;
2504: sum13 = 0.0;
2505: sum14 = 0.0;
2506: sum15 = 0.0;
2507: sum16 = 0.0;
2508: sum17 = 0.0;
2509: sum18 = 0.0;
2511: nonzerorow += (n>0);
2512: for (j=0; j<n; j++) {
2513: sum1 += v[jrow]*x[18*idx[jrow]];
2514: sum2 += v[jrow]*x[18*idx[jrow]+1];
2515: sum3 += v[jrow]*x[18*idx[jrow]+2];
2516: sum4 += v[jrow]*x[18*idx[jrow]+3];
2517: sum5 += v[jrow]*x[18*idx[jrow]+4];
2518: sum6 += v[jrow]*x[18*idx[jrow]+5];
2519: sum7 += v[jrow]*x[18*idx[jrow]+6];
2520: sum8 += v[jrow]*x[18*idx[jrow]+7];
2521: sum9 += v[jrow]*x[18*idx[jrow]+8];
2522: sum10 += v[jrow]*x[18*idx[jrow]+9];
2523: sum11 += v[jrow]*x[18*idx[jrow]+10];
2524: sum12 += v[jrow]*x[18*idx[jrow]+11];
2525: sum13 += v[jrow]*x[18*idx[jrow]+12];
2526: sum14 += v[jrow]*x[18*idx[jrow]+13];
2527: sum15 += v[jrow]*x[18*idx[jrow]+14];
2528: sum16 += v[jrow]*x[18*idx[jrow]+15];
2529: sum17 += v[jrow]*x[18*idx[jrow]+16];
2530: sum18 += v[jrow]*x[18*idx[jrow]+17];
2531: jrow++;
2532: }
2533: y[18*i] = sum1;
2534: y[18*i+1] = sum2;
2535: y[18*i+2] = sum3;
2536: y[18*i+3] = sum4;
2537: y[18*i+4] = sum5;
2538: y[18*i+5] = sum6;
2539: y[18*i+6] = sum7;
2540: y[18*i+7] = sum8;
2541: y[18*i+8] = sum9;
2542: y[18*i+9] = sum10;
2543: y[18*i+10] = sum11;
2544: y[18*i+11] = sum12;
2545: y[18*i+12] = sum13;
2546: y[18*i+13] = sum14;
2547: y[18*i+14] = sum15;
2548: y[18*i+15] = sum16;
2549: y[18*i+16] = sum17;
2550: y[18*i+17] = sum18;
2551: }
2553: PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);
2554: VecRestoreArrayRead(xx,&x);
2555: VecRestoreArray(yy,&y);
2556: return(0);
2557: }
2561: PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2562: {
2563: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2564: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2565: const PetscScalar *x,*v;
2566: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2567: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2568: PetscErrorCode ierr;
2569: const PetscInt m = b->AIJ->rmap->n,*idx;
2570: PetscInt n,i;
2573: VecSet(yy,0.0);
2574: VecGetArrayRead(xx,&x);
2575: VecGetArray(yy,&y);
2577: for (i=0; i<m; i++) {
2578: idx = a->j + a->i[i];
2579: v = a->a + a->i[i];
2580: n = a->i[i+1] - a->i[i];
2581: alpha1 = x[18*i];
2582: alpha2 = x[18*i+1];
2583: alpha3 = x[18*i+2];
2584: alpha4 = x[18*i+3];
2585: alpha5 = x[18*i+4];
2586: alpha6 = x[18*i+5];
2587: alpha7 = x[18*i+6];
2588: alpha8 = x[18*i+7];
2589: alpha9 = x[18*i+8];
2590: alpha10 = x[18*i+9];
2591: alpha11 = x[18*i+10];
2592: alpha12 = x[18*i+11];
2593: alpha13 = x[18*i+12];
2594: alpha14 = x[18*i+13];
2595: alpha15 = x[18*i+14];
2596: alpha16 = x[18*i+15];
2597: alpha17 = x[18*i+16];
2598: alpha18 = x[18*i+17];
2599: while (n-->0) {
2600: y[18*(*idx)] += alpha1*(*v);
2601: y[18*(*idx)+1] += alpha2*(*v);
2602: y[18*(*idx)+2] += alpha3*(*v);
2603: y[18*(*idx)+3] += alpha4*(*v);
2604: y[18*(*idx)+4] += alpha5*(*v);
2605: y[18*(*idx)+5] += alpha6*(*v);
2606: y[18*(*idx)+6] += alpha7*(*v);
2607: y[18*(*idx)+7] += alpha8*(*v);
2608: y[18*(*idx)+8] += alpha9*(*v);
2609: y[18*(*idx)+9] += alpha10*(*v);
2610: y[18*(*idx)+10] += alpha11*(*v);
2611: y[18*(*idx)+11] += alpha12*(*v);
2612: y[18*(*idx)+12] += alpha13*(*v);
2613: y[18*(*idx)+13] += alpha14*(*v);
2614: y[18*(*idx)+14] += alpha15*(*v);
2615: y[18*(*idx)+15] += alpha16*(*v);
2616: y[18*(*idx)+16] += alpha17*(*v);
2617: y[18*(*idx)+17] += alpha18*(*v);
2618: idx++; v++;
2619: }
2620: }
2621: PetscLogFlops(36.0*a->nz);
2622: VecRestoreArrayRead(xx,&x);
2623: VecRestoreArray(yy,&y);
2624: return(0);
2625: }
2629: PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2630: {
2631: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2632: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2633: const PetscScalar *x,*v;
2634: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2635: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2636: PetscErrorCode ierr;
2637: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2638: PetscInt n,i,jrow,j;
2641: if (yy != zz) {VecCopy(yy,zz);}
2642: VecGetArrayRead(xx,&x);
2643: VecGetArray(zz,&y);
2644: idx = a->j;
2645: v = a->a;
2646: ii = a->i;
2648: for (i=0; i<m; i++) {
2649: jrow = ii[i];
2650: n = ii[i+1] - jrow;
2651: sum1 = 0.0;
2652: sum2 = 0.0;
2653: sum3 = 0.0;
2654: sum4 = 0.0;
2655: sum5 = 0.0;
2656: sum6 = 0.0;
2657: sum7 = 0.0;
2658: sum8 = 0.0;
2659: sum9 = 0.0;
2660: sum10 = 0.0;
2661: sum11 = 0.0;
2662: sum12 = 0.0;
2663: sum13 = 0.0;
2664: sum14 = 0.0;
2665: sum15 = 0.0;
2666: sum16 = 0.0;
2667: sum17 = 0.0;
2668: sum18 = 0.0;
2669: for (j=0; j<n; j++) {
2670: sum1 += v[jrow]*x[18*idx[jrow]];
2671: sum2 += v[jrow]*x[18*idx[jrow]+1];
2672: sum3 += v[jrow]*x[18*idx[jrow]+2];
2673: sum4 += v[jrow]*x[18*idx[jrow]+3];
2674: sum5 += v[jrow]*x[18*idx[jrow]+4];
2675: sum6 += v[jrow]*x[18*idx[jrow]+5];
2676: sum7 += v[jrow]*x[18*idx[jrow]+6];
2677: sum8 += v[jrow]*x[18*idx[jrow]+7];
2678: sum9 += v[jrow]*x[18*idx[jrow]+8];
2679: sum10 += v[jrow]*x[18*idx[jrow]+9];
2680: sum11 += v[jrow]*x[18*idx[jrow]+10];
2681: sum12 += v[jrow]*x[18*idx[jrow]+11];
2682: sum13 += v[jrow]*x[18*idx[jrow]+12];
2683: sum14 += v[jrow]*x[18*idx[jrow]+13];
2684: sum15 += v[jrow]*x[18*idx[jrow]+14];
2685: sum16 += v[jrow]*x[18*idx[jrow]+15];
2686: sum17 += v[jrow]*x[18*idx[jrow]+16];
2687: sum18 += v[jrow]*x[18*idx[jrow]+17];
2688: jrow++;
2689: }
2690: y[18*i] += sum1;
2691: y[18*i+1] += sum2;
2692: y[18*i+2] += sum3;
2693: y[18*i+3] += sum4;
2694: y[18*i+4] += sum5;
2695: y[18*i+5] += sum6;
2696: y[18*i+6] += sum7;
2697: y[18*i+7] += sum8;
2698: y[18*i+8] += sum9;
2699: y[18*i+9] += sum10;
2700: y[18*i+10] += sum11;
2701: y[18*i+11] += sum12;
2702: y[18*i+12] += sum13;
2703: y[18*i+13] += sum14;
2704: y[18*i+14] += sum15;
2705: y[18*i+15] += sum16;
2706: y[18*i+16] += sum17;
2707: y[18*i+17] += sum18;
2708: }
2710: PetscLogFlops(36.0*a->nz);
2711: VecRestoreArrayRead(xx,&x);
2712: VecRestoreArray(zz,&y);
2713: return(0);
2714: }
2718: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2719: {
2720: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2721: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2722: const PetscScalar *x,*v;
2723: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2724: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2725: PetscErrorCode ierr;
2726: const PetscInt m = b->AIJ->rmap->n,*idx;
2727: PetscInt n,i;
2730: if (yy != zz) {VecCopy(yy,zz);}
2731: VecGetArrayRead(xx,&x);
2732: VecGetArray(zz,&y);
2733: for (i=0; i<m; i++) {
2734: idx = a->j + a->i[i];
2735: v = a->a + a->i[i];
2736: n = a->i[i+1] - a->i[i];
2737: alpha1 = x[18*i];
2738: alpha2 = x[18*i+1];
2739: alpha3 = x[18*i+2];
2740: alpha4 = x[18*i+3];
2741: alpha5 = x[18*i+4];
2742: alpha6 = x[18*i+5];
2743: alpha7 = x[18*i+6];
2744: alpha8 = x[18*i+7];
2745: alpha9 = x[18*i+8];
2746: alpha10 = x[18*i+9];
2747: alpha11 = x[18*i+10];
2748: alpha12 = x[18*i+11];
2749: alpha13 = x[18*i+12];
2750: alpha14 = x[18*i+13];
2751: alpha15 = x[18*i+14];
2752: alpha16 = x[18*i+15];
2753: alpha17 = x[18*i+16];
2754: alpha18 = x[18*i+17];
2755: while (n-->0) {
2756: y[18*(*idx)] += alpha1*(*v);
2757: y[18*(*idx)+1] += alpha2*(*v);
2758: y[18*(*idx)+2] += alpha3*(*v);
2759: y[18*(*idx)+3] += alpha4*(*v);
2760: y[18*(*idx)+4] += alpha5*(*v);
2761: y[18*(*idx)+5] += alpha6*(*v);
2762: y[18*(*idx)+6] += alpha7*(*v);
2763: y[18*(*idx)+7] += alpha8*(*v);
2764: y[18*(*idx)+8] += alpha9*(*v);
2765: y[18*(*idx)+9] += alpha10*(*v);
2766: y[18*(*idx)+10] += alpha11*(*v);
2767: y[18*(*idx)+11] += alpha12*(*v);
2768: y[18*(*idx)+12] += alpha13*(*v);
2769: y[18*(*idx)+13] += alpha14*(*v);
2770: y[18*(*idx)+14] += alpha15*(*v);
2771: y[18*(*idx)+15] += alpha16*(*v);
2772: y[18*(*idx)+16] += alpha17*(*v);
2773: y[18*(*idx)+17] += alpha18*(*v);
2774: idx++; v++;
2775: }
2776: }
2777: PetscLogFlops(36.0*a->nz);
2778: VecRestoreArrayRead(xx,&x);
2779: VecRestoreArray(zz,&y);
2780: return(0);
2781: }
2785: PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2786: {
2787: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2788: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2789: const PetscScalar *x,*v;
2790: PetscScalar *y,*sums;
2791: PetscErrorCode ierr;
2792: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2793: PetscInt n,i,jrow,j,dof = b->dof,k;
2796: VecGetArrayRead(xx,&x);
2797: VecSet(yy,0.0);
2798: VecGetArray(yy,&y);
2799: idx = a->j;
2800: v = a->a;
2801: ii = a->i;
2803: for (i=0; i<m; i++) {
2804: jrow = ii[i];
2805: n = ii[i+1] - jrow;
2806: sums = y + dof*i;
2807: for (j=0; j<n; j++) {
2808: for (k=0; k<dof; k++) {
2809: sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2810: }
2811: jrow++;
2812: }
2813: }
2815: PetscLogFlops(2.0*dof*a->nz);
2816: VecRestoreArrayRead(xx,&x);
2817: VecRestoreArray(yy,&y);
2818: return(0);
2819: }
2823: PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2824: {
2825: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2826: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2827: const PetscScalar *x,*v;
2828: PetscScalar *y,*sums;
2829: PetscErrorCode ierr;
2830: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2831: PetscInt n,i,jrow,j,dof = b->dof,k;
2834: if (yy != zz) {VecCopy(yy,zz);}
2835: VecGetArrayRead(xx,&x);
2836: VecGetArray(zz,&y);
2837: idx = a->j;
2838: v = a->a;
2839: ii = a->i;
2841: for (i=0; i<m; i++) {
2842: jrow = ii[i];
2843: n = ii[i+1] - jrow;
2844: sums = y + dof*i;
2845: for (j=0; j<n; j++) {
2846: for (k=0; k<dof; k++) {
2847: sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2848: }
2849: jrow++;
2850: }
2851: }
2853: PetscLogFlops(2.0*dof*a->nz);
2854: VecRestoreArrayRead(xx,&x);
2855: VecRestoreArray(zz,&y);
2856: return(0);
2857: }
2861: PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2862: {
2863: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2864: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2865: const PetscScalar *x,*v,*alpha;
2866: PetscScalar *y;
2867: PetscErrorCode ierr;
2868: const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof;
2869: PetscInt n,i,k;
2872: VecGetArrayRead(xx,&x);
2873: VecSet(yy,0.0);
2874: VecGetArray(yy,&y);
2875: for (i=0; i<m; i++) {
2876: idx = a->j + a->i[i];
2877: v = a->a + a->i[i];
2878: n = a->i[i+1] - a->i[i];
2879: alpha = x + dof*i;
2880: while (n-->0) {
2881: for (k=0; k<dof; k++) {
2882: y[dof*(*idx)+k] += alpha[k]*(*v);
2883: }
2884: idx++; v++;
2885: }
2886: }
2887: PetscLogFlops(2.0*dof*a->nz);
2888: VecRestoreArrayRead(xx,&x);
2889: VecRestoreArray(yy,&y);
2890: return(0);
2891: }
2895: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2896: {
2897: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2898: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2899: const PetscScalar *x,*v,*alpha;
2900: PetscScalar *y;
2901: PetscErrorCode ierr;
2902: const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof;
2903: PetscInt n,i,k;
2906: if (yy != zz) {VecCopy(yy,zz);}
2907: VecGetArrayRead(xx,&x);
2908: VecGetArray(zz,&y);
2909: for (i=0; i<m; i++) {
2910: idx = a->j + a->i[i];
2911: v = a->a + a->i[i];
2912: n = a->i[i+1] - a->i[i];
2913: alpha = x + dof*i;
2914: while (n-->0) {
2915: for (k=0; k<dof; k++) {
2916: y[dof*(*idx)+k] += alpha[k]*(*v);
2917: }
2918: idx++; v++;
2919: }
2920: }
2921: PetscLogFlops(2.0*dof*a->nz);
2922: VecRestoreArrayRead(xx,&x);
2923: VecRestoreArray(zz,&y);
2924: return(0);
2925: }
2927: /*===================================================================================*/
2930: PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2931: {
2932: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2936: /* start the scatter */
2937: VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2938: (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
2939: VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2940: (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
2941: return(0);
2942: }
2946: PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2947: {
2948: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2952: (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2953: (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
2954: VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2955: VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2956: return(0);
2957: }
2961: PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2962: {
2963: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2967: /* start the scatter */
2968: VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2969: (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
2970: VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2971: (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);
2972: return(0);
2973: }
2977: PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2978: {
2979: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2983: (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2984: VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2985: (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
2986: VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2987: return(0);
2988: }
2990: /* ----------------------------------------------------------------*/
2993: PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2994: {
2995: PetscErrorCode ierr;
2996: PetscFreeSpaceList free_space=NULL,current_space=NULL;
2997: Mat_SeqMAIJ *pp =(Mat_SeqMAIJ*)PP->data;
2998: Mat P =pp->AIJ;
2999: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
3000: PetscInt *pti,*ptj,*ptJ;
3001: PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
3002: const PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
3003: PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
3004: MatScalar *ca;
3005: const PetscInt *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;
3008: /* Get ij structure of P^T */
3009: MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
3011: cn = pn*ppdof;
3012: /* Allocate ci array, arrays for fill computation and */
3013: /* free space for accumulating nonzero column info */
3014: PetscMalloc1(cn+1,&ci);
3015: ci[0] = 0;
3017: /* Work arrays for rows of P^T*A */
3018: PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow);
3019: PetscMemzero(ptadenserow,an*sizeof(PetscInt));
3020: PetscMemzero(denserow,cn*sizeof(PetscInt));
3022: /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
3023: /* This should be reasonable if sparsity of PtAP is similar to that of A. */
3024: /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
3025: PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);
3026: current_space = free_space;
3028: /* Determine symbolic info for each row of C: */
3029: for (i=0; i<pn; i++) {
3030: ptnzi = pti[i+1] - pti[i];
3031: ptJ = ptj + pti[i];
3032: for (dof=0; dof<ppdof; dof++) {
3033: ptanzi = 0;
3034: /* Determine symbolic row of PtA: */
3035: for (j=0; j<ptnzi; j++) {
3036: /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
3037: arow = ptJ[j]*ppdof + dof;
3038: /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
3039: anzj = ai[arow+1] - ai[arow];
3040: ajj = aj + ai[arow];
3041: for (k=0; k<anzj; k++) {
3042: if (!ptadenserow[ajj[k]]) {
3043: ptadenserow[ajj[k]] = -1;
3044: ptasparserow[ptanzi++] = ajj[k];
3045: }
3046: }
3047: }
3048: /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
3049: ptaj = ptasparserow;
3050: cnzi = 0;
3051: for (j=0; j<ptanzi; j++) {
3052: /* Get offset within block of P */
3053: pshift = *ptaj%ppdof;
3054: /* Get block row of P */
3055: prow = (*ptaj++)/ppdof; /* integer division */
3056: /* P has same number of nonzeros per row as the compressed form */
3057: pnzj = pi[prow+1] - pi[prow];
3058: pjj = pj + pi[prow];
3059: for (k=0;k<pnzj;k++) {
3060: /* Locations in C are shifted by the offset within the block */
3061: /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
3062: if (!denserow[pjj[k]*ppdof+pshift]) {
3063: denserow[pjj[k]*ppdof+pshift] = -1;
3064: sparserow[cnzi++] = pjj[k]*ppdof+pshift;
3065: }
3066: }
3067: }
3069: /* sort sparserow */
3070: PetscSortInt(cnzi,sparserow);
3072: /* If free space is not available, make more free space */
3073: /* Double the amount of total space in the list */
3074: if (current_space->local_remaining<cnzi) {
3075: PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);
3076: }
3078: /* Copy data into free space, and zero out denserows */
3079: PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));
3081: current_space->array += cnzi;
3082: current_space->local_used += cnzi;
3083: current_space->local_remaining -= cnzi;
3085: for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
3086: for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0;
3088: /* Aside: Perhaps we should save the pta info for the numerical factorization. */
3089: /* For now, we will recompute what is needed. */
3090: ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
3091: }
3092: }
3093: /* nnz is now stored in ci[ptm], column indices are in the list of free space */
3094: /* Allocate space for cj, initialize cj, and */
3095: /* destroy list of free space and other temporary array(s) */
3096: PetscMalloc1(ci[cn]+1,&cj);
3097: PetscFreeSpaceContiguous(&free_space,cj);
3098: PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);
3100: /* Allocate space for ca */
3101: PetscCalloc1(ci[cn]+1,&ca);
3103: /* put together the new matrix */
3104: MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,C);
3105: MatSetBlockSize(*C,pp->dof);
3107: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3108: /* Since these are PETSc arrays, change flags to free them as necessary. */
3109: c = (Mat_SeqAIJ*)((*C)->data);
3110: c->free_a = PETSC_TRUE;
3111: c->free_ij = PETSC_TRUE;
3112: c->nonew = 0;
3114: (*C)->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
3116: /* Clean up. */
3117: MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
3118: return(0);
3119: }
3123: PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
3124: {
3125: /* This routine requires testing -- first draft only */
3126: PetscErrorCode ierr;
3127: Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data;
3128: Mat P =pp->AIJ;
3129: Mat_SeqAIJ *a = (Mat_SeqAIJ*) A->data;
3130: Mat_SeqAIJ *p = (Mat_SeqAIJ*) P->data;
3131: Mat_SeqAIJ *c = (Mat_SeqAIJ*) C->data;
3132: const PetscInt *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj;
3133: const PetscInt *ci=c->i,*cj=c->j,*cjj;
3134: const PetscInt am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3135: PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3136: const MatScalar *aa=a->a,*pa=p->a,*pA,*paj;
3137: MatScalar *ca=c->a,*caj,*apa;
3140: /* Allocate temporary array for storage of one row of A*P */
3141: PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense);
3143: /* Clear old values in C */
3144: PetscMemzero(ca,ci[cm]*sizeof(MatScalar));
3146: for (i=0; i<am; i++) {
3147: /* Form sparse row of A*P */
3148: anzi = ai[i+1] - ai[i];
3149: apnzj = 0;
3150: for (j=0; j<anzi; j++) {
3151: /* Get offset within block of P */
3152: pshift = *aj%ppdof;
3153: /* Get block row of P */
3154: prow = *aj++/ppdof; /* integer division */
3155: pnzj = pi[prow+1] - pi[prow];
3156: pjj = pj + pi[prow];
3157: paj = pa + pi[prow];
3158: for (k=0; k<pnzj; k++) {
3159: poffset = pjj[k]*ppdof+pshift;
3160: if (!apjdense[poffset]) {
3161: apjdense[poffset] = -1;
3162: apj[apnzj++] = poffset;
3163: }
3164: apa[poffset] += (*aa)*paj[k];
3165: }
3166: PetscLogFlops(2.0*pnzj);
3167: aa++;
3168: }
3170: /* Sort the j index array for quick sparse axpy. */
3171: /* Note: a array does not need sorting as it is in dense storage locations. */
3172: PetscSortInt(apnzj,apj);
3174: /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
3175: prow = i/ppdof; /* integer division */
3176: pshift = i%ppdof;
3177: poffset = pi[prow];
3178: pnzi = pi[prow+1] - poffset;
3179: /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
3180: pJ = pj+poffset;
3181: pA = pa+poffset;
3182: for (j=0; j<pnzi; j++) {
3183: crow = (*pJ)*ppdof+pshift;
3184: cjj = cj + ci[crow];
3185: caj = ca + ci[crow];
3186: pJ++;
3187: /* Perform sparse axpy operation. Note cjj includes apj. */
3188: for (k=0,nextap=0; nextap<apnzj; k++) {
3189: if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]];
3190: }
3191: PetscLogFlops(2.0*apnzj);
3192: pA++;
3193: }
3195: /* Zero the current row info for A*P */
3196: for (j=0; j<apnzj; j++) {
3197: apa[apj[j]] = 0.;
3198: apjdense[apj[j]] = 0;
3199: }
3200: }
3202: /* Assemble the final matrix and clean up */
3203: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
3204: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
3205: PetscFree3(apa,apj,apjdense);
3206: return(0);
3207: }
3211: PetscErrorCode MatPtAP_SeqAIJ_SeqMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
3212: {
3216: if (scall == MAT_INITIAL_MATRIX) {
3217: PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);
3218: MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,fill,C);
3219: PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);
3220: }
3221: PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);
3222: MatPtAPNumeric_SeqAIJ_SeqMAIJ(A,P,*C);
3223: PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);
3224: return(0);
3225: }
3229: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
3230: {
3234: /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */
3235: MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);
3236: ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);
3237: return(0);
3238: }
3242: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
3243: {
3245: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
3246: return(0);
3247: }
3251: PetscErrorCode MatPtAP_MPIAIJ_MPIMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
3252: {
3256: if (scall == MAT_INITIAL_MATRIX) {
3257: PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);
3258: MatPtAPSymbolic_MPIAIJ_MPIMAIJ(A,P,fill,C);
3259: PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);
3260: }
3261: PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);
3262: ((*C)->ops->ptapnumeric)(A,P,*C);
3263: PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);
3264: return(0);
3265: }
3269: PETSC_EXTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3270: {
3271: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
3272: Mat a = b->AIJ,B;
3273: Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data;
3275: PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3276: PetscInt *cols;
3277: PetscScalar *vals;
3280: MatGetSize(a,&m,&n);
3281: PetscMalloc1(dof*m,&ilen);
3282: for (i=0; i<m; i++) {
3283: nmax = PetscMax(nmax,aij->ilen[i]);
3284: for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i];
3285: }
3286: MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);
3287: PetscFree(ilen);
3288: PetscMalloc1(nmax,&icols);
3289: ii = 0;
3290: for (i=0; i<m; i++) {
3291: MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3292: for (j=0; j<dof; j++) {
3293: for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j;
3294: MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3295: ii++;
3296: }
3297: MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3298: }
3299: PetscFree(icols);
3300: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3301: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3303: if (reuse == MAT_REUSE_MATRIX) {
3304: MatHeaderReplace(A,B);
3305: } else {
3306: *newmat = B;
3307: }
3308: return(0);
3309: }
3311: #include <../src/mat/impls/aij/mpi/mpiaij.h>
3315: PETSC_EXTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3316: {
3317: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data;
3318: Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
3319: Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
3320: Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
3321: Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
3322: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data;
3323: PetscInt dof = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0;
3324: PetscInt *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL;
3325: PetscInt rstart,cstart,*garray,ii,k;
3327: PetscScalar *vals,*ovals;
3330: PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz);
3331: for (i=0; i<A->rmap->n/dof; i++) {
3332: nmax = PetscMax(nmax,AIJ->ilen[i]);
3333: onmax = PetscMax(onmax,OAIJ->ilen[i]);
3334: for (j=0; j<dof; j++) {
3335: dnz[dof*i+j] = AIJ->ilen[i];
3336: onz[dof*i+j] = OAIJ->ilen[i];
3337: }
3338: }
3339: MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);
3340: MatSetBlockSize(B,dof);
3341: PetscFree2(dnz,onz);
3343: PetscMalloc2(nmax,&icols,onmax,&oicols);
3344: rstart = dof*maij->A->rmap->rstart;
3345: cstart = dof*maij->A->cmap->rstart;
3346: garray = mpiaij->garray;
3348: ii = rstart;
3349: for (i=0; i<A->rmap->n/dof; i++) {
3350: MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3351: MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3352: for (j=0; j<dof; j++) {
3353: for (k=0; k<ncols; k++) {
3354: icols[k] = cstart + dof*cols[k]+j;
3355: }
3356: for (k=0; k<oncols; k++) {
3357: oicols[k] = dof*garray[ocols[k]]+j;
3358: }
3359: MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3360: MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);
3361: ii++;
3362: }
3363: MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3364: MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3365: }
3366: PetscFree2(icols,oicols);
3368: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3369: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3371: if (reuse == MAT_REUSE_MATRIX) {
3372: PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
3373: ((PetscObject)A)->refct = 1;
3375: MatHeaderReplace(A,B);
3377: ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3378: } else {
3379: *newmat = B;
3380: }
3381: return(0);
3382: }
3386: PetscErrorCode MatGetSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
3387: {
3389: Mat A;
3392: MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);
3393: MatGetSubMatrix(A,isrow,iscol,cll,newmat);
3394: MatDestroy(&A);
3395: return(0);
3396: }
3398: /* ---------------------------------------------------------------------------------- */
3401: /*@C
3402: MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
3403: operations for multicomponent problems. It interpolates each component the same
3404: way independently. The matrix type is based on MATSEQAIJ for sequential matrices,
3405: and MATMPIAIJ for distributed matrices.
3407: Collective
3409: Input Parameters:
3410: + A - the AIJ matrix describing the action on blocks
3411: - dof - the block size (number of components per node)
3413: Output Parameter:
3414: . maij - the new MAIJ matrix
3416: Operations provided:
3417: + MatMult
3418: . MatMultTranspose
3419: . MatMultAdd
3420: . MatMultTransposeAdd
3421: - MatView
3423: Level: advanced
3425: .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ
3426: @*/
3427: PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
3428: {
3430: PetscMPIInt size;
3431: PetscInt n;
3432: Mat B;
3435: PetscObjectReference((PetscObject)A);
3437: if (dof == 1) *maij = A;
3438: else {
3439: MatCreate(PetscObjectComm((PetscObject)A),&B);
3440: MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);
3441: PetscLayoutSetBlockSize(B->rmap,dof);
3442: PetscLayoutSetBlockSize(B->cmap,dof);
3443: PetscLayoutSetUp(B->rmap);
3444: PetscLayoutSetUp(B->cmap);
3446: B->assembled = PETSC_TRUE;
3448: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
3449: if (size == 1) {
3450: Mat_SeqMAIJ *b;
3452: MatSetType(B,MATSEQMAIJ);
3454: B->ops->setup = NULL;
3455: B->ops->destroy = MatDestroy_SeqMAIJ;
3456: B->ops->view = MatView_SeqMAIJ;
3457: b = (Mat_SeqMAIJ*)B->data;
3458: b->dof = dof;
3459: b->AIJ = A;
3461: if (dof == 2) {
3462: B->ops->mult = MatMult_SeqMAIJ_2;
3463: B->ops->multadd = MatMultAdd_SeqMAIJ_2;
3464: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2;
3465: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3466: } else if (dof == 3) {
3467: B->ops->mult = MatMult_SeqMAIJ_3;
3468: B->ops->multadd = MatMultAdd_SeqMAIJ_3;
3469: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3;
3470: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3471: } else if (dof == 4) {
3472: B->ops->mult = MatMult_SeqMAIJ_4;
3473: B->ops->multadd = MatMultAdd_SeqMAIJ_4;
3474: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4;
3475: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3476: } else if (dof == 5) {
3477: B->ops->mult = MatMult_SeqMAIJ_5;
3478: B->ops->multadd = MatMultAdd_SeqMAIJ_5;
3479: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5;
3480: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
3481: } else if (dof == 6) {
3482: B->ops->mult = MatMult_SeqMAIJ_6;
3483: B->ops->multadd = MatMultAdd_SeqMAIJ_6;
3484: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6;
3485: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3486: } else if (dof == 7) {
3487: B->ops->mult = MatMult_SeqMAIJ_7;
3488: B->ops->multadd = MatMultAdd_SeqMAIJ_7;
3489: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7;
3490: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
3491: } else if (dof == 8) {
3492: B->ops->mult = MatMult_SeqMAIJ_8;
3493: B->ops->multadd = MatMultAdd_SeqMAIJ_8;
3494: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8;
3495: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
3496: } else if (dof == 9) {
3497: B->ops->mult = MatMult_SeqMAIJ_9;
3498: B->ops->multadd = MatMultAdd_SeqMAIJ_9;
3499: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9;
3500: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3501: } else if (dof == 10) {
3502: B->ops->mult = MatMult_SeqMAIJ_10;
3503: B->ops->multadd = MatMultAdd_SeqMAIJ_10;
3504: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10;
3505: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3506: } else if (dof == 11) {
3507: B->ops->mult = MatMult_SeqMAIJ_11;
3508: B->ops->multadd = MatMultAdd_SeqMAIJ_11;
3509: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11;
3510: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
3511: } else if (dof == 16) {
3512: B->ops->mult = MatMult_SeqMAIJ_16;
3513: B->ops->multadd = MatMultAdd_SeqMAIJ_16;
3514: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16;
3515: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3516: } else if (dof == 18) {
3517: B->ops->mult = MatMult_SeqMAIJ_18;
3518: B->ops->multadd = MatMultAdd_SeqMAIJ_18;
3519: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18;
3520: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
3521: } else {
3522: B->ops->mult = MatMult_SeqMAIJ_N;
3523: B->ops->multadd = MatMultAdd_SeqMAIJ_N;
3524: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N;
3525: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
3526: }
3527: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ);
3528: PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaij_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);
3529: } else {
3530: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)A->data;
3531: Mat_MPIMAIJ *b;
3532: IS from,to;
3533: Vec gvec;
3535: MatSetType(B,MATMPIMAIJ);
3537: B->ops->setup = NULL;
3538: B->ops->destroy = MatDestroy_MPIMAIJ;
3539: B->ops->view = MatView_MPIMAIJ;
3541: b = (Mat_MPIMAIJ*)B->data;
3542: b->dof = dof;
3543: b->A = A;
3545: MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);
3546: MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);
3548: VecGetSize(mpiaij->lvec,&n);
3549: VecCreate(PETSC_COMM_SELF,&b->w);
3550: VecSetSizes(b->w,n*dof,n*dof);
3551: VecSetBlockSize(b->w,dof);
3552: VecSetType(b->w,VECSEQ);
3554: /* create two temporary Index sets for build scatter gather */
3555: ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);
3556: ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);
3558: /* create temporary global vector to generate scatter context */
3559: VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);
3561: /* generate the scatter context */
3562: VecScatterCreate(gvec,from,b->w,to,&b->ctx);
3564: ISDestroy(&from);
3565: ISDestroy(&to);
3566: VecDestroy(&gvec);
3568: B->ops->mult = MatMult_MPIMAIJ_dof;
3569: B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof;
3570: B->ops->multadd = MatMultAdd_MPIMAIJ_dof;
3571: B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
3573: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ);
3574: PetscObjectComposeFunction((PetscObject)B,"MatPtAP_mpiaij_mpimaij_C",MatPtAP_MPIAIJ_MPIMAIJ);
3575: }
3576: B->ops->getsubmatrix = MatGetSubMatrix_MAIJ;
3577: MatSetUp(B);
3578: *maij = B;
3579: MatViewFromOptions(B,NULL,"-mat_view");
3580: }
3581: return(0);
3582: }