Actual source code: maij.c
petsc-3.5.4 2015-05-23
2: /*
3: Defines the basic matrix operations for the MAIJ matrix storage format.
4: This format is used for restriction and interpolation operations for
5: multicomponent problems. It interpolates each component the same way
6: independently.
8: We provide:
9: MatMult()
10: MatMultTranspose()
11: MatMultTransposeAdd()
12: MatMultAdd()
13: and
14: MatCreateMAIJ(Mat,dof,Mat*)
16: This single directory handles both the sequential and parallel codes
17: */
19: #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
20: #include <../src/mat/utils/freespace.h>
21: #include <petsc-private/vecimpl.h>
25: /*@
26: MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix
28: Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel
30: Input Parameter:
31: . A - the MAIJ matrix
33: Output Parameter:
34: . B - the AIJ matrix
36: Level: advanced
38: Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
40: .seealso: MatCreateMAIJ()
41: @*/
42: PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B)
43: {
45: PetscBool ismpimaij,isseqmaij;
48: PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
49: PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
50: if (ismpimaij) {
51: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
53: *B = b->A;
54: } else if (isseqmaij) {
55: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
57: *B = b->AIJ;
58: } else {
59: *B = A;
60: }
61: return(0);
62: }
66: /*@C
67: MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size
69: Logically Collective
71: Input Parameter:
72: + A - the MAIJ matrix
73: - dof - the block size for the new matrix
75: Output Parameter:
76: . B - the new MAIJ matrix
78: Level: advanced
80: .seealso: MatCreateMAIJ()
81: @*/
82: PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
83: {
85: Mat Aij = NULL;
89: MatMAIJGetAIJ(A,&Aij);
90: MatCreateMAIJ(Aij,dof,B);
91: return(0);
92: }
96: PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
97: {
99: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
102: MatDestroy(&b->AIJ);
103: PetscFree(A->data);
104: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL);
105: PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaij_seqmaij_C",NULL);
106: return(0);
107: }
111: PetscErrorCode MatSetUp_MAIJ(Mat A)
112: {
114: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices");
115: return(0);
116: }
120: PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
121: {
123: Mat B;
126: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
127: MatView(B,viewer);
128: MatDestroy(&B);
129: return(0);
130: }
134: PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
135: {
137: Mat B;
140: MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
141: MatView(B,viewer);
142: MatDestroy(&B);
143: return(0);
144: }
148: PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
149: {
151: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
154: MatDestroy(&b->AIJ);
155: MatDestroy(&b->OAIJ);
156: MatDestroy(&b->A);
157: VecScatterDestroy(&b->ctx);
158: VecDestroy(&b->w);
159: PetscFree(A->data);
160: PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL);
161: PetscObjectComposeFunction((PetscObject)A,"MatPtAP_mpiaij_mpimaij_C",NULL);
162: PetscObjectChangeTypeName((PetscObject)A,0);
163: return(0);
164: }
166: /*MC
167: MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
168: multicomponent problems, interpolating or restricting each component the same way independently.
169: The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
171: Operations provided:
172: . MatMult
173: . MatMultTranspose
174: . MatMultAdd
175: . MatMultTransposeAdd
177: Level: advanced
179: .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ()
180: M*/
184: PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
185: {
187: Mat_MPIMAIJ *b;
188: PetscMPIInt size;
191: PetscNewLog(A,&b);
192: A->data = (void*)b;
194: PetscMemzero(A->ops,sizeof(struct _MatOps));
196: A->ops->setup = MatSetUp_MAIJ;
198: b->AIJ = 0;
199: b->dof = 0;
200: b->OAIJ = 0;
201: b->ctx = 0;
202: b->w = 0;
203: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
204: if (size == 1) {
205: PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);
206: } else {
207: PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);
208: }
209: return(0);
210: }
212: /* --------------------------------------------------------------------------------------*/
215: PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
216: {
217: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
218: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
219: const PetscScalar *x,*v;
220: PetscScalar *y, sum1, sum2;
221: PetscErrorCode ierr;
222: PetscInt nonzerorow=0,n,i,jrow,j;
223: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
226: VecGetArrayRead(xx,&x);
227: VecGetArray(yy,&y);
228: idx = a->j;
229: v = a->a;
230: ii = a->i;
232: for (i=0; i<m; i++) {
233: jrow = ii[i];
234: n = ii[i+1] - jrow;
235: sum1 = 0.0;
236: sum2 = 0.0;
238: nonzerorow += (n>0);
239: for (j=0; j<n; j++) {
240: sum1 += v[jrow]*x[2*idx[jrow]];
241: sum2 += v[jrow]*x[2*idx[jrow]+1];
242: jrow++;
243: }
244: y[2*i] = sum1;
245: y[2*i+1] = sum2;
246: }
248: PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);
249: VecRestoreArrayRead(xx,&x);
250: VecRestoreArray(yy,&y);
251: return(0);
252: }
256: PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
257: {
258: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
259: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
260: const PetscScalar *x,*v;
261: PetscScalar *y,alpha1,alpha2;
262: PetscErrorCode ierr;
263: PetscInt n,i;
264: const PetscInt m = b->AIJ->rmap->n,*idx;
267: VecSet(yy,0.0);
268: VecGetArrayRead(xx,&x);
269: VecGetArray(yy,&y);
271: for (i=0; i<m; i++) {
272: idx = a->j + a->i[i];
273: v = a->a + a->i[i];
274: n = a->i[i+1] - a->i[i];
275: alpha1 = x[2*i];
276: alpha2 = x[2*i+1];
277: while (n-->0) {
278: y[2*(*idx)] += alpha1*(*v);
279: y[2*(*idx)+1] += alpha2*(*v);
280: idx++; v++;
281: }
282: }
283: PetscLogFlops(4.0*a->nz);
284: VecRestoreArrayRead(xx,&x);
285: VecRestoreArray(yy,&y);
286: return(0);
287: }
291: PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
292: {
293: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
294: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
295: const PetscScalar *x,*v;
296: PetscScalar *y,sum1, sum2;
297: PetscErrorCode ierr;
298: PetscInt n,i,jrow,j;
299: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
302: if (yy != zz) {VecCopy(yy,zz);}
303: VecGetArrayRead(xx,&x);
304: VecGetArray(zz,&y);
305: idx = a->j;
306: v = a->a;
307: ii = a->i;
309: for (i=0; i<m; i++) {
310: jrow = ii[i];
311: n = ii[i+1] - jrow;
312: sum1 = 0.0;
313: sum2 = 0.0;
314: for (j=0; j<n; j++) {
315: sum1 += v[jrow]*x[2*idx[jrow]];
316: sum2 += v[jrow]*x[2*idx[jrow]+1];
317: jrow++;
318: }
319: y[2*i] += sum1;
320: y[2*i+1] += sum2;
321: }
323: PetscLogFlops(4.0*a->nz);
324: VecRestoreArrayRead(xx,&x);
325: VecRestoreArray(zz,&y);
326: return(0);
327: }
330: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
331: {
332: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
333: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
334: const PetscScalar *x,*v;
335: PetscScalar *y,alpha1,alpha2;
336: PetscErrorCode ierr;
337: PetscInt n,i;
338: const PetscInt m = b->AIJ->rmap->n,*idx;
341: if (yy != zz) {VecCopy(yy,zz);}
342: VecGetArrayRead(xx,&x);
343: VecGetArray(zz,&y);
345: for (i=0; i<m; i++) {
346: idx = a->j + a->i[i];
347: v = a->a + a->i[i];
348: n = a->i[i+1] - a->i[i];
349: alpha1 = x[2*i];
350: alpha2 = x[2*i+1];
351: while (n-->0) {
352: y[2*(*idx)] += alpha1*(*v);
353: y[2*(*idx)+1] += alpha2*(*v);
354: idx++; v++;
355: }
356: }
357: PetscLogFlops(4.0*a->nz);
358: VecRestoreArrayRead(xx,&x);
359: VecRestoreArray(zz,&y);
360: return(0);
361: }
362: /* --------------------------------------------------------------------------------------*/
365: PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
366: {
367: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
368: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
369: const PetscScalar *x,*v;
370: PetscScalar *y,sum1, sum2, sum3;
371: PetscErrorCode ierr;
372: PetscInt nonzerorow=0,n,i,jrow,j;
373: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
376: VecGetArrayRead(xx,&x);
377: VecGetArray(yy,&y);
378: idx = a->j;
379: v = a->a;
380: ii = a->i;
382: for (i=0; i<m; i++) {
383: jrow = ii[i];
384: n = ii[i+1] - jrow;
385: sum1 = 0.0;
386: sum2 = 0.0;
387: sum3 = 0.0;
389: nonzerorow += (n>0);
390: for (j=0; j<n; j++) {
391: sum1 += v[jrow]*x[3*idx[jrow]];
392: sum2 += v[jrow]*x[3*idx[jrow]+1];
393: sum3 += v[jrow]*x[3*idx[jrow]+2];
394: jrow++;
395: }
396: y[3*i] = sum1;
397: y[3*i+1] = sum2;
398: y[3*i+2] = sum3;
399: }
401: PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);
402: VecRestoreArrayRead(xx,&x);
403: VecRestoreArray(yy,&y);
404: return(0);
405: }
409: PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
410: {
411: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
412: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
413: const PetscScalar *x,*v;
414: PetscScalar *y,alpha1,alpha2,alpha3;
415: PetscErrorCode ierr;
416: PetscInt n,i;
417: const PetscInt m = b->AIJ->rmap->n,*idx;
420: VecSet(yy,0.0);
421: VecGetArrayRead(xx,&x);
422: VecGetArray(yy,&y);
424: for (i=0; i<m; i++) {
425: idx = a->j + a->i[i];
426: v = a->a + a->i[i];
427: n = a->i[i+1] - a->i[i];
428: alpha1 = x[3*i];
429: alpha2 = x[3*i+1];
430: alpha3 = x[3*i+2];
431: while (n-->0) {
432: y[3*(*idx)] += alpha1*(*v);
433: y[3*(*idx)+1] += alpha2*(*v);
434: y[3*(*idx)+2] += alpha3*(*v);
435: idx++; v++;
436: }
437: }
438: PetscLogFlops(6.0*a->nz);
439: VecRestoreArrayRead(xx,&x);
440: VecRestoreArray(yy,&y);
441: return(0);
442: }
446: PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
447: {
448: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
449: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
450: const PetscScalar *x,*v;
451: PetscScalar *y,sum1, sum2, sum3;
452: PetscErrorCode ierr;
453: PetscInt n,i,jrow,j;
454: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
457: if (yy != zz) {VecCopy(yy,zz);}
458: VecGetArrayRead(xx,&x);
459: VecGetArray(zz,&y);
460: idx = a->j;
461: v = a->a;
462: ii = a->i;
464: for (i=0; i<m; i++) {
465: jrow = ii[i];
466: n = ii[i+1] - jrow;
467: sum1 = 0.0;
468: sum2 = 0.0;
469: sum3 = 0.0;
470: for (j=0; j<n; j++) {
471: sum1 += v[jrow]*x[3*idx[jrow]];
472: sum2 += v[jrow]*x[3*idx[jrow]+1];
473: sum3 += v[jrow]*x[3*idx[jrow]+2];
474: jrow++;
475: }
476: y[3*i] += sum1;
477: y[3*i+1] += sum2;
478: y[3*i+2] += sum3;
479: }
481: PetscLogFlops(6.0*a->nz);
482: VecRestoreArrayRead(xx,&x);
483: VecRestoreArray(zz,&y);
484: return(0);
485: }
488: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
489: {
490: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
491: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
492: const PetscScalar *x,*v;
493: PetscScalar *y,alpha1,alpha2,alpha3;
494: PetscErrorCode ierr;
495: PetscInt n,i;
496: const PetscInt m = b->AIJ->rmap->n,*idx;
499: if (yy != zz) {VecCopy(yy,zz);}
500: VecGetArrayRead(xx,&x);
501: VecGetArray(zz,&y);
502: for (i=0; i<m; i++) {
503: idx = a->j + a->i[i];
504: v = a->a + a->i[i];
505: n = a->i[i+1] - a->i[i];
506: alpha1 = x[3*i];
507: alpha2 = x[3*i+1];
508: alpha3 = x[3*i+2];
509: while (n-->0) {
510: y[3*(*idx)] += alpha1*(*v);
511: y[3*(*idx)+1] += alpha2*(*v);
512: y[3*(*idx)+2] += alpha3*(*v);
513: idx++; v++;
514: }
515: }
516: PetscLogFlops(6.0*a->nz);
517: VecRestoreArrayRead(xx,&x);
518: VecRestoreArray(zz,&y);
519: return(0);
520: }
522: /* ------------------------------------------------------------------------------*/
525: PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
526: {
527: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
528: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
529: const PetscScalar *x,*v;
530: PetscScalar *y,sum1, sum2, sum3, sum4;
531: PetscErrorCode ierr;
532: PetscInt nonzerorow=0,n,i,jrow,j;
533: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
536: VecGetArrayRead(xx,&x);
537: VecGetArray(yy,&y);
538: idx = a->j;
539: v = a->a;
540: ii = a->i;
542: for (i=0; i<m; i++) {
543: jrow = ii[i];
544: n = ii[i+1] - jrow;
545: sum1 = 0.0;
546: sum2 = 0.0;
547: sum3 = 0.0;
548: sum4 = 0.0;
549: nonzerorow += (n>0);
550: for (j=0; j<n; j++) {
551: sum1 += v[jrow]*x[4*idx[jrow]];
552: sum2 += v[jrow]*x[4*idx[jrow]+1];
553: sum3 += v[jrow]*x[4*idx[jrow]+2];
554: sum4 += v[jrow]*x[4*idx[jrow]+3];
555: jrow++;
556: }
557: y[4*i] = sum1;
558: y[4*i+1] = sum2;
559: y[4*i+2] = sum3;
560: y[4*i+3] = sum4;
561: }
563: PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);
564: VecRestoreArrayRead(xx,&x);
565: VecRestoreArray(yy,&y);
566: return(0);
567: }
571: PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
572: {
573: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
574: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
575: const PetscScalar *x,*v;
576: PetscScalar *y,alpha1,alpha2,alpha3,alpha4;
577: PetscErrorCode ierr;
578: PetscInt n,i;
579: const PetscInt m = b->AIJ->rmap->n,*idx;
582: VecSet(yy,0.0);
583: VecGetArrayRead(xx,&x);
584: VecGetArray(yy,&y);
585: for (i=0; i<m; i++) {
586: idx = a->j + a->i[i];
587: v = a->a + a->i[i];
588: n = a->i[i+1] - a->i[i];
589: alpha1 = x[4*i];
590: alpha2 = x[4*i+1];
591: alpha3 = x[4*i+2];
592: alpha4 = x[4*i+3];
593: while (n-->0) {
594: y[4*(*idx)] += alpha1*(*v);
595: y[4*(*idx)+1] += alpha2*(*v);
596: y[4*(*idx)+2] += alpha3*(*v);
597: y[4*(*idx)+3] += alpha4*(*v);
598: idx++; v++;
599: }
600: }
601: PetscLogFlops(8.0*a->nz);
602: VecRestoreArrayRead(xx,&x);
603: VecRestoreArray(yy,&y);
604: return(0);
605: }
609: PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
610: {
611: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
612: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
613: const PetscScalar *x,*v;
614: PetscScalar *y,sum1, sum2, sum3, sum4;
615: PetscErrorCode ierr;
616: PetscInt n,i,jrow,j;
617: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
620: if (yy != zz) {VecCopy(yy,zz);}
621: VecGetArrayRead(xx,&x);
622: VecGetArray(zz,&y);
623: idx = a->j;
624: v = a->a;
625: ii = a->i;
627: for (i=0; i<m; i++) {
628: jrow = ii[i];
629: n = ii[i+1] - jrow;
630: sum1 = 0.0;
631: sum2 = 0.0;
632: sum3 = 0.0;
633: sum4 = 0.0;
634: for (j=0; j<n; j++) {
635: sum1 += v[jrow]*x[4*idx[jrow]];
636: sum2 += v[jrow]*x[4*idx[jrow]+1];
637: sum3 += v[jrow]*x[4*idx[jrow]+2];
638: sum4 += v[jrow]*x[4*idx[jrow]+3];
639: jrow++;
640: }
641: y[4*i] += sum1;
642: y[4*i+1] += sum2;
643: y[4*i+2] += sum3;
644: y[4*i+3] += sum4;
645: }
647: PetscLogFlops(8.0*a->nz);
648: VecRestoreArrayRead(xx,&x);
649: VecRestoreArray(zz,&y);
650: return(0);
651: }
654: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
655: {
656: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
657: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
658: const PetscScalar *x,*v;
659: PetscScalar *y,alpha1,alpha2,alpha3,alpha4;
660: PetscErrorCode ierr;
661: PetscInt n,i;
662: const PetscInt m = b->AIJ->rmap->n,*idx;
665: if (yy != zz) {VecCopy(yy,zz);}
666: VecGetArrayRead(xx,&x);
667: VecGetArray(zz,&y);
669: for (i=0; i<m; i++) {
670: idx = a->j + a->i[i];
671: v = a->a + a->i[i];
672: n = a->i[i+1] - a->i[i];
673: alpha1 = x[4*i];
674: alpha2 = x[4*i+1];
675: alpha3 = x[4*i+2];
676: alpha4 = x[4*i+3];
677: while (n-->0) {
678: y[4*(*idx)] += alpha1*(*v);
679: y[4*(*idx)+1] += alpha2*(*v);
680: y[4*(*idx)+2] += alpha3*(*v);
681: y[4*(*idx)+3] += alpha4*(*v);
682: idx++; v++;
683: }
684: }
685: PetscLogFlops(8.0*a->nz);
686: VecRestoreArrayRead(xx,&x);
687: VecRestoreArray(zz,&y);
688: return(0);
689: }
690: /* ------------------------------------------------------------------------------*/
694: PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
695: {
696: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
697: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
698: const PetscScalar *x,*v;
699: PetscScalar *y,sum1, sum2, sum3, sum4, sum5;
700: PetscErrorCode ierr;
701: PetscInt nonzerorow=0,n,i,jrow,j;
702: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
705: VecGetArrayRead(xx,&x);
706: VecGetArray(yy,&y);
707: idx = a->j;
708: v = a->a;
709: ii = a->i;
711: for (i=0; i<m; i++) {
712: jrow = ii[i];
713: n = ii[i+1] - jrow;
714: sum1 = 0.0;
715: sum2 = 0.0;
716: sum3 = 0.0;
717: sum4 = 0.0;
718: sum5 = 0.0;
720: nonzerorow += (n>0);
721: for (j=0; j<n; j++) {
722: sum1 += v[jrow]*x[5*idx[jrow]];
723: sum2 += v[jrow]*x[5*idx[jrow]+1];
724: sum3 += v[jrow]*x[5*idx[jrow]+2];
725: sum4 += v[jrow]*x[5*idx[jrow]+3];
726: sum5 += v[jrow]*x[5*idx[jrow]+4];
727: jrow++;
728: }
729: y[5*i] = sum1;
730: y[5*i+1] = sum2;
731: y[5*i+2] = sum3;
732: y[5*i+3] = sum4;
733: y[5*i+4] = sum5;
734: }
736: PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);
737: VecRestoreArrayRead(xx,&x);
738: VecRestoreArray(yy,&y);
739: return(0);
740: }
744: PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
745: {
746: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
747: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
748: const PetscScalar *x,*v;
749: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5;
750: PetscErrorCode ierr;
751: PetscInt n,i;
752: const PetscInt m = b->AIJ->rmap->n,*idx;
755: VecSet(yy,0.0);
756: VecGetArrayRead(xx,&x);
757: VecGetArray(yy,&y);
759: for (i=0; i<m; i++) {
760: idx = a->j + a->i[i];
761: v = a->a + a->i[i];
762: n = a->i[i+1] - a->i[i];
763: alpha1 = x[5*i];
764: alpha2 = x[5*i+1];
765: alpha3 = x[5*i+2];
766: alpha4 = x[5*i+3];
767: alpha5 = x[5*i+4];
768: while (n-->0) {
769: y[5*(*idx)] += alpha1*(*v);
770: y[5*(*idx)+1] += alpha2*(*v);
771: y[5*(*idx)+2] += alpha3*(*v);
772: y[5*(*idx)+3] += alpha4*(*v);
773: y[5*(*idx)+4] += alpha5*(*v);
774: idx++; v++;
775: }
776: }
777: PetscLogFlops(10.0*a->nz);
778: VecRestoreArrayRead(xx,&x);
779: VecRestoreArray(yy,&y);
780: return(0);
781: }
785: PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
786: {
787: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
788: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
789: const PetscScalar *x,*v;
790: PetscScalar *y,sum1, sum2, sum3, sum4, sum5;
791: PetscErrorCode ierr;
792: PetscInt n,i,jrow,j;
793: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
796: if (yy != zz) {VecCopy(yy,zz);}
797: VecGetArrayRead(xx,&x);
798: VecGetArray(zz,&y);
799: idx = a->j;
800: v = a->a;
801: ii = a->i;
803: for (i=0; i<m; i++) {
804: jrow = ii[i];
805: n = ii[i+1] - jrow;
806: sum1 = 0.0;
807: sum2 = 0.0;
808: sum3 = 0.0;
809: sum4 = 0.0;
810: sum5 = 0.0;
811: for (j=0; j<n; j++) {
812: sum1 += v[jrow]*x[5*idx[jrow]];
813: sum2 += v[jrow]*x[5*idx[jrow]+1];
814: sum3 += v[jrow]*x[5*idx[jrow]+2];
815: sum4 += v[jrow]*x[5*idx[jrow]+3];
816: sum5 += v[jrow]*x[5*idx[jrow]+4];
817: jrow++;
818: }
819: y[5*i] += sum1;
820: y[5*i+1] += sum2;
821: y[5*i+2] += sum3;
822: y[5*i+3] += sum4;
823: y[5*i+4] += sum5;
824: }
826: PetscLogFlops(10.0*a->nz);
827: VecRestoreArrayRead(xx,&x);
828: VecRestoreArray(zz,&y);
829: return(0);
830: }
834: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
835: {
836: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
837: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
838: const PetscScalar *x,*v;
839: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5;
840: PetscErrorCode ierr;
841: PetscInt n,i;
842: const PetscInt m = b->AIJ->rmap->n,*idx;
845: if (yy != zz) {VecCopy(yy,zz);}
846: VecGetArrayRead(xx,&x);
847: VecGetArray(zz,&y);
849: for (i=0; i<m; i++) {
850: idx = a->j + a->i[i];
851: v = a->a + a->i[i];
852: n = a->i[i+1] - a->i[i];
853: alpha1 = x[5*i];
854: alpha2 = x[5*i+1];
855: alpha3 = x[5*i+2];
856: alpha4 = x[5*i+3];
857: alpha5 = x[5*i+4];
858: while (n-->0) {
859: y[5*(*idx)] += alpha1*(*v);
860: y[5*(*idx)+1] += alpha2*(*v);
861: y[5*(*idx)+2] += alpha3*(*v);
862: y[5*(*idx)+3] += alpha4*(*v);
863: y[5*(*idx)+4] += alpha5*(*v);
864: idx++; v++;
865: }
866: }
867: PetscLogFlops(10.0*a->nz);
868: VecRestoreArrayRead(xx,&x);
869: VecRestoreArray(zz,&y);
870: return(0);
871: }
873: /* ------------------------------------------------------------------------------*/
876: PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
877: {
878: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
879: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
880: const PetscScalar *x,*v;
881: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6;
882: PetscErrorCode ierr;
883: PetscInt nonzerorow=0,n,i,jrow,j;
884: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
887: VecGetArrayRead(xx,&x);
888: VecGetArray(yy,&y);
889: idx = a->j;
890: v = a->a;
891: ii = a->i;
893: for (i=0; i<m; i++) {
894: jrow = ii[i];
895: n = ii[i+1] - jrow;
896: sum1 = 0.0;
897: sum2 = 0.0;
898: sum3 = 0.0;
899: sum4 = 0.0;
900: sum5 = 0.0;
901: sum6 = 0.0;
903: nonzerorow += (n>0);
904: for (j=0; j<n; j++) {
905: sum1 += v[jrow]*x[6*idx[jrow]];
906: sum2 += v[jrow]*x[6*idx[jrow]+1];
907: sum3 += v[jrow]*x[6*idx[jrow]+2];
908: sum4 += v[jrow]*x[6*idx[jrow]+3];
909: sum5 += v[jrow]*x[6*idx[jrow]+4];
910: sum6 += v[jrow]*x[6*idx[jrow]+5];
911: jrow++;
912: }
913: y[6*i] = sum1;
914: y[6*i+1] = sum2;
915: y[6*i+2] = sum3;
916: y[6*i+3] = sum4;
917: y[6*i+4] = sum5;
918: y[6*i+5] = sum6;
919: }
921: PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);
922: VecRestoreArrayRead(xx,&x);
923: VecRestoreArray(yy,&y);
924: return(0);
925: }
929: PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
930: {
931: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
932: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
933: const PetscScalar *x,*v;
934: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
935: PetscErrorCode ierr;
936: PetscInt n,i;
937: const PetscInt m = b->AIJ->rmap->n,*idx;
940: VecSet(yy,0.0);
941: VecGetArrayRead(xx,&x);
942: VecGetArray(yy,&y);
944: for (i=0; i<m; i++) {
945: idx = a->j + a->i[i];
946: v = a->a + a->i[i];
947: n = a->i[i+1] - a->i[i];
948: alpha1 = x[6*i];
949: alpha2 = x[6*i+1];
950: alpha3 = x[6*i+2];
951: alpha4 = x[6*i+3];
952: alpha5 = x[6*i+4];
953: alpha6 = x[6*i+5];
954: while (n-->0) {
955: y[6*(*idx)] += alpha1*(*v);
956: y[6*(*idx)+1] += alpha2*(*v);
957: y[6*(*idx)+2] += alpha3*(*v);
958: y[6*(*idx)+3] += alpha4*(*v);
959: y[6*(*idx)+4] += alpha5*(*v);
960: y[6*(*idx)+5] += alpha6*(*v);
961: idx++; v++;
962: }
963: }
964: PetscLogFlops(12.0*a->nz);
965: VecRestoreArrayRead(xx,&x);
966: VecRestoreArray(yy,&y);
967: return(0);
968: }
972: PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
973: {
974: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
975: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
976: const PetscScalar *x,*v;
977: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6;
978: PetscErrorCode ierr;
979: PetscInt n,i,jrow,j;
980: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
983: if (yy != zz) {VecCopy(yy,zz);}
984: VecGetArrayRead(xx,&x);
985: VecGetArray(zz,&y);
986: idx = a->j;
987: v = a->a;
988: ii = a->i;
990: for (i=0; i<m; i++) {
991: jrow = ii[i];
992: n = ii[i+1] - jrow;
993: sum1 = 0.0;
994: sum2 = 0.0;
995: sum3 = 0.0;
996: sum4 = 0.0;
997: sum5 = 0.0;
998: sum6 = 0.0;
999: for (j=0; j<n; j++) {
1000: sum1 += v[jrow]*x[6*idx[jrow]];
1001: sum2 += v[jrow]*x[6*idx[jrow]+1];
1002: sum3 += v[jrow]*x[6*idx[jrow]+2];
1003: sum4 += v[jrow]*x[6*idx[jrow]+3];
1004: sum5 += v[jrow]*x[6*idx[jrow]+4];
1005: sum6 += v[jrow]*x[6*idx[jrow]+5];
1006: jrow++;
1007: }
1008: y[6*i] += sum1;
1009: y[6*i+1] += sum2;
1010: y[6*i+2] += sum3;
1011: y[6*i+3] += sum4;
1012: y[6*i+4] += sum5;
1013: y[6*i+5] += sum6;
1014: }
1016: PetscLogFlops(12.0*a->nz);
1017: VecRestoreArrayRead(xx,&x);
1018: VecRestoreArray(zz,&y);
1019: return(0);
1020: }
1024: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
1025: {
1026: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1027: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1028: const PetscScalar *x,*v;
1029: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
1030: PetscErrorCode ierr;
1031: PetscInt n,i;
1032: const PetscInt m = b->AIJ->rmap->n,*idx;
1035: if (yy != zz) {VecCopy(yy,zz);}
1036: VecGetArrayRead(xx,&x);
1037: VecGetArray(zz,&y);
1039: for (i=0; i<m; i++) {
1040: idx = a->j + a->i[i];
1041: v = a->a + a->i[i];
1042: n = a->i[i+1] - a->i[i];
1043: alpha1 = x[6*i];
1044: alpha2 = x[6*i+1];
1045: alpha3 = x[6*i+2];
1046: alpha4 = x[6*i+3];
1047: alpha5 = x[6*i+4];
1048: alpha6 = x[6*i+5];
1049: while (n-->0) {
1050: y[6*(*idx)] += alpha1*(*v);
1051: y[6*(*idx)+1] += alpha2*(*v);
1052: y[6*(*idx)+2] += alpha3*(*v);
1053: y[6*(*idx)+3] += alpha4*(*v);
1054: y[6*(*idx)+4] += alpha5*(*v);
1055: y[6*(*idx)+5] += alpha6*(*v);
1056: idx++; v++;
1057: }
1058: }
1059: PetscLogFlops(12.0*a->nz);
1060: VecRestoreArrayRead(xx,&x);
1061: VecRestoreArray(zz,&y);
1062: return(0);
1063: }
1065: /* ------------------------------------------------------------------------------*/
1068: PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1069: {
1070: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1071: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1072: const PetscScalar *x,*v;
1073: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1074: PetscErrorCode ierr;
1075: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1076: PetscInt nonzerorow=0,n,i,jrow,j;
1079: VecGetArrayRead(xx,&x);
1080: VecGetArray(yy,&y);
1081: idx = a->j;
1082: v = a->a;
1083: ii = a->i;
1085: for (i=0; i<m; i++) {
1086: jrow = ii[i];
1087: n = ii[i+1] - jrow;
1088: sum1 = 0.0;
1089: sum2 = 0.0;
1090: sum3 = 0.0;
1091: sum4 = 0.0;
1092: sum5 = 0.0;
1093: sum6 = 0.0;
1094: sum7 = 0.0;
1096: nonzerorow += (n>0);
1097: for (j=0; j<n; j++) {
1098: sum1 += v[jrow]*x[7*idx[jrow]];
1099: sum2 += v[jrow]*x[7*idx[jrow]+1];
1100: sum3 += v[jrow]*x[7*idx[jrow]+2];
1101: sum4 += v[jrow]*x[7*idx[jrow]+3];
1102: sum5 += v[jrow]*x[7*idx[jrow]+4];
1103: sum6 += v[jrow]*x[7*idx[jrow]+5];
1104: sum7 += v[jrow]*x[7*idx[jrow]+6];
1105: jrow++;
1106: }
1107: y[7*i] = sum1;
1108: y[7*i+1] = sum2;
1109: y[7*i+2] = sum3;
1110: y[7*i+3] = sum4;
1111: y[7*i+4] = sum5;
1112: y[7*i+5] = sum6;
1113: y[7*i+6] = sum7;
1114: }
1116: PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);
1117: VecRestoreArrayRead(xx,&x);
1118: VecRestoreArray(yy,&y);
1119: return(0);
1120: }
1124: PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1125: {
1126: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1127: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1128: const PetscScalar *x,*v;
1129: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1130: PetscErrorCode ierr;
1131: const PetscInt m = b->AIJ->rmap->n,*idx;
1132: PetscInt n,i;
1135: VecSet(yy,0.0);
1136: VecGetArrayRead(xx,&x);
1137: VecGetArray(yy,&y);
1139: for (i=0; i<m; i++) {
1140: idx = a->j + a->i[i];
1141: v = a->a + a->i[i];
1142: n = a->i[i+1] - a->i[i];
1143: alpha1 = x[7*i];
1144: alpha2 = x[7*i+1];
1145: alpha3 = x[7*i+2];
1146: alpha4 = x[7*i+3];
1147: alpha5 = x[7*i+4];
1148: alpha6 = x[7*i+5];
1149: alpha7 = x[7*i+6];
1150: while (n-->0) {
1151: y[7*(*idx)] += alpha1*(*v);
1152: y[7*(*idx)+1] += alpha2*(*v);
1153: y[7*(*idx)+2] += alpha3*(*v);
1154: y[7*(*idx)+3] += alpha4*(*v);
1155: y[7*(*idx)+4] += alpha5*(*v);
1156: y[7*(*idx)+5] += alpha6*(*v);
1157: y[7*(*idx)+6] += alpha7*(*v);
1158: idx++; v++;
1159: }
1160: }
1161: PetscLogFlops(14.0*a->nz);
1162: VecRestoreArrayRead(xx,&x);
1163: VecRestoreArray(yy,&y);
1164: return(0);
1165: }
1169: PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1170: {
1171: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1172: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1173: const PetscScalar *x,*v;
1174: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1175: PetscErrorCode ierr;
1176: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1177: PetscInt n,i,jrow,j;
1180: if (yy != zz) {VecCopy(yy,zz);}
1181: VecGetArrayRead(xx,&x);
1182: VecGetArray(zz,&y);
1183: idx = a->j;
1184: v = a->a;
1185: ii = a->i;
1187: for (i=0; i<m; i++) {
1188: jrow = ii[i];
1189: n = ii[i+1] - jrow;
1190: sum1 = 0.0;
1191: sum2 = 0.0;
1192: sum3 = 0.0;
1193: sum4 = 0.0;
1194: sum5 = 0.0;
1195: sum6 = 0.0;
1196: sum7 = 0.0;
1197: for (j=0; j<n; j++) {
1198: sum1 += v[jrow]*x[7*idx[jrow]];
1199: sum2 += v[jrow]*x[7*idx[jrow]+1];
1200: sum3 += v[jrow]*x[7*idx[jrow]+2];
1201: sum4 += v[jrow]*x[7*idx[jrow]+3];
1202: sum5 += v[jrow]*x[7*idx[jrow]+4];
1203: sum6 += v[jrow]*x[7*idx[jrow]+5];
1204: sum7 += v[jrow]*x[7*idx[jrow]+6];
1205: jrow++;
1206: }
1207: y[7*i] += sum1;
1208: y[7*i+1] += sum2;
1209: y[7*i+2] += sum3;
1210: y[7*i+3] += sum4;
1211: y[7*i+4] += sum5;
1212: y[7*i+5] += sum6;
1213: y[7*i+6] += sum7;
1214: }
1216: PetscLogFlops(14.0*a->nz);
1217: VecRestoreArrayRead(xx,&x);
1218: VecRestoreArray(zz,&y);
1219: return(0);
1220: }
1224: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1225: {
1226: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1227: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1228: const PetscScalar *x,*v;
1229: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1230: PetscErrorCode ierr;
1231: const PetscInt m = b->AIJ->rmap->n,*idx;
1232: PetscInt n,i;
1235: if (yy != zz) {VecCopy(yy,zz);}
1236: VecGetArrayRead(xx,&x);
1237: VecGetArray(zz,&y);
1238: for (i=0; i<m; i++) {
1239: idx = a->j + a->i[i];
1240: v = a->a + a->i[i];
1241: n = a->i[i+1] - a->i[i];
1242: alpha1 = x[7*i];
1243: alpha2 = x[7*i+1];
1244: alpha3 = x[7*i+2];
1245: alpha4 = x[7*i+3];
1246: alpha5 = x[7*i+4];
1247: alpha6 = x[7*i+5];
1248: alpha7 = x[7*i+6];
1249: while (n-->0) {
1250: y[7*(*idx)] += alpha1*(*v);
1251: y[7*(*idx)+1] += alpha2*(*v);
1252: y[7*(*idx)+2] += alpha3*(*v);
1253: y[7*(*idx)+3] += alpha4*(*v);
1254: y[7*(*idx)+4] += alpha5*(*v);
1255: y[7*(*idx)+5] += alpha6*(*v);
1256: y[7*(*idx)+6] += alpha7*(*v);
1257: idx++; v++;
1258: }
1259: }
1260: PetscLogFlops(14.0*a->nz);
1261: VecRestoreArrayRead(xx,&x);
1262: VecRestoreArray(zz,&y);
1263: return(0);
1264: }
1268: PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1269: {
1270: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1271: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1272: const PetscScalar *x,*v;
1273: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1274: PetscErrorCode ierr;
1275: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1276: PetscInt nonzerorow=0,n,i,jrow,j;
1279: VecGetArrayRead(xx,&x);
1280: VecGetArray(yy,&y);
1281: idx = a->j;
1282: v = a->a;
1283: ii = a->i;
1285: for (i=0; i<m; i++) {
1286: jrow = ii[i];
1287: n = ii[i+1] - jrow;
1288: sum1 = 0.0;
1289: sum2 = 0.0;
1290: sum3 = 0.0;
1291: sum4 = 0.0;
1292: sum5 = 0.0;
1293: sum6 = 0.0;
1294: sum7 = 0.0;
1295: sum8 = 0.0;
1297: nonzerorow += (n>0);
1298: for (j=0; j<n; j++) {
1299: sum1 += v[jrow]*x[8*idx[jrow]];
1300: sum2 += v[jrow]*x[8*idx[jrow]+1];
1301: sum3 += v[jrow]*x[8*idx[jrow]+2];
1302: sum4 += v[jrow]*x[8*idx[jrow]+3];
1303: sum5 += v[jrow]*x[8*idx[jrow]+4];
1304: sum6 += v[jrow]*x[8*idx[jrow]+5];
1305: sum7 += v[jrow]*x[8*idx[jrow]+6];
1306: sum8 += v[jrow]*x[8*idx[jrow]+7];
1307: jrow++;
1308: }
1309: y[8*i] = sum1;
1310: y[8*i+1] = sum2;
1311: y[8*i+2] = sum3;
1312: y[8*i+3] = sum4;
1313: y[8*i+4] = sum5;
1314: y[8*i+5] = sum6;
1315: y[8*i+6] = sum7;
1316: y[8*i+7] = sum8;
1317: }
1319: PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);
1320: VecRestoreArrayRead(xx,&x);
1321: VecRestoreArray(yy,&y);
1322: return(0);
1323: }
1327: PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1328: {
1329: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1330: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1331: const PetscScalar *x,*v;
1332: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1333: PetscErrorCode ierr;
1334: const PetscInt m = b->AIJ->rmap->n,*idx;
1335: PetscInt n,i;
1338: VecSet(yy,0.0);
1339: VecGetArrayRead(xx,&x);
1340: VecGetArray(yy,&y);
1342: for (i=0; i<m; i++) {
1343: idx = a->j + a->i[i];
1344: v = a->a + a->i[i];
1345: n = a->i[i+1] - a->i[i];
1346: alpha1 = x[8*i];
1347: alpha2 = x[8*i+1];
1348: alpha3 = x[8*i+2];
1349: alpha4 = x[8*i+3];
1350: alpha5 = x[8*i+4];
1351: alpha6 = x[8*i+5];
1352: alpha7 = x[8*i+6];
1353: alpha8 = x[8*i+7];
1354: while (n-->0) {
1355: y[8*(*idx)] += alpha1*(*v);
1356: y[8*(*idx)+1] += alpha2*(*v);
1357: y[8*(*idx)+2] += alpha3*(*v);
1358: y[8*(*idx)+3] += alpha4*(*v);
1359: y[8*(*idx)+4] += alpha5*(*v);
1360: y[8*(*idx)+5] += alpha6*(*v);
1361: y[8*(*idx)+6] += alpha7*(*v);
1362: y[8*(*idx)+7] += alpha8*(*v);
1363: idx++; v++;
1364: }
1365: }
1366: PetscLogFlops(16.0*a->nz);
1367: VecRestoreArrayRead(xx,&x);
1368: VecRestoreArray(yy,&y);
1369: return(0);
1370: }
1374: PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1375: {
1376: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1377: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1378: const PetscScalar *x,*v;
1379: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1380: PetscErrorCode ierr;
1381: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1382: PetscInt n,i,jrow,j;
1385: if (yy != zz) {VecCopy(yy,zz);}
1386: VecGetArrayRead(xx,&x);
1387: VecGetArray(zz,&y);
1388: idx = a->j;
1389: v = a->a;
1390: ii = a->i;
1392: for (i=0; i<m; i++) {
1393: jrow = ii[i];
1394: n = ii[i+1] - jrow;
1395: sum1 = 0.0;
1396: sum2 = 0.0;
1397: sum3 = 0.0;
1398: sum4 = 0.0;
1399: sum5 = 0.0;
1400: sum6 = 0.0;
1401: sum7 = 0.0;
1402: sum8 = 0.0;
1403: for (j=0; j<n; j++) {
1404: sum1 += v[jrow]*x[8*idx[jrow]];
1405: sum2 += v[jrow]*x[8*idx[jrow]+1];
1406: sum3 += v[jrow]*x[8*idx[jrow]+2];
1407: sum4 += v[jrow]*x[8*idx[jrow]+3];
1408: sum5 += v[jrow]*x[8*idx[jrow]+4];
1409: sum6 += v[jrow]*x[8*idx[jrow]+5];
1410: sum7 += v[jrow]*x[8*idx[jrow]+6];
1411: sum8 += v[jrow]*x[8*idx[jrow]+7];
1412: jrow++;
1413: }
1414: y[8*i] += sum1;
1415: y[8*i+1] += sum2;
1416: y[8*i+2] += sum3;
1417: y[8*i+3] += sum4;
1418: y[8*i+4] += sum5;
1419: y[8*i+5] += sum6;
1420: y[8*i+6] += sum7;
1421: y[8*i+7] += sum8;
1422: }
1424: PetscLogFlops(16.0*a->nz);
1425: VecRestoreArrayRead(xx,&x);
1426: VecRestoreArray(zz,&y);
1427: return(0);
1428: }
1432: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1433: {
1434: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1435: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1436: const PetscScalar *x,*v;
1437: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1438: PetscErrorCode ierr;
1439: const PetscInt m = b->AIJ->rmap->n,*idx;
1440: PetscInt n,i;
1443: if (yy != zz) {VecCopy(yy,zz);}
1444: VecGetArrayRead(xx,&x);
1445: VecGetArray(zz,&y);
1446: for (i=0; i<m; i++) {
1447: idx = a->j + a->i[i];
1448: v = a->a + a->i[i];
1449: n = a->i[i+1] - a->i[i];
1450: alpha1 = x[8*i];
1451: alpha2 = x[8*i+1];
1452: alpha3 = x[8*i+2];
1453: alpha4 = x[8*i+3];
1454: alpha5 = x[8*i+4];
1455: alpha6 = x[8*i+5];
1456: alpha7 = x[8*i+6];
1457: alpha8 = x[8*i+7];
1458: while (n-->0) {
1459: y[8*(*idx)] += alpha1*(*v);
1460: y[8*(*idx)+1] += alpha2*(*v);
1461: y[8*(*idx)+2] += alpha3*(*v);
1462: y[8*(*idx)+3] += alpha4*(*v);
1463: y[8*(*idx)+4] += alpha5*(*v);
1464: y[8*(*idx)+5] += alpha6*(*v);
1465: y[8*(*idx)+6] += alpha7*(*v);
1466: y[8*(*idx)+7] += alpha8*(*v);
1467: idx++; v++;
1468: }
1469: }
1470: PetscLogFlops(16.0*a->nz);
1471: VecRestoreArrayRead(xx,&x);
1472: VecRestoreArray(zz,&y);
1473: return(0);
1474: }
1476: /* ------------------------------------------------------------------------------*/
1479: PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1480: {
1481: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1482: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1483: const PetscScalar *x,*v;
1484: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1485: PetscErrorCode ierr;
1486: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1487: PetscInt nonzerorow=0,n,i,jrow,j;
1490: VecGetArrayRead(xx,&x);
1491: VecGetArray(yy,&y);
1492: idx = a->j;
1493: v = a->a;
1494: ii = a->i;
1496: for (i=0; i<m; i++) {
1497: jrow = ii[i];
1498: n = ii[i+1] - jrow;
1499: sum1 = 0.0;
1500: sum2 = 0.0;
1501: sum3 = 0.0;
1502: sum4 = 0.0;
1503: sum5 = 0.0;
1504: sum6 = 0.0;
1505: sum7 = 0.0;
1506: sum8 = 0.0;
1507: sum9 = 0.0;
1509: nonzerorow += (n>0);
1510: for (j=0; j<n; j++) {
1511: sum1 += v[jrow]*x[9*idx[jrow]];
1512: sum2 += v[jrow]*x[9*idx[jrow]+1];
1513: sum3 += v[jrow]*x[9*idx[jrow]+2];
1514: sum4 += v[jrow]*x[9*idx[jrow]+3];
1515: sum5 += v[jrow]*x[9*idx[jrow]+4];
1516: sum6 += v[jrow]*x[9*idx[jrow]+5];
1517: sum7 += v[jrow]*x[9*idx[jrow]+6];
1518: sum8 += v[jrow]*x[9*idx[jrow]+7];
1519: sum9 += v[jrow]*x[9*idx[jrow]+8];
1520: jrow++;
1521: }
1522: y[9*i] = sum1;
1523: y[9*i+1] = sum2;
1524: y[9*i+2] = sum3;
1525: y[9*i+3] = sum4;
1526: y[9*i+4] = sum5;
1527: y[9*i+5] = sum6;
1528: y[9*i+6] = sum7;
1529: y[9*i+7] = sum8;
1530: y[9*i+8] = sum9;
1531: }
1533: PetscLogFlops(18.0*a->nz - 9*nonzerorow);
1534: VecRestoreArrayRead(xx,&x);
1535: VecRestoreArray(yy,&y);
1536: return(0);
1537: }
1539: /* ------------------------------------------------------------------------------*/
1543: PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1544: {
1545: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1546: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1547: const PetscScalar *x,*v;
1548: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1549: PetscErrorCode ierr;
1550: const PetscInt m = b->AIJ->rmap->n,*idx;
1551: PetscInt n,i;
1554: VecSet(yy,0.0);
1555: VecGetArrayRead(xx,&x);
1556: VecGetArray(yy,&y);
1558: for (i=0; i<m; i++) {
1559: idx = a->j + a->i[i];
1560: v = a->a + a->i[i];
1561: n = a->i[i+1] - a->i[i];
1562: alpha1 = x[9*i];
1563: alpha2 = x[9*i+1];
1564: alpha3 = x[9*i+2];
1565: alpha4 = x[9*i+3];
1566: alpha5 = x[9*i+4];
1567: alpha6 = x[9*i+5];
1568: alpha7 = x[9*i+6];
1569: alpha8 = x[9*i+7];
1570: alpha9 = x[9*i+8];
1571: while (n-->0) {
1572: y[9*(*idx)] += alpha1*(*v);
1573: y[9*(*idx)+1] += alpha2*(*v);
1574: y[9*(*idx)+2] += alpha3*(*v);
1575: y[9*(*idx)+3] += alpha4*(*v);
1576: y[9*(*idx)+4] += alpha5*(*v);
1577: y[9*(*idx)+5] += alpha6*(*v);
1578: y[9*(*idx)+6] += alpha7*(*v);
1579: y[9*(*idx)+7] += alpha8*(*v);
1580: y[9*(*idx)+8] += alpha9*(*v);
1581: idx++; v++;
1582: }
1583: }
1584: PetscLogFlops(18.0*a->nz);
1585: VecRestoreArrayRead(xx,&x);
1586: VecRestoreArray(yy,&y);
1587: return(0);
1588: }
1592: PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1593: {
1594: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1595: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1596: const PetscScalar *x,*v;
1597: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1598: PetscErrorCode ierr;
1599: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1600: PetscInt n,i,jrow,j;
1603: if (yy != zz) {VecCopy(yy,zz);}
1604: VecGetArrayRead(xx,&x);
1605: VecGetArray(zz,&y);
1606: idx = a->j;
1607: v = a->a;
1608: ii = a->i;
1610: for (i=0; i<m; i++) {
1611: jrow = ii[i];
1612: n = ii[i+1] - jrow;
1613: sum1 = 0.0;
1614: sum2 = 0.0;
1615: sum3 = 0.0;
1616: sum4 = 0.0;
1617: sum5 = 0.0;
1618: sum6 = 0.0;
1619: sum7 = 0.0;
1620: sum8 = 0.0;
1621: sum9 = 0.0;
1622: for (j=0; j<n; j++) {
1623: sum1 += v[jrow]*x[9*idx[jrow]];
1624: sum2 += v[jrow]*x[9*idx[jrow]+1];
1625: sum3 += v[jrow]*x[9*idx[jrow]+2];
1626: sum4 += v[jrow]*x[9*idx[jrow]+3];
1627: sum5 += v[jrow]*x[9*idx[jrow]+4];
1628: sum6 += v[jrow]*x[9*idx[jrow]+5];
1629: sum7 += v[jrow]*x[9*idx[jrow]+6];
1630: sum8 += v[jrow]*x[9*idx[jrow]+7];
1631: sum9 += v[jrow]*x[9*idx[jrow]+8];
1632: jrow++;
1633: }
1634: y[9*i] += sum1;
1635: y[9*i+1] += sum2;
1636: y[9*i+2] += sum3;
1637: y[9*i+3] += sum4;
1638: y[9*i+4] += sum5;
1639: y[9*i+5] += sum6;
1640: y[9*i+6] += sum7;
1641: y[9*i+7] += sum8;
1642: y[9*i+8] += sum9;
1643: }
1645: PetscLogFlops(18*a->nz);
1646: VecRestoreArrayRead(xx,&x);
1647: VecRestoreArray(zz,&y);
1648: return(0);
1649: }
1653: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1654: {
1655: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1656: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1657: const PetscScalar *x,*v;
1658: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1659: PetscErrorCode ierr;
1660: const PetscInt m = b->AIJ->rmap->n,*idx;
1661: PetscInt n,i;
1664: if (yy != zz) {VecCopy(yy,zz);}
1665: VecGetArrayRead(xx,&x);
1666: VecGetArray(zz,&y);
1667: for (i=0; i<m; i++) {
1668: idx = a->j + a->i[i];
1669: v = a->a + a->i[i];
1670: n = a->i[i+1] - a->i[i];
1671: alpha1 = x[9*i];
1672: alpha2 = x[9*i+1];
1673: alpha3 = x[9*i+2];
1674: alpha4 = x[9*i+3];
1675: alpha5 = x[9*i+4];
1676: alpha6 = x[9*i+5];
1677: alpha7 = x[9*i+6];
1678: alpha8 = x[9*i+7];
1679: alpha9 = x[9*i+8];
1680: while (n-->0) {
1681: y[9*(*idx)] += alpha1*(*v);
1682: y[9*(*idx)+1] += alpha2*(*v);
1683: y[9*(*idx)+2] += alpha3*(*v);
1684: y[9*(*idx)+3] += alpha4*(*v);
1685: y[9*(*idx)+4] += alpha5*(*v);
1686: y[9*(*idx)+5] += alpha6*(*v);
1687: y[9*(*idx)+6] += alpha7*(*v);
1688: y[9*(*idx)+7] += alpha8*(*v);
1689: y[9*(*idx)+8] += alpha9*(*v);
1690: idx++; v++;
1691: }
1692: }
1693: PetscLogFlops(18.0*a->nz);
1694: VecRestoreArrayRead(xx,&x);
1695: VecRestoreArray(zz,&y);
1696: return(0);
1697: }
1700: PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1701: {
1702: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1703: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1704: const PetscScalar *x,*v;
1705: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1706: PetscErrorCode ierr;
1707: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1708: PetscInt nonzerorow=0,n,i,jrow,j;
1711: VecGetArrayRead(xx,&x);
1712: VecGetArray(yy,&y);
1713: idx = a->j;
1714: v = a->a;
1715: ii = a->i;
1717: for (i=0; i<m; i++) {
1718: jrow = ii[i];
1719: n = ii[i+1] - jrow;
1720: sum1 = 0.0;
1721: sum2 = 0.0;
1722: sum3 = 0.0;
1723: sum4 = 0.0;
1724: sum5 = 0.0;
1725: sum6 = 0.0;
1726: sum7 = 0.0;
1727: sum8 = 0.0;
1728: sum9 = 0.0;
1729: sum10 = 0.0;
1731: nonzerorow += (n>0);
1732: for (j=0; j<n; j++) {
1733: sum1 += v[jrow]*x[10*idx[jrow]];
1734: sum2 += v[jrow]*x[10*idx[jrow]+1];
1735: sum3 += v[jrow]*x[10*idx[jrow]+2];
1736: sum4 += v[jrow]*x[10*idx[jrow]+3];
1737: sum5 += v[jrow]*x[10*idx[jrow]+4];
1738: sum6 += v[jrow]*x[10*idx[jrow]+5];
1739: sum7 += v[jrow]*x[10*idx[jrow]+6];
1740: sum8 += v[jrow]*x[10*idx[jrow]+7];
1741: sum9 += v[jrow]*x[10*idx[jrow]+8];
1742: sum10 += v[jrow]*x[10*idx[jrow]+9];
1743: jrow++;
1744: }
1745: y[10*i] = sum1;
1746: y[10*i+1] = sum2;
1747: y[10*i+2] = sum3;
1748: y[10*i+3] = sum4;
1749: y[10*i+4] = sum5;
1750: y[10*i+5] = sum6;
1751: y[10*i+6] = sum7;
1752: y[10*i+7] = sum8;
1753: y[10*i+8] = sum9;
1754: y[10*i+9] = sum10;
1755: }
1757: PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);
1758: VecRestoreArrayRead(xx,&x);
1759: VecRestoreArray(yy,&y);
1760: return(0);
1761: }
1765: PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1766: {
1767: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1768: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1769: const PetscScalar *x,*v;
1770: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1771: PetscErrorCode ierr;
1772: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1773: PetscInt n,i,jrow,j;
1776: if (yy != zz) {VecCopy(yy,zz);}
1777: VecGetArrayRead(xx,&x);
1778: VecGetArray(zz,&y);
1779: idx = a->j;
1780: v = a->a;
1781: ii = a->i;
1783: for (i=0; i<m; i++) {
1784: jrow = ii[i];
1785: n = ii[i+1] - jrow;
1786: sum1 = 0.0;
1787: sum2 = 0.0;
1788: sum3 = 0.0;
1789: sum4 = 0.0;
1790: sum5 = 0.0;
1791: sum6 = 0.0;
1792: sum7 = 0.0;
1793: sum8 = 0.0;
1794: sum9 = 0.0;
1795: sum10 = 0.0;
1796: for (j=0; j<n; j++) {
1797: sum1 += v[jrow]*x[10*idx[jrow]];
1798: sum2 += v[jrow]*x[10*idx[jrow]+1];
1799: sum3 += v[jrow]*x[10*idx[jrow]+2];
1800: sum4 += v[jrow]*x[10*idx[jrow]+3];
1801: sum5 += v[jrow]*x[10*idx[jrow]+4];
1802: sum6 += v[jrow]*x[10*idx[jrow]+5];
1803: sum7 += v[jrow]*x[10*idx[jrow]+6];
1804: sum8 += v[jrow]*x[10*idx[jrow]+7];
1805: sum9 += v[jrow]*x[10*idx[jrow]+8];
1806: sum10 += v[jrow]*x[10*idx[jrow]+9];
1807: jrow++;
1808: }
1809: y[10*i] += sum1;
1810: y[10*i+1] += sum2;
1811: y[10*i+2] += sum3;
1812: y[10*i+3] += sum4;
1813: y[10*i+4] += sum5;
1814: y[10*i+5] += sum6;
1815: y[10*i+6] += sum7;
1816: y[10*i+7] += sum8;
1817: y[10*i+8] += sum9;
1818: y[10*i+9] += sum10;
1819: }
1821: PetscLogFlops(20.0*a->nz);
1822: VecRestoreArrayRead(xx,&x);
1823: VecRestoreArray(yy,&y);
1824: return(0);
1825: }
1829: PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1830: {
1831: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1832: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1833: const PetscScalar *x,*v;
1834: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1835: PetscErrorCode ierr;
1836: const PetscInt m = b->AIJ->rmap->n,*idx;
1837: PetscInt n,i;
1840: VecSet(yy,0.0);
1841: VecGetArrayRead(xx,&x);
1842: VecGetArray(yy,&y);
1844: for (i=0; i<m; i++) {
1845: idx = a->j + a->i[i];
1846: v = a->a + a->i[i];
1847: n = a->i[i+1] - a->i[i];
1848: alpha1 = x[10*i];
1849: alpha2 = x[10*i+1];
1850: alpha3 = x[10*i+2];
1851: alpha4 = x[10*i+3];
1852: alpha5 = x[10*i+4];
1853: alpha6 = x[10*i+5];
1854: alpha7 = x[10*i+6];
1855: alpha8 = x[10*i+7];
1856: alpha9 = x[10*i+8];
1857: alpha10 = x[10*i+9];
1858: while (n-->0) {
1859: y[10*(*idx)] += alpha1*(*v);
1860: y[10*(*idx)+1] += alpha2*(*v);
1861: y[10*(*idx)+2] += alpha3*(*v);
1862: y[10*(*idx)+3] += alpha4*(*v);
1863: y[10*(*idx)+4] += alpha5*(*v);
1864: y[10*(*idx)+5] += alpha6*(*v);
1865: y[10*(*idx)+6] += alpha7*(*v);
1866: y[10*(*idx)+7] += alpha8*(*v);
1867: y[10*(*idx)+8] += alpha9*(*v);
1868: y[10*(*idx)+9] += alpha10*(*v);
1869: idx++; v++;
1870: }
1871: }
1872: PetscLogFlops(20.0*a->nz);
1873: VecRestoreArrayRead(xx,&x);
1874: VecRestoreArray(yy,&y);
1875: return(0);
1876: }
1880: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1881: {
1882: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1883: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1884: const PetscScalar *x,*v;
1885: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1886: PetscErrorCode ierr;
1887: const PetscInt m = b->AIJ->rmap->n,*idx;
1888: PetscInt n,i;
1891: if (yy != zz) {VecCopy(yy,zz);}
1892: VecGetArrayRead(xx,&x);
1893: VecGetArray(zz,&y);
1894: for (i=0; i<m; i++) {
1895: idx = a->j + a->i[i];
1896: v = a->a + a->i[i];
1897: n = a->i[i+1] - a->i[i];
1898: alpha1 = x[10*i];
1899: alpha2 = x[10*i+1];
1900: alpha3 = x[10*i+2];
1901: alpha4 = x[10*i+3];
1902: alpha5 = x[10*i+4];
1903: alpha6 = x[10*i+5];
1904: alpha7 = x[10*i+6];
1905: alpha8 = x[10*i+7];
1906: alpha9 = x[10*i+8];
1907: alpha10 = x[10*i+9];
1908: while (n-->0) {
1909: y[10*(*idx)] += alpha1*(*v);
1910: y[10*(*idx)+1] += alpha2*(*v);
1911: y[10*(*idx)+2] += alpha3*(*v);
1912: y[10*(*idx)+3] += alpha4*(*v);
1913: y[10*(*idx)+4] += alpha5*(*v);
1914: y[10*(*idx)+5] += alpha6*(*v);
1915: y[10*(*idx)+6] += alpha7*(*v);
1916: y[10*(*idx)+7] += alpha8*(*v);
1917: y[10*(*idx)+8] += alpha9*(*v);
1918: y[10*(*idx)+9] += alpha10*(*v);
1919: idx++; v++;
1920: }
1921: }
1922: PetscLogFlops(20.0*a->nz);
1923: VecRestoreArrayRead(xx,&x);
1924: VecRestoreArray(zz,&y);
1925: return(0);
1926: }
1929: /*--------------------------------------------------------------------------------------------*/
1932: PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1933: {
1934: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1935: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1936: const PetscScalar *x,*v;
1937: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1938: PetscErrorCode ierr;
1939: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1940: PetscInt nonzerorow=0,n,i,jrow,j;
1943: VecGetArrayRead(xx,&x);
1944: VecGetArray(yy,&y);
1945: idx = a->j;
1946: v = a->a;
1947: ii = a->i;
1949: for (i=0; i<m; i++) {
1950: jrow = ii[i];
1951: n = ii[i+1] - jrow;
1952: sum1 = 0.0;
1953: sum2 = 0.0;
1954: sum3 = 0.0;
1955: sum4 = 0.0;
1956: sum5 = 0.0;
1957: sum6 = 0.0;
1958: sum7 = 0.0;
1959: sum8 = 0.0;
1960: sum9 = 0.0;
1961: sum10 = 0.0;
1962: sum11 = 0.0;
1964: nonzerorow += (n>0);
1965: for (j=0; j<n; j++) {
1966: sum1 += v[jrow]*x[11*idx[jrow]];
1967: sum2 += v[jrow]*x[11*idx[jrow]+1];
1968: sum3 += v[jrow]*x[11*idx[jrow]+2];
1969: sum4 += v[jrow]*x[11*idx[jrow]+3];
1970: sum5 += v[jrow]*x[11*idx[jrow]+4];
1971: sum6 += v[jrow]*x[11*idx[jrow]+5];
1972: sum7 += v[jrow]*x[11*idx[jrow]+6];
1973: sum8 += v[jrow]*x[11*idx[jrow]+7];
1974: sum9 += v[jrow]*x[11*idx[jrow]+8];
1975: sum10 += v[jrow]*x[11*idx[jrow]+9];
1976: sum11 += v[jrow]*x[11*idx[jrow]+10];
1977: jrow++;
1978: }
1979: y[11*i] = sum1;
1980: y[11*i+1] = sum2;
1981: y[11*i+2] = sum3;
1982: y[11*i+3] = sum4;
1983: y[11*i+4] = sum5;
1984: y[11*i+5] = sum6;
1985: y[11*i+6] = sum7;
1986: y[11*i+7] = sum8;
1987: y[11*i+8] = sum9;
1988: y[11*i+9] = sum10;
1989: y[11*i+10] = sum11;
1990: }
1992: PetscLogFlops(22*a->nz - 11*nonzerorow);
1993: VecRestoreArrayRead(xx,&x);
1994: VecRestoreArray(yy,&y);
1995: return(0);
1996: }
2000: PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2001: {
2002: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2003: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2004: const PetscScalar *x,*v;
2005: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
2006: PetscErrorCode ierr;
2007: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2008: PetscInt n,i,jrow,j;
2011: if (yy != zz) {VecCopy(yy,zz);}
2012: VecGetArrayRead(xx,&x);
2013: VecGetArray(zz,&y);
2014: idx = a->j;
2015: v = a->a;
2016: ii = a->i;
2018: for (i=0; i<m; i++) {
2019: jrow = ii[i];
2020: n = ii[i+1] - jrow;
2021: sum1 = 0.0;
2022: sum2 = 0.0;
2023: sum3 = 0.0;
2024: sum4 = 0.0;
2025: sum5 = 0.0;
2026: sum6 = 0.0;
2027: sum7 = 0.0;
2028: sum8 = 0.0;
2029: sum9 = 0.0;
2030: sum10 = 0.0;
2031: sum11 = 0.0;
2032: for (j=0; j<n; j++) {
2033: sum1 += v[jrow]*x[11*idx[jrow]];
2034: sum2 += v[jrow]*x[11*idx[jrow]+1];
2035: sum3 += v[jrow]*x[11*idx[jrow]+2];
2036: sum4 += v[jrow]*x[11*idx[jrow]+3];
2037: sum5 += v[jrow]*x[11*idx[jrow]+4];
2038: sum6 += v[jrow]*x[11*idx[jrow]+5];
2039: sum7 += v[jrow]*x[11*idx[jrow]+6];
2040: sum8 += v[jrow]*x[11*idx[jrow]+7];
2041: sum9 += v[jrow]*x[11*idx[jrow]+8];
2042: sum10 += v[jrow]*x[11*idx[jrow]+9];
2043: sum11 += v[jrow]*x[11*idx[jrow]+10];
2044: jrow++;
2045: }
2046: y[11*i] += sum1;
2047: y[11*i+1] += sum2;
2048: y[11*i+2] += sum3;
2049: y[11*i+3] += sum4;
2050: y[11*i+4] += sum5;
2051: y[11*i+5] += sum6;
2052: y[11*i+6] += sum7;
2053: y[11*i+7] += sum8;
2054: y[11*i+8] += sum9;
2055: y[11*i+9] += sum10;
2056: y[11*i+10] += sum11;
2057: }
2059: PetscLogFlops(22*a->nz);
2060: VecRestoreArrayRead(xx,&x);
2061: VecRestoreArray(yy,&y);
2062: return(0);
2063: }
2067: PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
2068: {
2069: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2070: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2071: const PetscScalar *x,*v;
2072: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2073: PetscErrorCode ierr;
2074: const PetscInt m = b->AIJ->rmap->n,*idx;
2075: PetscInt n,i;
2078: VecSet(yy,0.0);
2079: VecGetArrayRead(xx,&x);
2080: VecGetArray(yy,&y);
2082: for (i=0; i<m; i++) {
2083: idx = a->j + a->i[i];
2084: v = a->a + a->i[i];
2085: n = a->i[i+1] - a->i[i];
2086: alpha1 = x[11*i];
2087: alpha2 = x[11*i+1];
2088: alpha3 = x[11*i+2];
2089: alpha4 = x[11*i+3];
2090: alpha5 = x[11*i+4];
2091: alpha6 = x[11*i+5];
2092: alpha7 = x[11*i+6];
2093: alpha8 = x[11*i+7];
2094: alpha9 = x[11*i+8];
2095: alpha10 = x[11*i+9];
2096: alpha11 = x[11*i+10];
2097: while (n-->0) {
2098: y[11*(*idx)] += alpha1*(*v);
2099: y[11*(*idx)+1] += alpha2*(*v);
2100: y[11*(*idx)+2] += alpha3*(*v);
2101: y[11*(*idx)+3] += alpha4*(*v);
2102: y[11*(*idx)+4] += alpha5*(*v);
2103: y[11*(*idx)+5] += alpha6*(*v);
2104: y[11*(*idx)+6] += alpha7*(*v);
2105: y[11*(*idx)+7] += alpha8*(*v);
2106: y[11*(*idx)+8] += alpha9*(*v);
2107: y[11*(*idx)+9] += alpha10*(*v);
2108: y[11*(*idx)+10] += alpha11*(*v);
2109: idx++; v++;
2110: }
2111: }
2112: PetscLogFlops(22*a->nz);
2113: VecRestoreArrayRead(xx,&x);
2114: VecRestoreArray(yy,&y);
2115: return(0);
2116: }
2120: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2121: {
2122: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2123: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2124: const PetscScalar *x,*v;
2125: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2126: PetscErrorCode ierr;
2127: const PetscInt m = b->AIJ->rmap->n,*idx;
2128: PetscInt n,i;
2131: if (yy != zz) {VecCopy(yy,zz);}
2132: VecGetArrayRead(xx,&x);
2133: VecGetArray(zz,&y);
2134: for (i=0; i<m; i++) {
2135: idx = a->j + a->i[i];
2136: v = a->a + a->i[i];
2137: n = a->i[i+1] - a->i[i];
2138: alpha1 = x[11*i];
2139: alpha2 = x[11*i+1];
2140: alpha3 = x[11*i+2];
2141: alpha4 = x[11*i+3];
2142: alpha5 = x[11*i+4];
2143: alpha6 = x[11*i+5];
2144: alpha7 = x[11*i+6];
2145: alpha8 = x[11*i+7];
2146: alpha9 = x[11*i+8];
2147: alpha10 = x[11*i+9];
2148: alpha11 = x[11*i+10];
2149: while (n-->0) {
2150: y[11*(*idx)] += alpha1*(*v);
2151: y[11*(*idx)+1] += alpha2*(*v);
2152: y[11*(*idx)+2] += alpha3*(*v);
2153: y[11*(*idx)+3] += alpha4*(*v);
2154: y[11*(*idx)+4] += alpha5*(*v);
2155: y[11*(*idx)+5] += alpha6*(*v);
2156: y[11*(*idx)+6] += alpha7*(*v);
2157: y[11*(*idx)+7] += alpha8*(*v);
2158: y[11*(*idx)+8] += alpha9*(*v);
2159: y[11*(*idx)+9] += alpha10*(*v);
2160: y[11*(*idx)+10] += alpha11*(*v);
2161: idx++; v++;
2162: }
2163: }
2164: PetscLogFlops(22*a->nz);
2165: VecRestoreArrayRead(xx,&x);
2166: VecRestoreArray(zz,&y);
2167: return(0);
2168: }
2171: /*--------------------------------------------------------------------------------------------*/
2174: PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2175: {
2176: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2177: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2178: const PetscScalar *x,*v;
2179: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2180: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2181: PetscErrorCode ierr;
2182: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2183: PetscInt nonzerorow=0,n,i,jrow,j;
2186: VecGetArrayRead(xx,&x);
2187: VecGetArray(yy,&y);
2188: idx = a->j;
2189: v = a->a;
2190: ii = a->i;
2192: for (i=0; i<m; i++) {
2193: jrow = ii[i];
2194: n = ii[i+1] - jrow;
2195: sum1 = 0.0;
2196: sum2 = 0.0;
2197: sum3 = 0.0;
2198: sum4 = 0.0;
2199: sum5 = 0.0;
2200: sum6 = 0.0;
2201: sum7 = 0.0;
2202: sum8 = 0.0;
2203: sum9 = 0.0;
2204: sum10 = 0.0;
2205: sum11 = 0.0;
2206: sum12 = 0.0;
2207: sum13 = 0.0;
2208: sum14 = 0.0;
2209: sum15 = 0.0;
2210: sum16 = 0.0;
2212: nonzerorow += (n>0);
2213: for (j=0; j<n; j++) {
2214: sum1 += v[jrow]*x[16*idx[jrow]];
2215: sum2 += v[jrow]*x[16*idx[jrow]+1];
2216: sum3 += v[jrow]*x[16*idx[jrow]+2];
2217: sum4 += v[jrow]*x[16*idx[jrow]+3];
2218: sum5 += v[jrow]*x[16*idx[jrow]+4];
2219: sum6 += v[jrow]*x[16*idx[jrow]+5];
2220: sum7 += v[jrow]*x[16*idx[jrow]+6];
2221: sum8 += v[jrow]*x[16*idx[jrow]+7];
2222: sum9 += v[jrow]*x[16*idx[jrow]+8];
2223: sum10 += v[jrow]*x[16*idx[jrow]+9];
2224: sum11 += v[jrow]*x[16*idx[jrow]+10];
2225: sum12 += v[jrow]*x[16*idx[jrow]+11];
2226: sum13 += v[jrow]*x[16*idx[jrow]+12];
2227: sum14 += v[jrow]*x[16*idx[jrow]+13];
2228: sum15 += v[jrow]*x[16*idx[jrow]+14];
2229: sum16 += v[jrow]*x[16*idx[jrow]+15];
2230: jrow++;
2231: }
2232: y[16*i] = sum1;
2233: y[16*i+1] = sum2;
2234: y[16*i+2] = sum3;
2235: y[16*i+3] = sum4;
2236: y[16*i+4] = sum5;
2237: y[16*i+5] = sum6;
2238: y[16*i+6] = sum7;
2239: y[16*i+7] = sum8;
2240: y[16*i+8] = sum9;
2241: y[16*i+9] = sum10;
2242: y[16*i+10] = sum11;
2243: y[16*i+11] = sum12;
2244: y[16*i+12] = sum13;
2245: y[16*i+13] = sum14;
2246: y[16*i+14] = sum15;
2247: y[16*i+15] = sum16;
2248: }
2250: PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);
2251: VecRestoreArrayRead(xx,&x);
2252: VecRestoreArray(yy,&y);
2253: return(0);
2254: }
2258: PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2259: {
2260: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2261: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2262: const PetscScalar *x,*v;
2263: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2264: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2265: PetscErrorCode ierr;
2266: const PetscInt m = b->AIJ->rmap->n,*idx;
2267: PetscInt n,i;
2270: VecSet(yy,0.0);
2271: VecGetArrayRead(xx,&x);
2272: VecGetArray(yy,&y);
2274: for (i=0; i<m; i++) {
2275: idx = a->j + a->i[i];
2276: v = a->a + a->i[i];
2277: n = a->i[i+1] - a->i[i];
2278: alpha1 = x[16*i];
2279: alpha2 = x[16*i+1];
2280: alpha3 = x[16*i+2];
2281: alpha4 = x[16*i+3];
2282: alpha5 = x[16*i+4];
2283: alpha6 = x[16*i+5];
2284: alpha7 = x[16*i+6];
2285: alpha8 = x[16*i+7];
2286: alpha9 = x[16*i+8];
2287: alpha10 = x[16*i+9];
2288: alpha11 = x[16*i+10];
2289: alpha12 = x[16*i+11];
2290: alpha13 = x[16*i+12];
2291: alpha14 = x[16*i+13];
2292: alpha15 = x[16*i+14];
2293: alpha16 = x[16*i+15];
2294: while (n-->0) {
2295: y[16*(*idx)] += alpha1*(*v);
2296: y[16*(*idx)+1] += alpha2*(*v);
2297: y[16*(*idx)+2] += alpha3*(*v);
2298: y[16*(*idx)+3] += alpha4*(*v);
2299: y[16*(*idx)+4] += alpha5*(*v);
2300: y[16*(*idx)+5] += alpha6*(*v);
2301: y[16*(*idx)+6] += alpha7*(*v);
2302: y[16*(*idx)+7] += alpha8*(*v);
2303: y[16*(*idx)+8] += alpha9*(*v);
2304: y[16*(*idx)+9] += alpha10*(*v);
2305: y[16*(*idx)+10] += alpha11*(*v);
2306: y[16*(*idx)+11] += alpha12*(*v);
2307: y[16*(*idx)+12] += alpha13*(*v);
2308: y[16*(*idx)+13] += alpha14*(*v);
2309: y[16*(*idx)+14] += alpha15*(*v);
2310: y[16*(*idx)+15] += alpha16*(*v);
2311: idx++; v++;
2312: }
2313: }
2314: PetscLogFlops(32.0*a->nz);
2315: VecRestoreArrayRead(xx,&x);
2316: VecRestoreArray(yy,&y);
2317: return(0);
2318: }
2322: PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2323: {
2324: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2325: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2326: const PetscScalar *x,*v;
2327: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2328: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2329: PetscErrorCode ierr;
2330: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2331: PetscInt n,i,jrow,j;
2334: if (yy != zz) {VecCopy(yy,zz);}
2335: VecGetArrayRead(xx,&x);
2336: VecGetArray(zz,&y);
2337: idx = a->j;
2338: v = a->a;
2339: ii = a->i;
2341: for (i=0; i<m; i++) {
2342: jrow = ii[i];
2343: n = ii[i+1] - jrow;
2344: sum1 = 0.0;
2345: sum2 = 0.0;
2346: sum3 = 0.0;
2347: sum4 = 0.0;
2348: sum5 = 0.0;
2349: sum6 = 0.0;
2350: sum7 = 0.0;
2351: sum8 = 0.0;
2352: sum9 = 0.0;
2353: sum10 = 0.0;
2354: sum11 = 0.0;
2355: sum12 = 0.0;
2356: sum13 = 0.0;
2357: sum14 = 0.0;
2358: sum15 = 0.0;
2359: sum16 = 0.0;
2360: for (j=0; j<n; j++) {
2361: sum1 += v[jrow]*x[16*idx[jrow]];
2362: sum2 += v[jrow]*x[16*idx[jrow]+1];
2363: sum3 += v[jrow]*x[16*idx[jrow]+2];
2364: sum4 += v[jrow]*x[16*idx[jrow]+3];
2365: sum5 += v[jrow]*x[16*idx[jrow]+4];
2366: sum6 += v[jrow]*x[16*idx[jrow]+5];
2367: sum7 += v[jrow]*x[16*idx[jrow]+6];
2368: sum8 += v[jrow]*x[16*idx[jrow]+7];
2369: sum9 += v[jrow]*x[16*idx[jrow]+8];
2370: sum10 += v[jrow]*x[16*idx[jrow]+9];
2371: sum11 += v[jrow]*x[16*idx[jrow]+10];
2372: sum12 += v[jrow]*x[16*idx[jrow]+11];
2373: sum13 += v[jrow]*x[16*idx[jrow]+12];
2374: sum14 += v[jrow]*x[16*idx[jrow]+13];
2375: sum15 += v[jrow]*x[16*idx[jrow]+14];
2376: sum16 += v[jrow]*x[16*idx[jrow]+15];
2377: jrow++;
2378: }
2379: y[16*i] += sum1;
2380: y[16*i+1] += sum2;
2381: y[16*i+2] += sum3;
2382: y[16*i+3] += sum4;
2383: y[16*i+4] += sum5;
2384: y[16*i+5] += sum6;
2385: y[16*i+6] += sum7;
2386: y[16*i+7] += sum8;
2387: y[16*i+8] += sum9;
2388: y[16*i+9] += sum10;
2389: y[16*i+10] += sum11;
2390: y[16*i+11] += sum12;
2391: y[16*i+12] += sum13;
2392: y[16*i+13] += sum14;
2393: y[16*i+14] += sum15;
2394: y[16*i+15] += sum16;
2395: }
2397: PetscLogFlops(32.0*a->nz);
2398: VecRestoreArrayRead(xx,&x);
2399: VecRestoreArray(zz,&y);
2400: return(0);
2401: }
2405: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2406: {
2407: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2408: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2409: const PetscScalar *x,*v;
2410: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2411: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2412: PetscErrorCode ierr;
2413: const PetscInt m = b->AIJ->rmap->n,*idx;
2414: PetscInt n,i;
2417: if (yy != zz) {VecCopy(yy,zz);}
2418: VecGetArrayRead(xx,&x);
2419: VecGetArray(zz,&y);
2420: for (i=0; i<m; i++) {
2421: idx = a->j + a->i[i];
2422: v = a->a + a->i[i];
2423: n = a->i[i+1] - a->i[i];
2424: alpha1 = x[16*i];
2425: alpha2 = x[16*i+1];
2426: alpha3 = x[16*i+2];
2427: alpha4 = x[16*i+3];
2428: alpha5 = x[16*i+4];
2429: alpha6 = x[16*i+5];
2430: alpha7 = x[16*i+6];
2431: alpha8 = x[16*i+7];
2432: alpha9 = x[16*i+8];
2433: alpha10 = x[16*i+9];
2434: alpha11 = x[16*i+10];
2435: alpha12 = x[16*i+11];
2436: alpha13 = x[16*i+12];
2437: alpha14 = x[16*i+13];
2438: alpha15 = x[16*i+14];
2439: alpha16 = x[16*i+15];
2440: while (n-->0) {
2441: y[16*(*idx)] += alpha1*(*v);
2442: y[16*(*idx)+1] += alpha2*(*v);
2443: y[16*(*idx)+2] += alpha3*(*v);
2444: y[16*(*idx)+3] += alpha4*(*v);
2445: y[16*(*idx)+4] += alpha5*(*v);
2446: y[16*(*idx)+5] += alpha6*(*v);
2447: y[16*(*idx)+6] += alpha7*(*v);
2448: y[16*(*idx)+7] += alpha8*(*v);
2449: y[16*(*idx)+8] += alpha9*(*v);
2450: y[16*(*idx)+9] += alpha10*(*v);
2451: y[16*(*idx)+10] += alpha11*(*v);
2452: y[16*(*idx)+11] += alpha12*(*v);
2453: y[16*(*idx)+12] += alpha13*(*v);
2454: y[16*(*idx)+13] += alpha14*(*v);
2455: y[16*(*idx)+14] += alpha15*(*v);
2456: y[16*(*idx)+15] += alpha16*(*v);
2457: idx++; v++;
2458: }
2459: }
2460: PetscLogFlops(32.0*a->nz);
2461: VecRestoreArrayRead(xx,&x);
2462: VecRestoreArray(zz,&y);
2463: return(0);
2464: }
2466: /*--------------------------------------------------------------------------------------------*/
2469: PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2470: {
2471: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2472: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2473: const PetscScalar *x,*v;
2474: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2475: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2476: PetscErrorCode ierr;
2477: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2478: PetscInt nonzerorow=0,n,i,jrow,j;
2481: VecGetArrayRead(xx,&x);
2482: VecGetArray(yy,&y);
2483: idx = a->j;
2484: v = a->a;
2485: ii = a->i;
2487: for (i=0; i<m; i++) {
2488: jrow = ii[i];
2489: n = ii[i+1] - jrow;
2490: sum1 = 0.0;
2491: sum2 = 0.0;
2492: sum3 = 0.0;
2493: sum4 = 0.0;
2494: sum5 = 0.0;
2495: sum6 = 0.0;
2496: sum7 = 0.0;
2497: sum8 = 0.0;
2498: sum9 = 0.0;
2499: sum10 = 0.0;
2500: sum11 = 0.0;
2501: sum12 = 0.0;
2502: sum13 = 0.0;
2503: sum14 = 0.0;
2504: sum15 = 0.0;
2505: sum16 = 0.0;
2506: sum17 = 0.0;
2507: sum18 = 0.0;
2509: nonzerorow += (n>0);
2510: for (j=0; j<n; j++) {
2511: sum1 += v[jrow]*x[18*idx[jrow]];
2512: sum2 += v[jrow]*x[18*idx[jrow]+1];
2513: sum3 += v[jrow]*x[18*idx[jrow]+2];
2514: sum4 += v[jrow]*x[18*idx[jrow]+3];
2515: sum5 += v[jrow]*x[18*idx[jrow]+4];
2516: sum6 += v[jrow]*x[18*idx[jrow]+5];
2517: sum7 += v[jrow]*x[18*idx[jrow]+6];
2518: sum8 += v[jrow]*x[18*idx[jrow]+7];
2519: sum9 += v[jrow]*x[18*idx[jrow]+8];
2520: sum10 += v[jrow]*x[18*idx[jrow]+9];
2521: sum11 += v[jrow]*x[18*idx[jrow]+10];
2522: sum12 += v[jrow]*x[18*idx[jrow]+11];
2523: sum13 += v[jrow]*x[18*idx[jrow]+12];
2524: sum14 += v[jrow]*x[18*idx[jrow]+13];
2525: sum15 += v[jrow]*x[18*idx[jrow]+14];
2526: sum16 += v[jrow]*x[18*idx[jrow]+15];
2527: sum17 += v[jrow]*x[18*idx[jrow]+16];
2528: sum18 += v[jrow]*x[18*idx[jrow]+17];
2529: jrow++;
2530: }
2531: y[18*i] = sum1;
2532: y[18*i+1] = sum2;
2533: y[18*i+2] = sum3;
2534: y[18*i+3] = sum4;
2535: y[18*i+4] = sum5;
2536: y[18*i+5] = sum6;
2537: y[18*i+6] = sum7;
2538: y[18*i+7] = sum8;
2539: y[18*i+8] = sum9;
2540: y[18*i+9] = sum10;
2541: y[18*i+10] = sum11;
2542: y[18*i+11] = sum12;
2543: y[18*i+12] = sum13;
2544: y[18*i+13] = sum14;
2545: y[18*i+14] = sum15;
2546: y[18*i+15] = sum16;
2547: y[18*i+16] = sum17;
2548: y[18*i+17] = sum18;
2549: }
2551: PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);
2552: VecRestoreArrayRead(xx,&x);
2553: VecRestoreArray(yy,&y);
2554: return(0);
2555: }
2559: PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2560: {
2561: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2562: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2563: const PetscScalar *x,*v;
2564: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2565: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2566: PetscErrorCode ierr;
2567: const PetscInt m = b->AIJ->rmap->n,*idx;
2568: PetscInt n,i;
2571: VecSet(yy,0.0);
2572: VecGetArrayRead(xx,&x);
2573: VecGetArray(yy,&y);
2575: for (i=0; i<m; i++) {
2576: idx = a->j + a->i[i];
2577: v = a->a + a->i[i];
2578: n = a->i[i+1] - a->i[i];
2579: alpha1 = x[18*i];
2580: alpha2 = x[18*i+1];
2581: alpha3 = x[18*i+2];
2582: alpha4 = x[18*i+3];
2583: alpha5 = x[18*i+4];
2584: alpha6 = x[18*i+5];
2585: alpha7 = x[18*i+6];
2586: alpha8 = x[18*i+7];
2587: alpha9 = x[18*i+8];
2588: alpha10 = x[18*i+9];
2589: alpha11 = x[18*i+10];
2590: alpha12 = x[18*i+11];
2591: alpha13 = x[18*i+12];
2592: alpha14 = x[18*i+13];
2593: alpha15 = x[18*i+14];
2594: alpha16 = x[18*i+15];
2595: alpha17 = x[18*i+16];
2596: alpha18 = x[18*i+17];
2597: while (n-->0) {
2598: y[18*(*idx)] += alpha1*(*v);
2599: y[18*(*idx)+1] += alpha2*(*v);
2600: y[18*(*idx)+2] += alpha3*(*v);
2601: y[18*(*idx)+3] += alpha4*(*v);
2602: y[18*(*idx)+4] += alpha5*(*v);
2603: y[18*(*idx)+5] += alpha6*(*v);
2604: y[18*(*idx)+6] += alpha7*(*v);
2605: y[18*(*idx)+7] += alpha8*(*v);
2606: y[18*(*idx)+8] += alpha9*(*v);
2607: y[18*(*idx)+9] += alpha10*(*v);
2608: y[18*(*idx)+10] += alpha11*(*v);
2609: y[18*(*idx)+11] += alpha12*(*v);
2610: y[18*(*idx)+12] += alpha13*(*v);
2611: y[18*(*idx)+13] += alpha14*(*v);
2612: y[18*(*idx)+14] += alpha15*(*v);
2613: y[18*(*idx)+15] += alpha16*(*v);
2614: y[18*(*idx)+16] += alpha17*(*v);
2615: y[18*(*idx)+17] += alpha18*(*v);
2616: idx++; v++;
2617: }
2618: }
2619: PetscLogFlops(36.0*a->nz);
2620: VecRestoreArrayRead(xx,&x);
2621: VecRestoreArray(yy,&y);
2622: return(0);
2623: }
2627: PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2628: {
2629: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2630: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2631: const PetscScalar *x,*v;
2632: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2633: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2634: PetscErrorCode ierr;
2635: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2636: PetscInt n,i,jrow,j;
2639: if (yy != zz) {VecCopy(yy,zz);}
2640: VecGetArrayRead(xx,&x);
2641: VecGetArray(zz,&y);
2642: idx = a->j;
2643: v = a->a;
2644: ii = a->i;
2646: for (i=0; i<m; i++) {
2647: jrow = ii[i];
2648: n = ii[i+1] - jrow;
2649: sum1 = 0.0;
2650: sum2 = 0.0;
2651: sum3 = 0.0;
2652: sum4 = 0.0;
2653: sum5 = 0.0;
2654: sum6 = 0.0;
2655: sum7 = 0.0;
2656: sum8 = 0.0;
2657: sum9 = 0.0;
2658: sum10 = 0.0;
2659: sum11 = 0.0;
2660: sum12 = 0.0;
2661: sum13 = 0.0;
2662: sum14 = 0.0;
2663: sum15 = 0.0;
2664: sum16 = 0.0;
2665: sum17 = 0.0;
2666: sum18 = 0.0;
2667: for (j=0; j<n; j++) {
2668: sum1 += v[jrow]*x[18*idx[jrow]];
2669: sum2 += v[jrow]*x[18*idx[jrow]+1];
2670: sum3 += v[jrow]*x[18*idx[jrow]+2];
2671: sum4 += v[jrow]*x[18*idx[jrow]+3];
2672: sum5 += v[jrow]*x[18*idx[jrow]+4];
2673: sum6 += v[jrow]*x[18*idx[jrow]+5];
2674: sum7 += v[jrow]*x[18*idx[jrow]+6];
2675: sum8 += v[jrow]*x[18*idx[jrow]+7];
2676: sum9 += v[jrow]*x[18*idx[jrow]+8];
2677: sum10 += v[jrow]*x[18*idx[jrow]+9];
2678: sum11 += v[jrow]*x[18*idx[jrow]+10];
2679: sum12 += v[jrow]*x[18*idx[jrow]+11];
2680: sum13 += v[jrow]*x[18*idx[jrow]+12];
2681: sum14 += v[jrow]*x[18*idx[jrow]+13];
2682: sum15 += v[jrow]*x[18*idx[jrow]+14];
2683: sum16 += v[jrow]*x[18*idx[jrow]+15];
2684: sum17 += v[jrow]*x[18*idx[jrow]+16];
2685: sum18 += v[jrow]*x[18*idx[jrow]+17];
2686: jrow++;
2687: }
2688: y[18*i] += sum1;
2689: y[18*i+1] += sum2;
2690: y[18*i+2] += sum3;
2691: y[18*i+3] += sum4;
2692: y[18*i+4] += sum5;
2693: y[18*i+5] += sum6;
2694: y[18*i+6] += sum7;
2695: y[18*i+7] += sum8;
2696: y[18*i+8] += sum9;
2697: y[18*i+9] += sum10;
2698: y[18*i+10] += sum11;
2699: y[18*i+11] += sum12;
2700: y[18*i+12] += sum13;
2701: y[18*i+13] += sum14;
2702: y[18*i+14] += sum15;
2703: y[18*i+15] += sum16;
2704: y[18*i+16] += sum17;
2705: y[18*i+17] += sum18;
2706: }
2708: PetscLogFlops(36.0*a->nz);
2709: VecRestoreArrayRead(xx,&x);
2710: VecRestoreArray(zz,&y);
2711: return(0);
2712: }
2716: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2717: {
2718: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2719: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2720: const PetscScalar *x,*v;
2721: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2722: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2723: PetscErrorCode ierr;
2724: const PetscInt m = b->AIJ->rmap->n,*idx;
2725: PetscInt n,i;
2728: if (yy != zz) {VecCopy(yy,zz);}
2729: VecGetArrayRead(xx,&x);
2730: VecGetArray(zz,&y);
2731: for (i=0; i<m; i++) {
2732: idx = a->j + a->i[i];
2733: v = a->a + a->i[i];
2734: n = a->i[i+1] - a->i[i];
2735: alpha1 = x[18*i];
2736: alpha2 = x[18*i+1];
2737: alpha3 = x[18*i+2];
2738: alpha4 = x[18*i+3];
2739: alpha5 = x[18*i+4];
2740: alpha6 = x[18*i+5];
2741: alpha7 = x[18*i+6];
2742: alpha8 = x[18*i+7];
2743: alpha9 = x[18*i+8];
2744: alpha10 = x[18*i+9];
2745: alpha11 = x[18*i+10];
2746: alpha12 = x[18*i+11];
2747: alpha13 = x[18*i+12];
2748: alpha14 = x[18*i+13];
2749: alpha15 = x[18*i+14];
2750: alpha16 = x[18*i+15];
2751: alpha17 = x[18*i+16];
2752: alpha18 = x[18*i+17];
2753: while (n-->0) {
2754: y[18*(*idx)] += alpha1*(*v);
2755: y[18*(*idx)+1] += alpha2*(*v);
2756: y[18*(*idx)+2] += alpha3*(*v);
2757: y[18*(*idx)+3] += alpha4*(*v);
2758: y[18*(*idx)+4] += alpha5*(*v);
2759: y[18*(*idx)+5] += alpha6*(*v);
2760: y[18*(*idx)+6] += alpha7*(*v);
2761: y[18*(*idx)+7] += alpha8*(*v);
2762: y[18*(*idx)+8] += alpha9*(*v);
2763: y[18*(*idx)+9] += alpha10*(*v);
2764: y[18*(*idx)+10] += alpha11*(*v);
2765: y[18*(*idx)+11] += alpha12*(*v);
2766: y[18*(*idx)+12] += alpha13*(*v);
2767: y[18*(*idx)+13] += alpha14*(*v);
2768: y[18*(*idx)+14] += alpha15*(*v);
2769: y[18*(*idx)+15] += alpha16*(*v);
2770: y[18*(*idx)+16] += alpha17*(*v);
2771: y[18*(*idx)+17] += alpha18*(*v);
2772: idx++; v++;
2773: }
2774: }
2775: PetscLogFlops(36.0*a->nz);
2776: VecRestoreArrayRead(xx,&x);
2777: VecRestoreArray(zz,&y);
2778: return(0);
2779: }
2783: PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2784: {
2785: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2786: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2787: const PetscScalar *x,*v;
2788: PetscScalar *y,*sums;
2789: PetscErrorCode ierr;
2790: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2791: PetscInt n,i,jrow,j,dof = b->dof,k;
2794: VecGetArrayRead(xx,&x);
2795: VecSet(yy,0.0);
2796: VecGetArray(yy,&y);
2797: idx = a->j;
2798: v = a->a;
2799: ii = a->i;
2801: for (i=0; i<m; i++) {
2802: jrow = ii[i];
2803: n = ii[i+1] - jrow;
2804: sums = y + dof*i;
2805: for (j=0; j<n; j++) {
2806: for (k=0; k<dof; k++) {
2807: sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2808: }
2809: jrow++;
2810: }
2811: }
2813: PetscLogFlops(2.0*dof*a->nz);
2814: VecRestoreArrayRead(xx,&x);
2815: VecRestoreArray(yy,&y);
2816: return(0);
2817: }
2821: PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2822: {
2823: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2824: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2825: const PetscScalar *x,*v;
2826: PetscScalar *y,*sums;
2827: PetscErrorCode ierr;
2828: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2829: PetscInt n,i,jrow,j,dof = b->dof,k;
2832: if (yy != zz) {VecCopy(yy,zz);}
2833: VecGetArrayRead(xx,&x);
2834: VecGetArray(zz,&y);
2835: idx = a->j;
2836: v = a->a;
2837: ii = a->i;
2839: for (i=0; i<m; i++) {
2840: jrow = ii[i];
2841: n = ii[i+1] - jrow;
2842: sums = y + dof*i;
2843: for (j=0; j<n; j++) {
2844: for (k=0; k<dof; k++) {
2845: sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2846: }
2847: jrow++;
2848: }
2849: }
2851: PetscLogFlops(2.0*dof*a->nz);
2852: VecRestoreArrayRead(xx,&x);
2853: VecRestoreArray(zz,&y);
2854: return(0);
2855: }
2859: PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2860: {
2861: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2862: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2863: const PetscScalar *x,*v,*alpha;
2864: PetscScalar *y;
2865: PetscErrorCode ierr;
2866: const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof;
2867: PetscInt n,i,k;
2870: VecGetArrayRead(xx,&x);
2871: VecSet(yy,0.0);
2872: VecGetArray(yy,&y);
2873: for (i=0; i<m; i++) {
2874: idx = a->j + a->i[i];
2875: v = a->a + a->i[i];
2876: n = a->i[i+1] - a->i[i];
2877: alpha = x + dof*i;
2878: while (n-->0) {
2879: for (k=0; k<dof; k++) {
2880: y[dof*(*idx)+k] += alpha[k]*(*v);
2881: }
2882: idx++; v++;
2883: }
2884: }
2885: PetscLogFlops(2.0*dof*a->nz);
2886: VecRestoreArrayRead(xx,&x);
2887: VecRestoreArray(yy,&y);
2888: return(0);
2889: }
2893: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2894: {
2895: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2896: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2897: const PetscScalar *x,*v,*alpha;
2898: PetscScalar *y;
2899: PetscErrorCode ierr;
2900: const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof;
2901: PetscInt n,i,k;
2904: if (yy != zz) {VecCopy(yy,zz);}
2905: VecGetArrayRead(xx,&x);
2906: VecGetArray(zz,&y);
2907: for (i=0; i<m; i++) {
2908: idx = a->j + a->i[i];
2909: v = a->a + a->i[i];
2910: n = a->i[i+1] - a->i[i];
2911: alpha = x + dof*i;
2912: while (n-->0) {
2913: for (k=0; k<dof; k++) {
2914: y[dof*(*idx)+k] += alpha[k]*(*v);
2915: }
2916: idx++; v++;
2917: }
2918: }
2919: PetscLogFlops(2.0*dof*a->nz);
2920: VecRestoreArrayRead(xx,&x);
2921: VecRestoreArray(zz,&y);
2922: return(0);
2923: }
2925: /*===================================================================================*/
2928: PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2929: {
2930: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2934: /* start the scatter */
2935: VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2936: (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
2937: VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2938: (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
2939: return(0);
2940: }
2944: PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2945: {
2946: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2950: (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2951: (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
2952: VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2953: VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2954: return(0);
2955: }
2959: PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2960: {
2961: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2965: /* start the scatter */
2966: VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2967: (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
2968: VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2969: (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);
2970: return(0);
2971: }
2975: PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2976: {
2977: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2981: (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2982: VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2983: (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
2984: VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2985: return(0);
2986: }
2988: /* ----------------------------------------------------------------*/
2991: PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2992: {
2993: PetscErrorCode ierr;
2994: PetscFreeSpaceList free_space=NULL,current_space=NULL;
2995: Mat_SeqMAIJ *pp =(Mat_SeqMAIJ*)PP->data;
2996: Mat P =pp->AIJ;
2997: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2998: PetscInt *pti,*ptj,*ptJ;
2999: PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
3000: const PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
3001: PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
3002: MatScalar *ca;
3003: const PetscInt *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;
3006: /* Get ij structure of P^T */
3007: MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
3009: cn = pn*ppdof;
3010: /* Allocate ci array, arrays for fill computation and */
3011: /* free space for accumulating nonzero column info */
3012: PetscMalloc1((cn+1),&ci);
3013: ci[0] = 0;
3015: /* Work arrays for rows of P^T*A */
3016: PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow);
3017: PetscMemzero(ptadenserow,an*sizeof(PetscInt));
3018: PetscMemzero(denserow,cn*sizeof(PetscInt));
3020: /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
3021: /* This should be reasonable if sparsity of PtAP is similar to that of A. */
3022: /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
3023: PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);
3024: current_space = free_space;
3026: /* Determine symbolic info for each row of C: */
3027: for (i=0; i<pn; i++) {
3028: ptnzi = pti[i+1] - pti[i];
3029: ptJ = ptj + pti[i];
3030: for (dof=0; dof<ppdof; dof++) {
3031: ptanzi = 0;
3032: /* Determine symbolic row of PtA: */
3033: for (j=0; j<ptnzi; j++) {
3034: /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
3035: arow = ptJ[j]*ppdof + dof;
3036: /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
3037: anzj = ai[arow+1] - ai[arow];
3038: ajj = aj + ai[arow];
3039: for (k=0; k<anzj; k++) {
3040: if (!ptadenserow[ajj[k]]) {
3041: ptadenserow[ajj[k]] = -1;
3042: ptasparserow[ptanzi++] = ajj[k];
3043: }
3044: }
3045: }
3046: /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
3047: ptaj = ptasparserow;
3048: cnzi = 0;
3049: for (j=0; j<ptanzi; j++) {
3050: /* Get offset within block of P */
3051: pshift = *ptaj%ppdof;
3052: /* Get block row of P */
3053: prow = (*ptaj++)/ppdof; /* integer division */
3054: /* P has same number of nonzeros per row as the compressed form */
3055: pnzj = pi[prow+1] - pi[prow];
3056: pjj = pj + pi[prow];
3057: for (k=0;k<pnzj;k++) {
3058: /* Locations in C are shifted by the offset within the block */
3059: /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
3060: if (!denserow[pjj[k]*ppdof+pshift]) {
3061: denserow[pjj[k]*ppdof+pshift] = -1;
3062: sparserow[cnzi++] = pjj[k]*ppdof+pshift;
3063: }
3064: }
3065: }
3067: /* sort sparserow */
3068: PetscSortInt(cnzi,sparserow);
3070: /* If free space is not available, make more free space */
3071: /* Double the amount of total space in the list */
3072: if (current_space->local_remaining<cnzi) {
3073: PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);
3074: }
3076: /* Copy data into free space, and zero out denserows */
3077: PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));
3079: current_space->array += cnzi;
3080: current_space->local_used += cnzi;
3081: current_space->local_remaining -= cnzi;
3083: for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
3084: for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0;
3086: /* Aside: Perhaps we should save the pta info for the numerical factorization. */
3087: /* For now, we will recompute what is needed. */
3088: ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
3089: }
3090: }
3091: /* nnz is now stored in ci[ptm], column indices are in the list of free space */
3092: /* Allocate space for cj, initialize cj, and */
3093: /* destroy list of free space and other temporary array(s) */
3094: PetscMalloc1((ci[cn]+1),&cj);
3095: PetscFreeSpaceContiguous(&free_space,cj);
3096: PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);
3098: /* Allocate space for ca */
3099: PetscCalloc1((ci[cn]+1),&ca);
3101: /* put together the new matrix */
3102: MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,C);
3103: MatSetBlockSize(*C,pp->dof);
3105: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3106: /* Since these are PETSc arrays, change flags to free them as necessary. */
3107: c = (Mat_SeqAIJ*)((*C)->data);
3108: c->free_a = PETSC_TRUE;
3109: c->free_ij = PETSC_TRUE;
3110: c->nonew = 0;
3112: (*C)->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
3114: /* Clean up. */
3115: MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
3116: return(0);
3117: }
3121: PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
3122: {
3123: /* This routine requires testing -- first draft only */
3124: PetscErrorCode ierr;
3125: Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data;
3126: Mat P =pp->AIJ;
3127: Mat_SeqAIJ *a = (Mat_SeqAIJ*) A->data;
3128: Mat_SeqAIJ *p = (Mat_SeqAIJ*) P->data;
3129: Mat_SeqAIJ *c = (Mat_SeqAIJ*) C->data;
3130: const PetscInt *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj;
3131: const PetscInt *ci=c->i,*cj=c->j,*cjj;
3132: const PetscInt am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3133: PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3134: const MatScalar *aa=a->a,*pa=p->a,*pA,*paj;
3135: MatScalar *ca=c->a,*caj,*apa;
3138: /* Allocate temporary array for storage of one row of A*P */
3139: PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense);
3141: /* Clear old values in C */
3142: PetscMemzero(ca,ci[cm]*sizeof(MatScalar));
3144: for (i=0; i<am; i++) {
3145: /* Form sparse row of A*P */
3146: anzi = ai[i+1] - ai[i];
3147: apnzj = 0;
3148: for (j=0; j<anzi; j++) {
3149: /* Get offset within block of P */
3150: pshift = *aj%ppdof;
3151: /* Get block row of P */
3152: prow = *aj++/ppdof; /* integer division */
3153: pnzj = pi[prow+1] - pi[prow];
3154: pjj = pj + pi[prow];
3155: paj = pa + pi[prow];
3156: for (k=0; k<pnzj; k++) {
3157: poffset = pjj[k]*ppdof+pshift;
3158: if (!apjdense[poffset]) {
3159: apjdense[poffset] = -1;
3160: apj[apnzj++] = poffset;
3161: }
3162: apa[poffset] += (*aa)*paj[k];
3163: }
3164: PetscLogFlops(2.0*pnzj);
3165: aa++;
3166: }
3168: /* Sort the j index array for quick sparse axpy. */
3169: /* Note: a array does not need sorting as it is in dense storage locations. */
3170: PetscSortInt(apnzj,apj);
3172: /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
3173: prow = i/ppdof; /* integer division */
3174: pshift = i%ppdof;
3175: poffset = pi[prow];
3176: pnzi = pi[prow+1] - poffset;
3177: /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
3178: pJ = pj+poffset;
3179: pA = pa+poffset;
3180: for (j=0; j<pnzi; j++) {
3181: crow = (*pJ)*ppdof+pshift;
3182: cjj = cj + ci[crow];
3183: caj = ca + ci[crow];
3184: pJ++;
3185: /* Perform sparse axpy operation. Note cjj includes apj. */
3186: for (k=0,nextap=0; nextap<apnzj; k++) {
3187: if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]];
3188: }
3189: PetscLogFlops(2.0*apnzj);
3190: pA++;
3191: }
3193: /* Zero the current row info for A*P */
3194: for (j=0; j<apnzj; j++) {
3195: apa[apj[j]] = 0.;
3196: apjdense[apj[j]] = 0;
3197: }
3198: }
3200: /* Assemble the final matrix and clean up */
3201: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
3202: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
3203: PetscFree3(apa,apj,apjdense);
3204: return(0);
3205: }
3209: PetscErrorCode MatPtAP_SeqAIJ_SeqMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
3210: {
3214: if (scall == MAT_INITIAL_MATRIX) {
3215: PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);
3216: MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,fill,C);
3217: PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);
3218: }
3219: PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);
3220: MatPtAPNumeric_SeqAIJ_SeqMAIJ(A,P,*C);
3221: PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);
3222: return(0);
3223: }
3227: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
3228: {
3232: /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */
3233: MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);
3234: ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);
3235: return(0);
3236: }
3240: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
3241: {
3243: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
3244: return(0);
3245: }
3249: PetscErrorCode MatPtAP_MPIAIJ_MPIMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
3250: {
3254: if (scall == MAT_INITIAL_MATRIX) {
3255: PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);
3256: MatPtAPSymbolic_MPIAIJ_MPIMAIJ(A,P,fill,C);
3257: PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);
3258: }
3259: PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);
3260: ((*C)->ops->ptapnumeric)(A,P,*C);
3261: PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);
3262: return(0);
3263: }
3267: PETSC_EXTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3268: {
3269: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
3270: Mat a = b->AIJ,B;
3271: Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data;
3273: PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3274: PetscInt *cols;
3275: PetscScalar *vals;
3278: MatGetSize(a,&m,&n);
3279: PetscMalloc1(dof*m,&ilen);
3280: for (i=0; i<m; i++) {
3281: nmax = PetscMax(nmax,aij->ilen[i]);
3282: for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i];
3283: }
3284: MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);
3285: PetscFree(ilen);
3286: PetscMalloc1(nmax,&icols);
3287: ii = 0;
3288: for (i=0; i<m; i++) {
3289: MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3290: for (j=0; j<dof; j++) {
3291: for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j;
3292: MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3293: ii++;
3294: }
3295: MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3296: }
3297: PetscFree(icols);
3298: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3299: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3301: if (reuse == MAT_REUSE_MATRIX) {
3302: MatHeaderReplace(A,B);
3303: } else {
3304: *newmat = B;
3305: }
3306: return(0);
3307: }
3309: #include <../src/mat/impls/aij/mpi/mpiaij.h>
3313: PETSC_EXTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3314: {
3315: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data;
3316: Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
3317: Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
3318: Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
3319: Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
3320: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data;
3321: PetscInt dof = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0;
3322: PetscInt *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL;
3323: PetscInt rstart,cstart,*garray,ii,k;
3325: PetscScalar *vals,*ovals;
3328: PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz);
3329: for (i=0; i<A->rmap->n/dof; i++) {
3330: nmax = PetscMax(nmax,AIJ->ilen[i]);
3331: onmax = PetscMax(onmax,OAIJ->ilen[i]);
3332: for (j=0; j<dof; j++) {
3333: dnz[dof*i+j] = AIJ->ilen[i];
3334: onz[dof*i+j] = OAIJ->ilen[i];
3335: }
3336: }
3337: MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);
3338: MatSetBlockSize(B,dof);
3339: PetscFree2(dnz,onz);
3341: PetscMalloc2(nmax,&icols,onmax,&oicols);
3342: rstart = dof*maij->A->rmap->rstart;
3343: cstart = dof*maij->A->cmap->rstart;
3344: garray = mpiaij->garray;
3346: ii = rstart;
3347: for (i=0; i<A->rmap->n/dof; i++) {
3348: MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3349: MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3350: for (j=0; j<dof; j++) {
3351: for (k=0; k<ncols; k++) {
3352: icols[k] = cstart + dof*cols[k]+j;
3353: }
3354: for (k=0; k<oncols; k++) {
3355: oicols[k] = dof*garray[ocols[k]]+j;
3356: }
3357: MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3358: MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);
3359: ii++;
3360: }
3361: MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3362: MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3363: }
3364: PetscFree2(icols,oicols);
3366: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3367: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3369: if (reuse == MAT_REUSE_MATRIX) {
3370: PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
3371: ((PetscObject)A)->refct = 1;
3373: MatHeaderReplace(A,B);
3375: ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3376: } else {
3377: *newmat = B;
3378: }
3379: return(0);
3380: }
3384: PetscErrorCode MatGetSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
3385: {
3387: Mat A;
3390: MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);
3391: MatGetSubMatrix(A,isrow,iscol,cll,newmat);
3392: MatDestroy(&A);
3393: return(0);
3394: }
3396: /* ---------------------------------------------------------------------------------- */
3399: /*@C
3400: MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
3401: operations for multicomponent problems. It interpolates each component the same
3402: way independently. The matrix type is based on MATSEQAIJ for sequential matrices,
3403: and MATMPIAIJ for distributed matrices.
3405: Collective
3407: Input Parameters:
3408: + A - the AIJ matrix describing the action on blocks
3409: - dof - the block size (number of components per node)
3411: Output Parameter:
3412: . maij - the new MAIJ matrix
3414: Operations provided:
3415: + MatMult
3416: . MatMultTranspose
3417: . MatMultAdd
3418: . MatMultTransposeAdd
3419: - MatView
3421: Level: advanced
3423: .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ
3424: @*/
3425: PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
3426: {
3428: PetscMPIInt size;
3429: PetscInt n;
3430: Mat B;
3433: PetscObjectReference((PetscObject)A);
3435: if (dof == 1) *maij = A;
3436: else {
3437: MatCreate(PetscObjectComm((PetscObject)A),&B);
3438: MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);
3439: PetscLayoutSetBlockSize(B->rmap,dof);
3440: PetscLayoutSetBlockSize(B->cmap,dof);
3441: PetscLayoutSetUp(B->rmap);
3442: PetscLayoutSetUp(B->cmap);
3444: B->assembled = PETSC_TRUE;
3446: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
3447: if (size == 1) {
3448: Mat_SeqMAIJ *b;
3450: MatSetType(B,MATSEQMAIJ);
3452: B->ops->setup = NULL;
3453: B->ops->destroy = MatDestroy_SeqMAIJ;
3454: B->ops->view = MatView_SeqMAIJ;
3455: b = (Mat_SeqMAIJ*)B->data;
3456: b->dof = dof;
3457: b->AIJ = A;
3459: if (dof == 2) {
3460: B->ops->mult = MatMult_SeqMAIJ_2;
3461: B->ops->multadd = MatMultAdd_SeqMAIJ_2;
3462: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2;
3463: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3464: } else if (dof == 3) {
3465: B->ops->mult = MatMult_SeqMAIJ_3;
3466: B->ops->multadd = MatMultAdd_SeqMAIJ_3;
3467: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3;
3468: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3469: } else if (dof == 4) {
3470: B->ops->mult = MatMult_SeqMAIJ_4;
3471: B->ops->multadd = MatMultAdd_SeqMAIJ_4;
3472: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4;
3473: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3474: } else if (dof == 5) {
3475: B->ops->mult = MatMult_SeqMAIJ_5;
3476: B->ops->multadd = MatMultAdd_SeqMAIJ_5;
3477: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5;
3478: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
3479: } else if (dof == 6) {
3480: B->ops->mult = MatMult_SeqMAIJ_6;
3481: B->ops->multadd = MatMultAdd_SeqMAIJ_6;
3482: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6;
3483: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3484: } else if (dof == 7) {
3485: B->ops->mult = MatMult_SeqMAIJ_7;
3486: B->ops->multadd = MatMultAdd_SeqMAIJ_7;
3487: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7;
3488: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
3489: } else if (dof == 8) {
3490: B->ops->mult = MatMult_SeqMAIJ_8;
3491: B->ops->multadd = MatMultAdd_SeqMAIJ_8;
3492: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8;
3493: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
3494: } else if (dof == 9) {
3495: B->ops->mult = MatMult_SeqMAIJ_9;
3496: B->ops->multadd = MatMultAdd_SeqMAIJ_9;
3497: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9;
3498: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3499: } else if (dof == 10) {
3500: B->ops->mult = MatMult_SeqMAIJ_10;
3501: B->ops->multadd = MatMultAdd_SeqMAIJ_10;
3502: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10;
3503: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3504: } else if (dof == 11) {
3505: B->ops->mult = MatMult_SeqMAIJ_11;
3506: B->ops->multadd = MatMultAdd_SeqMAIJ_11;
3507: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11;
3508: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
3509: } else if (dof == 16) {
3510: B->ops->mult = MatMult_SeqMAIJ_16;
3511: B->ops->multadd = MatMultAdd_SeqMAIJ_16;
3512: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16;
3513: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3514: } else if (dof == 18) {
3515: B->ops->mult = MatMult_SeqMAIJ_18;
3516: B->ops->multadd = MatMultAdd_SeqMAIJ_18;
3517: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18;
3518: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
3519: } else {
3520: B->ops->mult = MatMult_SeqMAIJ_N;
3521: B->ops->multadd = MatMultAdd_SeqMAIJ_N;
3522: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N;
3523: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
3524: }
3525: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ);
3526: PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaij_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);
3527: } else {
3528: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)A->data;
3529: Mat_MPIMAIJ *b;
3530: IS from,to;
3531: Vec gvec;
3533: MatSetType(B,MATMPIMAIJ);
3535: B->ops->setup = NULL;
3536: B->ops->destroy = MatDestroy_MPIMAIJ;
3537: B->ops->view = MatView_MPIMAIJ;
3539: b = (Mat_MPIMAIJ*)B->data;
3540: b->dof = dof;
3541: b->A = A;
3543: MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);
3544: MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);
3546: VecGetSize(mpiaij->lvec,&n);
3547: VecCreate(PETSC_COMM_SELF,&b->w);
3548: VecSetSizes(b->w,n*dof,n*dof);
3549: VecSetBlockSize(b->w,dof);
3550: VecSetType(b->w,VECSEQ);
3552: /* create two temporary Index sets for build scatter gather */
3553: ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);
3554: ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);
3556: /* create temporary global vector to generate scatter context */
3557: VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);
3559: /* generate the scatter context */
3560: VecScatterCreate(gvec,from,b->w,to,&b->ctx);
3562: ISDestroy(&from);
3563: ISDestroy(&to);
3564: VecDestroy(&gvec);
3566: B->ops->mult = MatMult_MPIMAIJ_dof;
3567: B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof;
3568: B->ops->multadd = MatMultAdd_MPIMAIJ_dof;
3569: B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
3571: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ);
3572: PetscObjectComposeFunction((PetscObject)B,"MatPtAP_mpiaij_mpimaij_C",MatPtAP_MPIAIJ_MPIMAIJ);
3573: }
3574: B->ops->getsubmatrix = MatGetSubMatrix_MAIJ;
3575: MatSetUp(B);
3576: *maij = B;
3577: MatViewFromOptions(B,NULL,"-mat_view");
3578: }
3579: return(0);
3580: }