Actual source code: maij.c
petsc-3.3-p7 2013-05-11
2: /*
3: Defines the basic matrix operations for the MAIJ matrix storage format.
4: This format is used for restriction and interpolation operations for
5: multicomponent problems. It interpolates each component the same way
6: independently.
8: We provide:
9: MatMult()
10: MatMultTranspose()
11: MatMultTransposeAdd()
12: MatMultAdd()
13: and
14: MatCreateMAIJ(Mat,dof,Mat*)
16: This single directory handles both the sequential and parallel codes
17: */
19: #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
20: #include <../src/mat/utils/freespace.h>
21: #include <petsc-private/vecimpl.h>
25: /*@C
26: MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix
28: Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel
30: Input Parameter:
31: . A - the MAIJ matrix
33: Output Parameter:
34: . B - the AIJ matrix
36: Level: advanced
38: Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
40: .seealso: MatCreateMAIJ()
41: @*/
42: PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B)
43: {
45: PetscBool ismpimaij,isseqmaij;
48: PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
49: PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
50: if (ismpimaij) {
51: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
53: *B = b->A;
54: } else if (isseqmaij) {
55: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
57: *B = b->AIJ;
58: } else {
59: *B = A;
60: }
61: return(0);
62: }
66: /*@C
67: MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size
69: Logically Collective
71: Input Parameter:
72: + A - the MAIJ matrix
73: - dof - the block size for the new matrix
75: Output Parameter:
76: . B - the new MAIJ matrix
78: Level: advanced
80: .seealso: MatCreateMAIJ()
81: @*/
82: PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
83: {
85: Mat Aij = PETSC_NULL;
89: MatMAIJGetAIJ(A,&Aij);
90: MatCreateMAIJ(Aij,dof,B);
91: return(0);
92: }
96: PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
97: {
99: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
102: MatDestroy(&b->AIJ);
103: PetscFree(A->data);
104: return(0);
105: }
109: PetscErrorCode MatSetUp_MAIJ(Mat A)
110: {
112: SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices");
113: return(0);
114: }
118: PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
119: {
121: Mat B;
124: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
125: MatView(B,viewer);
126: MatDestroy(&B);
127: return(0);
128: }
132: PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
133: {
135: Mat B;
138: MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
139: MatView(B,viewer);
140: MatDestroy(&B);
141: return(0);
142: }
146: PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
147: {
149: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
152: MatDestroy(&b->AIJ);
153: MatDestroy(&b->OAIJ);
154: MatDestroy(&b->A);
155: VecScatterDestroy(&b->ctx);
156: VecDestroy(&b->w);
157: PetscFree(A->data);
158: PetscObjectChangeTypeName((PetscObject)A,0);
159: return(0);
160: }
162: /*MC
163: MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
164: multicomponent problems, interpolating or restricting each component the same way independently.
165: The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
167: Operations provided:
168: . MatMult
169: . MatMultTranspose
170: . MatMultAdd
171: . MatMultTransposeAdd
173: Level: advanced
175: .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ()
176: M*/
178: EXTERN_C_BEGIN
181: PetscErrorCode MatCreate_MAIJ(Mat A)
182: {
184: Mat_MPIMAIJ *b;
185: PetscMPIInt size;
188: PetscNewLog(A,Mat_MPIMAIJ,&b);
189: A->data = (void*)b;
190: PetscMemzero(A->ops,sizeof(struct _MatOps));
191: A->ops->setup = MatSetUp_MAIJ;
193: b->AIJ = 0;
194: b->dof = 0;
195: b->OAIJ = 0;
196: b->ctx = 0;
197: b->w = 0;
198: MPI_Comm_size(((PetscObject)A)->comm,&size);
199: if (size == 1){
200: PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);
201: } else {
202: PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);
203: }
204: return(0);
205: }
206: EXTERN_C_END
208: /* --------------------------------------------------------------------------------------*/
211: PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
212: {
213: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
214: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
215: const PetscScalar *x,*v;
216: PetscScalar *y, sum1, sum2;
217: PetscErrorCode ierr;
218: PetscInt nonzerorow=0,n,i,jrow,j;
219: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
222: VecGetArrayRead(xx,&x);
223: VecGetArray(yy,&y);
224: idx = a->j;
225: v = a->a;
226: ii = a->i;
228: for (i=0; i<m; i++) {
229: jrow = ii[i];
230: n = ii[i+1] - jrow;
231: sum1 = 0.0;
232: sum2 = 0.0;
233: nonzerorow += (n>0);
234: for (j=0; j<n; j++) {
235: sum1 += v[jrow]*x[2*idx[jrow]];
236: sum2 += v[jrow]*x[2*idx[jrow]+1];
237: jrow++;
238: }
239: y[2*i] = sum1;
240: y[2*i+1] = sum2;
241: }
243: PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);
244: VecRestoreArrayRead(xx,&x);
245: VecRestoreArray(yy,&y);
246: return(0);
247: }
251: PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
252: {
253: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
254: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
255: const PetscScalar *x,*v;
256: PetscScalar *y,alpha1,alpha2;
257: PetscErrorCode ierr;
258: PetscInt n,i;
259: const PetscInt m = b->AIJ->rmap->n,*idx;
262: VecSet(yy,0.0);
263: VecGetArrayRead(xx,&x);
264: VecGetArray(yy,&y);
265:
266: for (i=0; i<m; i++) {
267: idx = a->j + a->i[i] ;
268: v = a->a + a->i[i] ;
269: n = a->i[i+1] - a->i[i];
270: alpha1 = x[2*i];
271: alpha2 = x[2*i+1];
272: while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
273: }
274: PetscLogFlops(4.0*a->nz);
275: VecRestoreArrayRead(xx,&x);
276: VecRestoreArray(yy,&y);
277: return(0);
278: }
282: PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
283: {
284: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
285: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
286: const PetscScalar *x,*v;
287: PetscScalar *y,sum1, sum2;
288: PetscErrorCode ierr;
289: PetscInt n,i,jrow,j;
290: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
293: if (yy != zz) {VecCopy(yy,zz);}
294: VecGetArrayRead(xx,&x);
295: VecGetArray(zz,&y);
296: idx = a->j;
297: v = a->a;
298: ii = a->i;
300: for (i=0; i<m; i++) {
301: jrow = ii[i];
302: n = ii[i+1] - jrow;
303: sum1 = 0.0;
304: sum2 = 0.0;
305: for (j=0; j<n; j++) {
306: sum1 += v[jrow]*x[2*idx[jrow]];
307: sum2 += v[jrow]*x[2*idx[jrow]+1];
308: jrow++;
309: }
310: y[2*i] += sum1;
311: y[2*i+1] += sum2;
312: }
314: PetscLogFlops(4.0*a->nz);
315: VecRestoreArrayRead(xx,&x);
316: VecRestoreArray(zz,&y);
317: return(0);
318: }
321: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
322: {
323: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
324: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
325: const PetscScalar *x,*v;
326: PetscScalar *y,alpha1,alpha2;
327: PetscErrorCode ierr;
328: PetscInt n,i;
329: const PetscInt m = b->AIJ->rmap->n,*idx;
332: if (yy != zz) {VecCopy(yy,zz);}
333: VecGetArrayRead(xx,&x);
334: VecGetArray(zz,&y);
335:
336: for (i=0; i<m; i++) {
337: idx = a->j + a->i[i] ;
338: v = a->a + a->i[i] ;
339: n = a->i[i+1] - a->i[i];
340: alpha1 = x[2*i];
341: alpha2 = x[2*i+1];
342: while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
343: }
344: PetscLogFlops(4.0*a->nz);
345: VecRestoreArrayRead(xx,&x);
346: VecRestoreArray(zz,&y);
347: return(0);
348: }
349: /* --------------------------------------------------------------------------------------*/
352: PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
353: {
354: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
355: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
356: const PetscScalar *x,*v;
357: PetscScalar *y,sum1, sum2, sum3;
358: PetscErrorCode ierr;
359: PetscInt nonzerorow=0,n,i,jrow,j;
360: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
363: VecGetArrayRead(xx,&x);
364: VecGetArray(yy,&y);
365: idx = a->j;
366: v = a->a;
367: ii = a->i;
369: for (i=0; i<m; i++) {
370: jrow = ii[i];
371: n = ii[i+1] - jrow;
372: sum1 = 0.0;
373: sum2 = 0.0;
374: sum3 = 0.0;
375: nonzerorow += (n>0);
376: for (j=0; j<n; j++) {
377: sum1 += v[jrow]*x[3*idx[jrow]];
378: sum2 += v[jrow]*x[3*idx[jrow]+1];
379: sum3 += v[jrow]*x[3*idx[jrow]+2];
380: jrow++;
381: }
382: y[3*i] = sum1;
383: y[3*i+1] = sum2;
384: y[3*i+2] = sum3;
385: }
387: PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);
388: VecRestoreArrayRead(xx,&x);
389: VecRestoreArray(yy,&y);
390: return(0);
391: }
395: PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
396: {
397: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
398: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
399: const PetscScalar *x,*v;
400: PetscScalar *y,alpha1,alpha2,alpha3;
401: PetscErrorCode ierr;
402: PetscInt n,i;
403: const PetscInt m = b->AIJ->rmap->n,*idx;
406: VecSet(yy,0.0);
407: VecGetArrayRead(xx,&x);
408: VecGetArray(yy,&y);
409:
410: for (i=0; i<m; i++) {
411: idx = a->j + a->i[i];
412: v = a->a + a->i[i];
413: n = a->i[i+1] - a->i[i];
414: alpha1 = x[3*i];
415: alpha2 = x[3*i+1];
416: alpha3 = x[3*i+2];
417: while (n-->0) {
418: y[3*(*idx)] += alpha1*(*v);
419: y[3*(*idx)+1] += alpha2*(*v);
420: y[3*(*idx)+2] += alpha3*(*v);
421: idx++; v++;
422: }
423: }
424: PetscLogFlops(6.0*a->nz);
425: VecRestoreArrayRead(xx,&x);
426: VecRestoreArray(yy,&y);
427: return(0);
428: }
432: PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
433: {
434: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
435: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
436: const PetscScalar *x,*v;
437: PetscScalar *y,sum1, sum2, sum3;
438: PetscErrorCode ierr;
439: PetscInt n,i,jrow,j;
440: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
443: if (yy != zz) {VecCopy(yy,zz);}
444: VecGetArrayRead(xx,&x);
445: VecGetArray(zz,&y);
446: idx = a->j;
447: v = a->a;
448: ii = a->i;
450: for (i=0; i<m; i++) {
451: jrow = ii[i];
452: n = ii[i+1] - jrow;
453: sum1 = 0.0;
454: sum2 = 0.0;
455: sum3 = 0.0;
456: for (j=0; j<n; j++) {
457: sum1 += v[jrow]*x[3*idx[jrow]];
458: sum2 += v[jrow]*x[3*idx[jrow]+1];
459: sum3 += v[jrow]*x[3*idx[jrow]+2];
460: jrow++;
461: }
462: y[3*i] += sum1;
463: y[3*i+1] += sum2;
464: y[3*i+2] += sum3;
465: }
467: PetscLogFlops(6.0*a->nz);
468: VecRestoreArrayRead(xx,&x);
469: VecRestoreArray(zz,&y);
470: return(0);
471: }
474: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
475: {
476: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
477: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
478: const PetscScalar *x,*v;
479: PetscScalar *y,alpha1,alpha2,alpha3;
480: PetscErrorCode ierr;
481: PetscInt n,i;
482: const PetscInt m = b->AIJ->rmap->n,*idx;
485: if (yy != zz) {VecCopy(yy,zz);}
486: VecGetArrayRead(xx,&x);
487: VecGetArray(zz,&y);
488: for (i=0; i<m; i++) {
489: idx = a->j + a->i[i] ;
490: v = a->a + a->i[i] ;
491: n = a->i[i+1] - a->i[i];
492: alpha1 = x[3*i];
493: alpha2 = x[3*i+1];
494: alpha3 = x[3*i+2];
495: while (n-->0) {
496: y[3*(*idx)] += alpha1*(*v);
497: y[3*(*idx)+1] += alpha2*(*v);
498: y[3*(*idx)+2] += alpha3*(*v);
499: idx++; v++;
500: }
501: }
502: PetscLogFlops(6.0*a->nz);
503: VecRestoreArrayRead(xx,&x);
504: VecRestoreArray(zz,&y);
505: return(0);
506: }
508: /* ------------------------------------------------------------------------------*/
511: PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
512: {
513: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
514: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
515: const PetscScalar *x,*v;
516: PetscScalar *y,sum1, sum2, sum3, sum4;
517: PetscErrorCode ierr;
518: PetscInt nonzerorow=0,n,i,jrow,j;
519: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
522: VecGetArrayRead(xx,&x);
523: VecGetArray(yy,&y);
524: idx = a->j;
525: v = a->a;
526: ii = a->i;
528: for (i=0; i<m; i++) {
529: jrow = ii[i];
530: n = ii[i+1] - jrow;
531: sum1 = 0.0;
532: sum2 = 0.0;
533: sum3 = 0.0;
534: sum4 = 0.0;
535: nonzerorow += (n>0);
536: for (j=0; j<n; j++) {
537: sum1 += v[jrow]*x[4*idx[jrow]];
538: sum2 += v[jrow]*x[4*idx[jrow]+1];
539: sum3 += v[jrow]*x[4*idx[jrow]+2];
540: sum4 += v[jrow]*x[4*idx[jrow]+3];
541: jrow++;
542: }
543: y[4*i] = sum1;
544: y[4*i+1] = sum2;
545: y[4*i+2] = sum3;
546: y[4*i+3] = sum4;
547: }
549: PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);
550: VecRestoreArrayRead(xx,&x);
551: VecRestoreArray(yy,&y);
552: return(0);
553: }
557: PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
558: {
559: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
560: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
561: const PetscScalar *x,*v;
562: PetscScalar *y,alpha1,alpha2,alpha3,alpha4;
563: PetscErrorCode ierr;
564: PetscInt n,i;
565: const PetscInt m = b->AIJ->rmap->n,*idx;
568: VecSet(yy,0.0);
569: VecGetArrayRead(xx,&x);
570: VecGetArray(yy,&y);
571: for (i=0; i<m; i++) {
572: idx = a->j + a->i[i] ;
573: v = a->a + a->i[i] ;
574: n = a->i[i+1] - a->i[i];
575: alpha1 = x[4*i];
576: alpha2 = x[4*i+1];
577: alpha3 = x[4*i+2];
578: alpha4 = x[4*i+3];
579: while (n-->0) {
580: y[4*(*idx)] += alpha1*(*v);
581: y[4*(*idx)+1] += alpha2*(*v);
582: y[4*(*idx)+2] += alpha3*(*v);
583: y[4*(*idx)+3] += alpha4*(*v);
584: idx++; v++;
585: }
586: }
587: PetscLogFlops(8.0*a->nz);
588: VecRestoreArrayRead(xx,&x);
589: VecRestoreArray(yy,&y);
590: return(0);
591: }
595: PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
596: {
597: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
598: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
599: const PetscScalar *x,*v;
600: PetscScalar *y,sum1, sum2, sum3, sum4;
601: PetscErrorCode ierr;
602: PetscInt n,i,jrow,j;
603: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
606: if (yy != zz) {VecCopy(yy,zz);}
607: VecGetArrayRead(xx,&x);
608: VecGetArray(zz,&y);
609: idx = a->j;
610: v = a->a;
611: ii = a->i;
613: for (i=0; i<m; i++) {
614: jrow = ii[i];
615: n = ii[i+1] - jrow;
616: sum1 = 0.0;
617: sum2 = 0.0;
618: sum3 = 0.0;
619: sum4 = 0.0;
620: for (j=0; j<n; j++) {
621: sum1 += v[jrow]*x[4*idx[jrow]];
622: sum2 += v[jrow]*x[4*idx[jrow]+1];
623: sum3 += v[jrow]*x[4*idx[jrow]+2];
624: sum4 += v[jrow]*x[4*idx[jrow]+3];
625: jrow++;
626: }
627: y[4*i] += sum1;
628: y[4*i+1] += sum2;
629: y[4*i+2] += sum3;
630: y[4*i+3] += sum4;
631: }
633: PetscLogFlops(8.0*a->nz);
634: VecRestoreArrayRead(xx,&x);
635: VecRestoreArray(zz,&y);
636: return(0);
637: }
640: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
641: {
642: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
643: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
644: const PetscScalar *x,*v;
645: PetscScalar *y,alpha1,alpha2,alpha3,alpha4;
646: PetscErrorCode ierr;
647: PetscInt n,i;
648: const PetscInt m = b->AIJ->rmap->n,*idx;
651: if (yy != zz) {VecCopy(yy,zz);}
652: VecGetArrayRead(xx,&x);
653: VecGetArray(zz,&y);
654:
655: for (i=0; i<m; i++) {
656: idx = a->j + a->i[i] ;
657: v = a->a + a->i[i] ;
658: n = a->i[i+1] - a->i[i];
659: alpha1 = x[4*i];
660: alpha2 = x[4*i+1];
661: alpha3 = x[4*i+2];
662: alpha4 = x[4*i+3];
663: while (n-->0) {
664: y[4*(*idx)] += alpha1*(*v);
665: y[4*(*idx)+1] += alpha2*(*v);
666: y[4*(*idx)+2] += alpha3*(*v);
667: y[4*(*idx)+3] += alpha4*(*v);
668: idx++; v++;
669: }
670: }
671: PetscLogFlops(8.0*a->nz);
672: VecRestoreArrayRead(xx,&x);
673: VecRestoreArray(zz,&y);
674: return(0);
675: }
676: /* ------------------------------------------------------------------------------*/
680: PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
681: {
682: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
683: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
684: const PetscScalar *x,*v;
685: PetscScalar *y,sum1, sum2, sum3, sum4, sum5;
686: PetscErrorCode ierr;
687: PetscInt nonzerorow=0,n,i,jrow,j;
688: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
691: VecGetArrayRead(xx,&x);
692: VecGetArray(yy,&y);
693: idx = a->j;
694: v = a->a;
695: ii = a->i;
697: for (i=0; i<m; i++) {
698: jrow = ii[i];
699: n = ii[i+1] - jrow;
700: sum1 = 0.0;
701: sum2 = 0.0;
702: sum3 = 0.0;
703: sum4 = 0.0;
704: sum5 = 0.0;
705: nonzerorow += (n>0);
706: for (j=0; j<n; j++) {
707: sum1 += v[jrow]*x[5*idx[jrow]];
708: sum2 += v[jrow]*x[5*idx[jrow]+1];
709: sum3 += v[jrow]*x[5*idx[jrow]+2];
710: sum4 += v[jrow]*x[5*idx[jrow]+3];
711: sum5 += v[jrow]*x[5*idx[jrow]+4];
712: jrow++;
713: }
714: y[5*i] = sum1;
715: y[5*i+1] = sum2;
716: y[5*i+2] = sum3;
717: y[5*i+3] = sum4;
718: y[5*i+4] = sum5;
719: }
721: PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);
722: VecRestoreArrayRead(xx,&x);
723: VecRestoreArray(yy,&y);
724: return(0);
725: }
729: PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
730: {
731: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
732: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
733: const PetscScalar *x,*v;
734: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5;
735: PetscErrorCode ierr;
736: PetscInt n,i;
737: const PetscInt m = b->AIJ->rmap->n,*idx;
740: VecSet(yy,0.0);
741: VecGetArrayRead(xx,&x);
742: VecGetArray(yy,&y);
743:
744: for (i=0; i<m; i++) {
745: idx = a->j + a->i[i] ;
746: v = a->a + a->i[i] ;
747: n = a->i[i+1] - a->i[i];
748: alpha1 = x[5*i];
749: alpha2 = x[5*i+1];
750: alpha3 = x[5*i+2];
751: alpha4 = x[5*i+3];
752: alpha5 = x[5*i+4];
753: while (n-->0) {
754: y[5*(*idx)] += alpha1*(*v);
755: y[5*(*idx)+1] += alpha2*(*v);
756: y[5*(*idx)+2] += alpha3*(*v);
757: y[5*(*idx)+3] += alpha4*(*v);
758: y[5*(*idx)+4] += alpha5*(*v);
759: idx++; v++;
760: }
761: }
762: PetscLogFlops(10.0*a->nz);
763: VecRestoreArrayRead(xx,&x);
764: VecRestoreArray(yy,&y);
765: return(0);
766: }
770: PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
771: {
772: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
773: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
774: const PetscScalar *x,*v;
775: PetscScalar *y,sum1, sum2, sum3, sum4, sum5;
776: PetscErrorCode ierr;
777: PetscInt n,i,jrow,j;
778: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
781: if (yy != zz) {VecCopy(yy,zz);}
782: VecGetArrayRead(xx,&x);
783: VecGetArray(zz,&y);
784: idx = a->j;
785: v = a->a;
786: ii = a->i;
788: for (i=0; i<m; i++) {
789: jrow = ii[i];
790: n = ii[i+1] - jrow;
791: sum1 = 0.0;
792: sum2 = 0.0;
793: sum3 = 0.0;
794: sum4 = 0.0;
795: sum5 = 0.0;
796: for (j=0; j<n; j++) {
797: sum1 += v[jrow]*x[5*idx[jrow]];
798: sum2 += v[jrow]*x[5*idx[jrow]+1];
799: sum3 += v[jrow]*x[5*idx[jrow]+2];
800: sum4 += v[jrow]*x[5*idx[jrow]+3];
801: sum5 += v[jrow]*x[5*idx[jrow]+4];
802: jrow++;
803: }
804: y[5*i] += sum1;
805: y[5*i+1] += sum2;
806: y[5*i+2] += sum3;
807: y[5*i+3] += sum4;
808: y[5*i+4] += sum5;
809: }
811: PetscLogFlops(10.0*a->nz);
812: VecRestoreArrayRead(xx,&x);
813: VecRestoreArray(zz,&y);
814: return(0);
815: }
819: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
820: {
821: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
822: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
823: const PetscScalar *x,*v;
824: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5;
825: PetscErrorCode ierr;
826: PetscInt n,i;
827: const PetscInt m = b->AIJ->rmap->n,*idx;
830: if (yy != zz) {VecCopy(yy,zz);}
831: VecGetArrayRead(xx,&x);
832: VecGetArray(zz,&y);
833:
834: for (i=0; i<m; i++) {
835: idx = a->j + a->i[i] ;
836: v = a->a + a->i[i] ;
837: n = a->i[i+1] - a->i[i];
838: alpha1 = x[5*i];
839: alpha2 = x[5*i+1];
840: alpha3 = x[5*i+2];
841: alpha4 = x[5*i+3];
842: alpha5 = x[5*i+4];
843: while (n-->0) {
844: y[5*(*idx)] += alpha1*(*v);
845: y[5*(*idx)+1] += alpha2*(*v);
846: y[5*(*idx)+2] += alpha3*(*v);
847: y[5*(*idx)+3] += alpha4*(*v);
848: y[5*(*idx)+4] += alpha5*(*v);
849: idx++; v++;
850: }
851: }
852: PetscLogFlops(10.0*a->nz);
853: VecRestoreArrayRead(xx,&x);
854: VecRestoreArray(zz,&y);
855: return(0);
856: }
858: /* ------------------------------------------------------------------------------*/
861: PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
862: {
863: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
864: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
865: const PetscScalar *x,*v;
866: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6;
867: PetscErrorCode ierr;
868: PetscInt nonzerorow=0,n,i,jrow,j;
869: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
872: VecGetArrayRead(xx,&x);
873: VecGetArray(yy,&y);
874: idx = a->j;
875: v = a->a;
876: ii = a->i;
878: for (i=0; i<m; i++) {
879: jrow = ii[i];
880: n = ii[i+1] - jrow;
881: sum1 = 0.0;
882: sum2 = 0.0;
883: sum3 = 0.0;
884: sum4 = 0.0;
885: sum5 = 0.0;
886: sum6 = 0.0;
887: nonzerorow += (n>0);
888: for (j=0; j<n; j++) {
889: sum1 += v[jrow]*x[6*idx[jrow]];
890: sum2 += v[jrow]*x[6*idx[jrow]+1];
891: sum3 += v[jrow]*x[6*idx[jrow]+2];
892: sum4 += v[jrow]*x[6*idx[jrow]+3];
893: sum5 += v[jrow]*x[6*idx[jrow]+4];
894: sum6 += v[jrow]*x[6*idx[jrow]+5];
895: jrow++;
896: }
897: y[6*i] = sum1;
898: y[6*i+1] = sum2;
899: y[6*i+2] = sum3;
900: y[6*i+3] = sum4;
901: y[6*i+4] = sum5;
902: y[6*i+5] = sum6;
903: }
905: PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);
906: VecRestoreArrayRead(xx,&x);
907: VecRestoreArray(yy,&y);
908: return(0);
909: }
913: PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
914: {
915: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
916: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
917: const PetscScalar *x,*v;
918: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
919: PetscErrorCode ierr;
920: PetscInt n,i;
921: const PetscInt m = b->AIJ->rmap->n,*idx;
924: VecSet(yy,0.0);
925: VecGetArrayRead(xx,&x);
926: VecGetArray(yy,&y);
928: for (i=0; i<m; i++) {
929: idx = a->j + a->i[i] ;
930: v = a->a + a->i[i] ;
931: n = a->i[i+1] - a->i[i];
932: alpha1 = x[6*i];
933: alpha2 = x[6*i+1];
934: alpha3 = x[6*i+2];
935: alpha4 = x[6*i+3];
936: alpha5 = x[6*i+4];
937: alpha6 = x[6*i+5];
938: while (n-->0) {
939: y[6*(*idx)] += alpha1*(*v);
940: y[6*(*idx)+1] += alpha2*(*v);
941: y[6*(*idx)+2] += alpha3*(*v);
942: y[6*(*idx)+3] += alpha4*(*v);
943: y[6*(*idx)+4] += alpha5*(*v);
944: y[6*(*idx)+5] += alpha6*(*v);
945: idx++; v++;
946: }
947: }
948: PetscLogFlops(12.0*a->nz);
949: VecRestoreArrayRead(xx,&x);
950: VecRestoreArray(yy,&y);
951: return(0);
952: }
956: PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
957: {
958: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
959: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
960: const PetscScalar *x,*v;
961: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6;
962: PetscErrorCode ierr;
963: PetscInt n,i,jrow,j;
964: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
967: if (yy != zz) {VecCopy(yy,zz);}
968: VecGetArrayRead(xx,&x);
969: VecGetArray(zz,&y);
970: idx = a->j;
971: v = a->a;
972: ii = a->i;
974: for (i=0; i<m; i++) {
975: jrow = ii[i];
976: n = ii[i+1] - jrow;
977: sum1 = 0.0;
978: sum2 = 0.0;
979: sum3 = 0.0;
980: sum4 = 0.0;
981: sum5 = 0.0;
982: sum6 = 0.0;
983: for (j=0; j<n; j++) {
984: sum1 += v[jrow]*x[6*idx[jrow]];
985: sum2 += v[jrow]*x[6*idx[jrow]+1];
986: sum3 += v[jrow]*x[6*idx[jrow]+2];
987: sum4 += v[jrow]*x[6*idx[jrow]+3];
988: sum5 += v[jrow]*x[6*idx[jrow]+4];
989: sum6 += v[jrow]*x[6*idx[jrow]+5];
990: jrow++;
991: }
992: y[6*i] += sum1;
993: y[6*i+1] += sum2;
994: y[6*i+2] += sum3;
995: y[6*i+3] += sum4;
996: y[6*i+4] += sum5;
997: y[6*i+5] += sum6;
998: }
1000: PetscLogFlops(12.0*a->nz);
1001: VecRestoreArrayRead(xx,&x);
1002: VecRestoreArray(zz,&y);
1003: return(0);
1004: }
1008: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
1009: {
1010: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1011: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1012: const PetscScalar *x,*v;
1013: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
1014: PetscErrorCode ierr;
1015: PetscInt n,i;
1016: const PetscInt m = b->AIJ->rmap->n,*idx;
1019: if (yy != zz) {VecCopy(yy,zz);}
1020: VecGetArrayRead(xx,&x);
1021: VecGetArray(zz,&y);
1022:
1023: for (i=0; i<m; i++) {
1024: idx = a->j + a->i[i] ;
1025: v = a->a + a->i[i] ;
1026: n = a->i[i+1] - a->i[i];
1027: alpha1 = x[6*i];
1028: alpha2 = x[6*i+1];
1029: alpha3 = x[6*i+2];
1030: alpha4 = x[6*i+3];
1031: alpha5 = x[6*i+4];
1032: alpha6 = x[6*i+5];
1033: while (n-->0) {
1034: y[6*(*idx)] += alpha1*(*v);
1035: y[6*(*idx)+1] += alpha2*(*v);
1036: y[6*(*idx)+2] += alpha3*(*v);
1037: y[6*(*idx)+3] += alpha4*(*v);
1038: y[6*(*idx)+4] += alpha5*(*v);
1039: y[6*(*idx)+5] += alpha6*(*v);
1040: idx++; v++;
1041: }
1042: }
1043: PetscLogFlops(12.0*a->nz);
1044: VecRestoreArrayRead(xx,&x);
1045: VecRestoreArray(zz,&y);
1046: return(0);
1047: }
1049: /* ------------------------------------------------------------------------------*/
1052: PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1053: {
1054: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1055: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1056: const PetscScalar *x,*v;
1057: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1058: PetscErrorCode ierr;
1059: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1060: PetscInt nonzerorow=0,n,i,jrow,j;
1063: VecGetArrayRead(xx,&x);
1064: VecGetArray(yy,&y);
1065: idx = a->j;
1066: v = a->a;
1067: ii = a->i;
1069: for (i=0; i<m; i++) {
1070: jrow = ii[i];
1071: n = ii[i+1] - jrow;
1072: sum1 = 0.0;
1073: sum2 = 0.0;
1074: sum3 = 0.0;
1075: sum4 = 0.0;
1076: sum5 = 0.0;
1077: sum6 = 0.0;
1078: sum7 = 0.0;
1079: nonzerorow += (n>0);
1080: for (j=0; j<n; j++) {
1081: sum1 += v[jrow]*x[7*idx[jrow]];
1082: sum2 += v[jrow]*x[7*idx[jrow]+1];
1083: sum3 += v[jrow]*x[7*idx[jrow]+2];
1084: sum4 += v[jrow]*x[7*idx[jrow]+3];
1085: sum5 += v[jrow]*x[7*idx[jrow]+4];
1086: sum6 += v[jrow]*x[7*idx[jrow]+5];
1087: sum7 += v[jrow]*x[7*idx[jrow]+6];
1088: jrow++;
1089: }
1090: y[7*i] = sum1;
1091: y[7*i+1] = sum2;
1092: y[7*i+2] = sum3;
1093: y[7*i+3] = sum4;
1094: y[7*i+4] = sum5;
1095: y[7*i+5] = sum6;
1096: y[7*i+6] = sum7;
1097: }
1099: PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);
1100: VecRestoreArrayRead(xx,&x);
1101: VecRestoreArray(yy,&y);
1102: return(0);
1103: }
1107: PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1108: {
1109: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1110: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1111: const PetscScalar *x,*v;
1112: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1113: PetscErrorCode ierr;
1114: const PetscInt m = b->AIJ->rmap->n,*idx;
1115: PetscInt n,i;
1118: VecSet(yy,0.0);
1119: VecGetArrayRead(xx,&x);
1120: VecGetArray(yy,&y);
1122: for (i=0; i<m; i++) {
1123: idx = a->j + a->i[i] ;
1124: v = a->a + a->i[i] ;
1125: n = a->i[i+1] - a->i[i];
1126: alpha1 = x[7*i];
1127: alpha2 = x[7*i+1];
1128: alpha3 = x[7*i+2];
1129: alpha4 = x[7*i+3];
1130: alpha5 = x[7*i+4];
1131: alpha6 = x[7*i+5];
1132: alpha7 = x[7*i+6];
1133: while (n-->0) {
1134: y[7*(*idx)] += alpha1*(*v);
1135: y[7*(*idx)+1] += alpha2*(*v);
1136: y[7*(*idx)+2] += alpha3*(*v);
1137: y[7*(*idx)+3] += alpha4*(*v);
1138: y[7*(*idx)+4] += alpha5*(*v);
1139: y[7*(*idx)+5] += alpha6*(*v);
1140: y[7*(*idx)+6] += alpha7*(*v);
1141: idx++; v++;
1142: }
1143: }
1144: PetscLogFlops(14.0*a->nz);
1145: VecRestoreArrayRead(xx,&x);
1146: VecRestoreArray(yy,&y);
1147: return(0);
1148: }
1152: PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1153: {
1154: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1155: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1156: const PetscScalar *x,*v;
1157: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1158: PetscErrorCode ierr;
1159: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1160: PetscInt n,i,jrow,j;
1163: if (yy != zz) {VecCopy(yy,zz);}
1164: VecGetArrayRead(xx,&x);
1165: VecGetArray(zz,&y);
1166: idx = a->j;
1167: v = a->a;
1168: ii = a->i;
1170: for (i=0; i<m; i++) {
1171: jrow = ii[i];
1172: n = ii[i+1] - jrow;
1173: sum1 = 0.0;
1174: sum2 = 0.0;
1175: sum3 = 0.0;
1176: sum4 = 0.0;
1177: sum5 = 0.0;
1178: sum6 = 0.0;
1179: sum7 = 0.0;
1180: for (j=0; j<n; j++) {
1181: sum1 += v[jrow]*x[7*idx[jrow]];
1182: sum2 += v[jrow]*x[7*idx[jrow]+1];
1183: sum3 += v[jrow]*x[7*idx[jrow]+2];
1184: sum4 += v[jrow]*x[7*idx[jrow]+3];
1185: sum5 += v[jrow]*x[7*idx[jrow]+4];
1186: sum6 += v[jrow]*x[7*idx[jrow]+5];
1187: sum7 += v[jrow]*x[7*idx[jrow]+6];
1188: jrow++;
1189: }
1190: y[7*i] += sum1;
1191: y[7*i+1] += sum2;
1192: y[7*i+2] += sum3;
1193: y[7*i+3] += sum4;
1194: y[7*i+4] += sum5;
1195: y[7*i+5] += sum6;
1196: y[7*i+6] += sum7;
1197: }
1199: PetscLogFlops(14.0*a->nz);
1200: VecRestoreArrayRead(xx,&x);
1201: VecRestoreArray(zz,&y);
1202: return(0);
1203: }
1207: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1208: {
1209: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1210: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1211: const PetscScalar *x,*v;
1212: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1213: PetscErrorCode ierr;
1214: const PetscInt m = b->AIJ->rmap->n,*idx;
1215: PetscInt n,i;
1218: if (yy != zz) {VecCopy(yy,zz);}
1219: VecGetArrayRead(xx,&x);
1220: VecGetArray(zz,&y);
1221: for (i=0; i<m; i++) {
1222: idx = a->j + a->i[i] ;
1223: v = a->a + a->i[i] ;
1224: n = a->i[i+1] - a->i[i];
1225: alpha1 = x[7*i];
1226: alpha2 = x[7*i+1];
1227: alpha3 = x[7*i+2];
1228: alpha4 = x[7*i+3];
1229: alpha5 = x[7*i+4];
1230: alpha6 = x[7*i+5];
1231: alpha7 = x[7*i+6];
1232: while (n-->0) {
1233: y[7*(*idx)] += alpha1*(*v);
1234: y[7*(*idx)+1] += alpha2*(*v);
1235: y[7*(*idx)+2] += alpha3*(*v);
1236: y[7*(*idx)+3] += alpha4*(*v);
1237: y[7*(*idx)+4] += alpha5*(*v);
1238: y[7*(*idx)+5] += alpha6*(*v);
1239: y[7*(*idx)+6] += alpha7*(*v);
1240: idx++; v++;
1241: }
1242: }
1243: PetscLogFlops(14.0*a->nz);
1244: VecRestoreArrayRead(xx,&x);
1245: VecRestoreArray(zz,&y);
1246: return(0);
1247: }
1251: PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1252: {
1253: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1254: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1255: const PetscScalar *x,*v;
1256: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1257: PetscErrorCode ierr;
1258: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1259: PetscInt nonzerorow=0,n,i,jrow,j;
1262: VecGetArrayRead(xx,&x);
1263: VecGetArray(yy,&y);
1264: idx = a->j;
1265: v = a->a;
1266: ii = a->i;
1268: for (i=0; i<m; i++) {
1269: jrow = ii[i];
1270: n = ii[i+1] - jrow;
1271: sum1 = 0.0;
1272: sum2 = 0.0;
1273: sum3 = 0.0;
1274: sum4 = 0.0;
1275: sum5 = 0.0;
1276: sum6 = 0.0;
1277: sum7 = 0.0;
1278: sum8 = 0.0;
1279: nonzerorow += (n>0);
1280: for (j=0; j<n; j++) {
1281: sum1 += v[jrow]*x[8*idx[jrow]];
1282: sum2 += v[jrow]*x[8*idx[jrow]+1];
1283: sum3 += v[jrow]*x[8*idx[jrow]+2];
1284: sum4 += v[jrow]*x[8*idx[jrow]+3];
1285: sum5 += v[jrow]*x[8*idx[jrow]+4];
1286: sum6 += v[jrow]*x[8*idx[jrow]+5];
1287: sum7 += v[jrow]*x[8*idx[jrow]+6];
1288: sum8 += v[jrow]*x[8*idx[jrow]+7];
1289: jrow++;
1290: }
1291: y[8*i] = sum1;
1292: y[8*i+1] = sum2;
1293: y[8*i+2] = sum3;
1294: y[8*i+3] = sum4;
1295: y[8*i+4] = sum5;
1296: y[8*i+5] = sum6;
1297: y[8*i+6] = sum7;
1298: y[8*i+7] = sum8;
1299: }
1301: PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);
1302: VecRestoreArrayRead(xx,&x);
1303: VecRestoreArray(yy,&y);
1304: return(0);
1305: }
1309: PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1310: {
1311: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1312: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1313: const PetscScalar *x,*v;
1314: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1315: PetscErrorCode ierr;
1316: const PetscInt m = b->AIJ->rmap->n,*idx;
1317: PetscInt n,i;
1320: VecSet(yy,0.0);
1321: VecGetArrayRead(xx,&x);
1322: VecGetArray(yy,&y);
1324: for (i=0; i<m; i++) {
1325: idx = a->j + a->i[i] ;
1326: v = a->a + a->i[i] ;
1327: n = a->i[i+1] - a->i[i];
1328: alpha1 = x[8*i];
1329: alpha2 = x[8*i+1];
1330: alpha3 = x[8*i+2];
1331: alpha4 = x[8*i+3];
1332: alpha5 = x[8*i+4];
1333: alpha6 = x[8*i+5];
1334: alpha7 = x[8*i+6];
1335: alpha8 = x[8*i+7];
1336: while (n-->0) {
1337: y[8*(*idx)] += alpha1*(*v);
1338: y[8*(*idx)+1] += alpha2*(*v);
1339: y[8*(*idx)+2] += alpha3*(*v);
1340: y[8*(*idx)+3] += alpha4*(*v);
1341: y[8*(*idx)+4] += alpha5*(*v);
1342: y[8*(*idx)+5] += alpha6*(*v);
1343: y[8*(*idx)+6] += alpha7*(*v);
1344: y[8*(*idx)+7] += alpha8*(*v);
1345: idx++; v++;
1346: }
1347: }
1348: PetscLogFlops(16.0*a->nz);
1349: VecRestoreArrayRead(xx,&x);
1350: VecRestoreArray(yy,&y);
1351: return(0);
1352: }
1356: PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1357: {
1358: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1359: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1360: const PetscScalar *x,*v;
1361: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1362: PetscErrorCode ierr;
1363: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1364: PetscInt n,i,jrow,j;
1367: if (yy != zz) {VecCopy(yy,zz);}
1368: VecGetArrayRead(xx,&x);
1369: VecGetArray(zz,&y);
1370: idx = a->j;
1371: v = a->a;
1372: ii = a->i;
1374: for (i=0; i<m; i++) {
1375: jrow = ii[i];
1376: n = ii[i+1] - jrow;
1377: sum1 = 0.0;
1378: sum2 = 0.0;
1379: sum3 = 0.0;
1380: sum4 = 0.0;
1381: sum5 = 0.0;
1382: sum6 = 0.0;
1383: sum7 = 0.0;
1384: sum8 = 0.0;
1385: for (j=0; j<n; j++) {
1386: sum1 += v[jrow]*x[8*idx[jrow]];
1387: sum2 += v[jrow]*x[8*idx[jrow]+1];
1388: sum3 += v[jrow]*x[8*idx[jrow]+2];
1389: sum4 += v[jrow]*x[8*idx[jrow]+3];
1390: sum5 += v[jrow]*x[8*idx[jrow]+4];
1391: sum6 += v[jrow]*x[8*idx[jrow]+5];
1392: sum7 += v[jrow]*x[8*idx[jrow]+6];
1393: sum8 += v[jrow]*x[8*idx[jrow]+7];
1394: jrow++;
1395: }
1396: y[8*i] += sum1;
1397: y[8*i+1] += sum2;
1398: y[8*i+2] += sum3;
1399: y[8*i+3] += sum4;
1400: y[8*i+4] += sum5;
1401: y[8*i+5] += sum6;
1402: y[8*i+6] += sum7;
1403: y[8*i+7] += sum8;
1404: }
1406: PetscLogFlops(16.0*a->nz);
1407: VecRestoreArrayRead(xx,&x);
1408: VecRestoreArray(zz,&y);
1409: return(0);
1410: }
1414: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1415: {
1416: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1417: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1418: const PetscScalar *x,*v;
1419: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1420: PetscErrorCode ierr;
1421: const PetscInt m = b->AIJ->rmap->n,*idx;
1422: PetscInt n,i;
1425: if (yy != zz) {VecCopy(yy,zz);}
1426: VecGetArrayRead(xx,&x);
1427: VecGetArray(zz,&y);
1428: for (i=0; i<m; i++) {
1429: idx = a->j + a->i[i] ;
1430: v = a->a + a->i[i] ;
1431: n = a->i[i+1] - a->i[i];
1432: alpha1 = x[8*i];
1433: alpha2 = x[8*i+1];
1434: alpha3 = x[8*i+2];
1435: alpha4 = x[8*i+3];
1436: alpha5 = x[8*i+4];
1437: alpha6 = x[8*i+5];
1438: alpha7 = x[8*i+6];
1439: alpha8 = x[8*i+7];
1440: while (n-->0) {
1441: y[8*(*idx)] += alpha1*(*v);
1442: y[8*(*idx)+1] += alpha2*(*v);
1443: y[8*(*idx)+2] += alpha3*(*v);
1444: y[8*(*idx)+3] += alpha4*(*v);
1445: y[8*(*idx)+4] += alpha5*(*v);
1446: y[8*(*idx)+5] += alpha6*(*v);
1447: y[8*(*idx)+6] += alpha7*(*v);
1448: y[8*(*idx)+7] += alpha8*(*v);
1449: idx++; v++;
1450: }
1451: }
1452: PetscLogFlops(16.0*a->nz);
1453: VecRestoreArrayRead(xx,&x);
1454: VecRestoreArray(zz,&y);
1455: return(0);
1456: }
1458: /* ------------------------------------------------------------------------------*/
1461: PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1462: {
1463: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1464: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1465: const PetscScalar *x,*v;
1466: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1467: PetscErrorCode ierr;
1468: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1469: PetscInt nonzerorow=0,n,i,jrow,j;
1472: VecGetArrayRead(xx,&x);
1473: VecGetArray(yy,&y);
1474: idx = a->j;
1475: v = a->a;
1476: ii = a->i;
1478: for (i=0; i<m; i++) {
1479: jrow = ii[i];
1480: n = ii[i+1] - jrow;
1481: sum1 = 0.0;
1482: sum2 = 0.0;
1483: sum3 = 0.0;
1484: sum4 = 0.0;
1485: sum5 = 0.0;
1486: sum6 = 0.0;
1487: sum7 = 0.0;
1488: sum8 = 0.0;
1489: sum9 = 0.0;
1490: nonzerorow += (n>0);
1491: for (j=0; j<n; j++) {
1492: sum1 += v[jrow]*x[9*idx[jrow]];
1493: sum2 += v[jrow]*x[9*idx[jrow]+1];
1494: sum3 += v[jrow]*x[9*idx[jrow]+2];
1495: sum4 += v[jrow]*x[9*idx[jrow]+3];
1496: sum5 += v[jrow]*x[9*idx[jrow]+4];
1497: sum6 += v[jrow]*x[9*idx[jrow]+5];
1498: sum7 += v[jrow]*x[9*idx[jrow]+6];
1499: sum8 += v[jrow]*x[9*idx[jrow]+7];
1500: sum9 += v[jrow]*x[9*idx[jrow]+8];
1501: jrow++;
1502: }
1503: y[9*i] = sum1;
1504: y[9*i+1] = sum2;
1505: y[9*i+2] = sum3;
1506: y[9*i+3] = sum4;
1507: y[9*i+4] = sum5;
1508: y[9*i+5] = sum6;
1509: y[9*i+6] = sum7;
1510: y[9*i+7] = sum8;
1511: y[9*i+8] = sum9;
1512: }
1514: PetscLogFlops(18.0*a->nz - 9*nonzerorow);
1515: VecRestoreArrayRead(xx,&x);
1516: VecRestoreArray(yy,&y);
1517: return(0);
1518: }
1520: /* ------------------------------------------------------------------------------*/
1524: PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1525: {
1526: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1527: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1528: const PetscScalar *x,*v;
1529: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1530: PetscErrorCode ierr;
1531: const PetscInt m = b->AIJ->rmap->n,*idx;
1532: PetscInt n,i;
1535: VecSet(yy,0.0);
1536: VecGetArrayRead(xx,&x);
1537: VecGetArray(yy,&y);
1539: for (i=0; i<m; i++) {
1540: idx = a->j + a->i[i] ;
1541: v = a->a + a->i[i] ;
1542: n = a->i[i+1] - a->i[i];
1543: alpha1 = x[9*i];
1544: alpha2 = x[9*i+1];
1545: alpha3 = x[9*i+2];
1546: alpha4 = x[9*i+3];
1547: alpha5 = x[9*i+4];
1548: alpha6 = x[9*i+5];
1549: alpha7 = x[9*i+6];
1550: alpha8 = x[9*i+7];
1551: alpha9 = x[9*i+8];
1552: while (n-->0) {
1553: y[9*(*idx)] += alpha1*(*v);
1554: y[9*(*idx)+1] += alpha2*(*v);
1555: y[9*(*idx)+2] += alpha3*(*v);
1556: y[9*(*idx)+3] += alpha4*(*v);
1557: y[9*(*idx)+4] += alpha5*(*v);
1558: y[9*(*idx)+5] += alpha6*(*v);
1559: y[9*(*idx)+6] += alpha7*(*v);
1560: y[9*(*idx)+7] += alpha8*(*v);
1561: y[9*(*idx)+8] += alpha9*(*v);
1562: idx++; v++;
1563: }
1564: }
1565: PetscLogFlops(18.0*a->nz);
1566: VecRestoreArrayRead(xx,&x);
1567: VecRestoreArray(yy,&y);
1568: return(0);
1569: }
1573: PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1574: {
1575: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1576: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1577: const PetscScalar *x,*v;
1578: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1579: PetscErrorCode ierr;
1580: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1581: PetscInt n,i,jrow,j;
1584: if (yy != zz) {VecCopy(yy,zz);}
1585: VecGetArrayRead(xx,&x);
1586: VecGetArray(zz,&y);
1587: idx = a->j;
1588: v = a->a;
1589: ii = a->i;
1591: for (i=0; i<m; i++) {
1592: jrow = ii[i];
1593: n = ii[i+1] - jrow;
1594: sum1 = 0.0;
1595: sum2 = 0.0;
1596: sum3 = 0.0;
1597: sum4 = 0.0;
1598: sum5 = 0.0;
1599: sum6 = 0.0;
1600: sum7 = 0.0;
1601: sum8 = 0.0;
1602: sum9 = 0.0;
1603: for (j=0; j<n; j++) {
1604: sum1 += v[jrow]*x[9*idx[jrow]];
1605: sum2 += v[jrow]*x[9*idx[jrow]+1];
1606: sum3 += v[jrow]*x[9*idx[jrow]+2];
1607: sum4 += v[jrow]*x[9*idx[jrow]+3];
1608: sum5 += v[jrow]*x[9*idx[jrow]+4];
1609: sum6 += v[jrow]*x[9*idx[jrow]+5];
1610: sum7 += v[jrow]*x[9*idx[jrow]+6];
1611: sum8 += v[jrow]*x[9*idx[jrow]+7];
1612: sum9 += v[jrow]*x[9*idx[jrow]+8];
1613: jrow++;
1614: }
1615: y[9*i] += sum1;
1616: y[9*i+1] += sum2;
1617: y[9*i+2] += sum3;
1618: y[9*i+3] += sum4;
1619: y[9*i+4] += sum5;
1620: y[9*i+5] += sum6;
1621: y[9*i+6] += sum7;
1622: y[9*i+7] += sum8;
1623: y[9*i+8] += sum9;
1624: }
1626: PetscLogFlops(18*a->nz);
1627: VecRestoreArrayRead(xx,&x);
1628: VecRestoreArray(zz,&y);
1629: return(0);
1630: }
1634: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1635: {
1636: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1637: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1638: const PetscScalar *x,*v;
1639: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1640: PetscErrorCode ierr;
1641: const PetscInt m = b->AIJ->rmap->n,*idx;
1642: PetscInt n,i;
1645: if (yy != zz) {VecCopy(yy,zz);}
1646: VecGetArrayRead(xx,&x);
1647: VecGetArray(zz,&y);
1648: for (i=0; i<m; i++) {
1649: idx = a->j + a->i[i] ;
1650: v = a->a + a->i[i] ;
1651: n = a->i[i+1] - a->i[i];
1652: alpha1 = x[9*i];
1653: alpha2 = x[9*i+1];
1654: alpha3 = x[9*i+2];
1655: alpha4 = x[9*i+3];
1656: alpha5 = x[9*i+4];
1657: alpha6 = x[9*i+5];
1658: alpha7 = x[9*i+6];
1659: alpha8 = x[9*i+7];
1660: alpha9 = x[9*i+8];
1661: while (n-->0) {
1662: y[9*(*idx)] += alpha1*(*v);
1663: y[9*(*idx)+1] += alpha2*(*v);
1664: y[9*(*idx)+2] += alpha3*(*v);
1665: y[9*(*idx)+3] += alpha4*(*v);
1666: y[9*(*idx)+4] += alpha5*(*v);
1667: y[9*(*idx)+5] += alpha6*(*v);
1668: y[9*(*idx)+6] += alpha7*(*v);
1669: y[9*(*idx)+7] += alpha8*(*v);
1670: y[9*(*idx)+8] += alpha9*(*v);
1671: idx++; v++;
1672: }
1673: }
1674: PetscLogFlops(18.0*a->nz);
1675: VecRestoreArrayRead(xx,&x);
1676: VecRestoreArray(zz,&y);
1677: return(0);
1678: }
1681: PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1682: {
1683: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1684: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1685: const PetscScalar *x,*v;
1686: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1687: PetscErrorCode ierr;
1688: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1689: PetscInt nonzerorow=0,n,i,jrow,j;
1692: VecGetArrayRead(xx,&x);
1693: VecGetArray(yy,&y);
1694: idx = a->j;
1695: v = a->a;
1696: ii = a->i;
1698: for (i=0; i<m; i++) {
1699: jrow = ii[i];
1700: n = ii[i+1] - jrow;
1701: sum1 = 0.0;
1702: sum2 = 0.0;
1703: sum3 = 0.0;
1704: sum4 = 0.0;
1705: sum5 = 0.0;
1706: sum6 = 0.0;
1707: sum7 = 0.0;
1708: sum8 = 0.0;
1709: sum9 = 0.0;
1710: sum10 = 0.0;
1711: nonzerorow += (n>0);
1712: for (j=0; j<n; j++) {
1713: sum1 += v[jrow]*x[10*idx[jrow]];
1714: sum2 += v[jrow]*x[10*idx[jrow]+1];
1715: sum3 += v[jrow]*x[10*idx[jrow]+2];
1716: sum4 += v[jrow]*x[10*idx[jrow]+3];
1717: sum5 += v[jrow]*x[10*idx[jrow]+4];
1718: sum6 += v[jrow]*x[10*idx[jrow]+5];
1719: sum7 += v[jrow]*x[10*idx[jrow]+6];
1720: sum8 += v[jrow]*x[10*idx[jrow]+7];
1721: sum9 += v[jrow]*x[10*idx[jrow]+8];
1722: sum10 += v[jrow]*x[10*idx[jrow]+9];
1723: jrow++;
1724: }
1725: y[10*i] = sum1;
1726: y[10*i+1] = sum2;
1727: y[10*i+2] = sum3;
1728: y[10*i+3] = sum4;
1729: y[10*i+4] = sum5;
1730: y[10*i+5] = sum6;
1731: y[10*i+6] = sum7;
1732: y[10*i+7] = sum8;
1733: y[10*i+8] = sum9;
1734: y[10*i+9] = sum10;
1735: }
1737: PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);
1738: VecRestoreArrayRead(xx,&x);
1739: VecRestoreArray(yy,&y);
1740: return(0);
1741: }
1745: PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1746: {
1747: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1748: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1749: const PetscScalar *x,*v;
1750: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1751: PetscErrorCode ierr;
1752: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1753: PetscInt n,i,jrow,j;
1756: if (yy != zz) {VecCopy(yy,zz);}
1757: VecGetArrayRead(xx,&x);
1758: VecGetArray(zz,&y);
1759: idx = a->j;
1760: v = a->a;
1761: ii = a->i;
1763: for (i=0; i<m; i++) {
1764: jrow = ii[i];
1765: n = ii[i+1] - jrow;
1766: sum1 = 0.0;
1767: sum2 = 0.0;
1768: sum3 = 0.0;
1769: sum4 = 0.0;
1770: sum5 = 0.0;
1771: sum6 = 0.0;
1772: sum7 = 0.0;
1773: sum8 = 0.0;
1774: sum9 = 0.0;
1775: sum10 = 0.0;
1776: for (j=0; j<n; j++) {
1777: sum1 += v[jrow]*x[10*idx[jrow]];
1778: sum2 += v[jrow]*x[10*idx[jrow]+1];
1779: sum3 += v[jrow]*x[10*idx[jrow]+2];
1780: sum4 += v[jrow]*x[10*idx[jrow]+3];
1781: sum5 += v[jrow]*x[10*idx[jrow]+4];
1782: sum6 += v[jrow]*x[10*idx[jrow]+5];
1783: sum7 += v[jrow]*x[10*idx[jrow]+6];
1784: sum8 += v[jrow]*x[10*idx[jrow]+7];
1785: sum9 += v[jrow]*x[10*idx[jrow]+8];
1786: sum10 += v[jrow]*x[10*idx[jrow]+9];
1787: jrow++;
1788: }
1789: y[10*i] += sum1;
1790: y[10*i+1] += sum2;
1791: y[10*i+2] += sum3;
1792: y[10*i+3] += sum4;
1793: y[10*i+4] += sum5;
1794: y[10*i+5] += sum6;
1795: y[10*i+6] += sum7;
1796: y[10*i+7] += sum8;
1797: y[10*i+8] += sum9;
1798: y[10*i+9] += sum10;
1799: }
1801: PetscLogFlops(20.0*a->nz);
1802: VecRestoreArrayRead(xx,&x);
1803: VecRestoreArray(yy,&y);
1804: return(0);
1805: }
1809: PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1810: {
1811: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1812: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1813: const PetscScalar *x,*v;
1814: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1815: PetscErrorCode ierr;
1816: const PetscInt m = b->AIJ->rmap->n,*idx;
1817: PetscInt n,i;
1820: VecSet(yy,0.0);
1821: VecGetArrayRead(xx,&x);
1822: VecGetArray(yy,&y);
1824: for (i=0; i<m; i++) {
1825: idx = a->j + a->i[i] ;
1826: v = a->a + a->i[i] ;
1827: n = a->i[i+1] - a->i[i];
1828: alpha1 = x[10*i];
1829: alpha2 = x[10*i+1];
1830: alpha3 = x[10*i+2];
1831: alpha4 = x[10*i+3];
1832: alpha5 = x[10*i+4];
1833: alpha6 = x[10*i+5];
1834: alpha7 = x[10*i+6];
1835: alpha8 = x[10*i+7];
1836: alpha9 = x[10*i+8];
1837: alpha10 = x[10*i+9];
1838: while (n-->0) {
1839: y[10*(*idx)] += alpha1*(*v);
1840: y[10*(*idx)+1] += alpha2*(*v);
1841: y[10*(*idx)+2] += alpha3*(*v);
1842: y[10*(*idx)+3] += alpha4*(*v);
1843: y[10*(*idx)+4] += alpha5*(*v);
1844: y[10*(*idx)+5] += alpha6*(*v);
1845: y[10*(*idx)+6] += alpha7*(*v);
1846: y[10*(*idx)+7] += alpha8*(*v);
1847: y[10*(*idx)+8] += alpha9*(*v);
1848: y[10*(*idx)+9] += alpha10*(*v);
1849: idx++; v++;
1850: }
1851: }
1852: PetscLogFlops(20.0*a->nz);
1853: VecRestoreArrayRead(xx,&x);
1854: VecRestoreArray(yy,&y);
1855: return(0);
1856: }
1860: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1861: {
1862: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1863: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1864: const PetscScalar *x,*v;
1865: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1866: PetscErrorCode ierr;
1867: const PetscInt m = b->AIJ->rmap->n,*idx;
1868: PetscInt n,i;
1871: if (yy != zz) {VecCopy(yy,zz);}
1872: VecGetArrayRead(xx,&x);
1873: VecGetArray(zz,&y);
1874: for (i=0; i<m; i++) {
1875: idx = a->j + a->i[i] ;
1876: v = a->a + a->i[i] ;
1877: n = a->i[i+1] - a->i[i];
1878: alpha1 = x[10*i];
1879: alpha2 = x[10*i+1];
1880: alpha3 = x[10*i+2];
1881: alpha4 = x[10*i+3];
1882: alpha5 = x[10*i+4];
1883: alpha6 = x[10*i+5];
1884: alpha7 = x[10*i+6];
1885: alpha8 = x[10*i+7];
1886: alpha9 = x[10*i+8];
1887: alpha10 = x[10*i+9];
1888: while (n-->0) {
1889: y[10*(*idx)] += alpha1*(*v);
1890: y[10*(*idx)+1] += alpha2*(*v);
1891: y[10*(*idx)+2] += alpha3*(*v);
1892: y[10*(*idx)+3] += alpha4*(*v);
1893: y[10*(*idx)+4] += alpha5*(*v);
1894: y[10*(*idx)+5] += alpha6*(*v);
1895: y[10*(*idx)+6] += alpha7*(*v);
1896: y[10*(*idx)+7] += alpha8*(*v);
1897: y[10*(*idx)+8] += alpha9*(*v);
1898: y[10*(*idx)+9] += alpha10*(*v);
1899: idx++; v++;
1900: }
1901: }
1902: PetscLogFlops(20.0*a->nz);
1903: VecRestoreArrayRead(xx,&x);
1904: VecRestoreArray(zz,&y);
1905: return(0);
1906: }
1909: /*--------------------------------------------------------------------------------------------*/
1912: PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1913: {
1914: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1915: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1916: const PetscScalar *x,*v;
1917: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1918: PetscErrorCode ierr;
1919: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1920: PetscInt nonzerorow=0,n,i,jrow,j;
1923: VecGetArrayRead(xx,&x);
1924: VecGetArray(yy,&y);
1925: idx = a->j;
1926: v = a->a;
1927: ii = a->i;
1929: for (i=0; i<m; i++) {
1930: jrow = ii[i];
1931: n = ii[i+1] - jrow;
1932: sum1 = 0.0;
1933: sum2 = 0.0;
1934: sum3 = 0.0;
1935: sum4 = 0.0;
1936: sum5 = 0.0;
1937: sum6 = 0.0;
1938: sum7 = 0.0;
1939: sum8 = 0.0;
1940: sum9 = 0.0;
1941: sum10 = 0.0;
1942: sum11 = 0.0;
1943: nonzerorow += (n>0);
1944: for (j=0; j<n; j++) {
1945: sum1 += v[jrow]*x[11*idx[jrow]];
1946: sum2 += v[jrow]*x[11*idx[jrow]+1];
1947: sum3 += v[jrow]*x[11*idx[jrow]+2];
1948: sum4 += v[jrow]*x[11*idx[jrow]+3];
1949: sum5 += v[jrow]*x[11*idx[jrow]+4];
1950: sum6 += v[jrow]*x[11*idx[jrow]+5];
1951: sum7 += v[jrow]*x[11*idx[jrow]+6];
1952: sum8 += v[jrow]*x[11*idx[jrow]+7];
1953: sum9 += v[jrow]*x[11*idx[jrow]+8];
1954: sum10 += v[jrow]*x[11*idx[jrow]+9];
1955: sum11 += v[jrow]*x[11*idx[jrow]+10];
1956: jrow++;
1957: }
1958: y[11*i] = sum1;
1959: y[11*i+1] = sum2;
1960: y[11*i+2] = sum3;
1961: y[11*i+3] = sum4;
1962: y[11*i+4] = sum5;
1963: y[11*i+5] = sum6;
1964: y[11*i+6] = sum7;
1965: y[11*i+7] = sum8;
1966: y[11*i+8] = sum9;
1967: y[11*i+9] = sum10;
1968: y[11*i+10] = sum11;
1969: }
1971: PetscLogFlops(22*a->nz - 11*nonzerorow);
1972: VecRestoreArrayRead(xx,&x);
1973: VecRestoreArray(yy,&y);
1974: return(0);
1975: }
1979: PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1980: {
1981: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1982: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1983: const PetscScalar *x,*v;
1984: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1985: PetscErrorCode ierr;
1986: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1987: PetscInt n,i,jrow,j;
1990: if (yy != zz) {VecCopy(yy,zz);}
1991: VecGetArrayRead(xx,&x);
1992: VecGetArray(zz,&y);
1993: idx = a->j;
1994: v = a->a;
1995: ii = a->i;
1997: for (i=0; i<m; i++) {
1998: jrow = ii[i];
1999: n = ii[i+1] - jrow;
2000: sum1 = 0.0;
2001: sum2 = 0.0;
2002: sum3 = 0.0;
2003: sum4 = 0.0;
2004: sum5 = 0.0;
2005: sum6 = 0.0;
2006: sum7 = 0.0;
2007: sum8 = 0.0;
2008: sum9 = 0.0;
2009: sum10 = 0.0;
2010: sum11 = 0.0;
2011: for (j=0; j<n; j++) {
2012: sum1 += v[jrow]*x[11*idx[jrow]];
2013: sum2 += v[jrow]*x[11*idx[jrow]+1];
2014: sum3 += v[jrow]*x[11*idx[jrow]+2];
2015: sum4 += v[jrow]*x[11*idx[jrow]+3];
2016: sum5 += v[jrow]*x[11*idx[jrow]+4];
2017: sum6 += v[jrow]*x[11*idx[jrow]+5];
2018: sum7 += v[jrow]*x[11*idx[jrow]+6];
2019: sum8 += v[jrow]*x[11*idx[jrow]+7];
2020: sum9 += v[jrow]*x[11*idx[jrow]+8];
2021: sum10 += v[jrow]*x[11*idx[jrow]+9];
2022: sum11 += v[jrow]*x[11*idx[jrow]+10];
2023: jrow++;
2024: }
2025: y[11*i] += sum1;
2026: y[11*i+1] += sum2;
2027: y[11*i+2] += sum3;
2028: y[11*i+3] += sum4;
2029: y[11*i+4] += sum5;
2030: y[11*i+5] += sum6;
2031: y[11*i+6] += sum7;
2032: y[11*i+7] += sum8;
2033: y[11*i+8] += sum9;
2034: y[11*i+9] += sum10;
2035: y[11*i+10] += sum11;
2036: }
2038: PetscLogFlops(22*a->nz);
2039: VecRestoreArrayRead(xx,&x);
2040: VecRestoreArray(yy,&y);
2041: return(0);
2042: }
2046: PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
2047: {
2048: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2049: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2050: const PetscScalar *x,*v;
2051: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2052: PetscErrorCode ierr;
2053: const PetscInt m = b->AIJ->rmap->n,*idx;
2054: PetscInt n,i;
2057: VecSet(yy,0.0);
2058: VecGetArrayRead(xx,&x);
2059: VecGetArray(yy,&y);
2061: for (i=0; i<m; i++) {
2062: idx = a->j + a->i[i] ;
2063: v = a->a + a->i[i] ;
2064: n = a->i[i+1] - a->i[i];
2065: alpha1 = x[11*i];
2066: alpha2 = x[11*i+1];
2067: alpha3 = x[11*i+2];
2068: alpha4 = x[11*i+3];
2069: alpha5 = x[11*i+4];
2070: alpha6 = x[11*i+5];
2071: alpha7 = x[11*i+6];
2072: alpha8 = x[11*i+7];
2073: alpha9 = x[11*i+8];
2074: alpha10 = x[11*i+9];
2075: alpha11 = x[11*i+10];
2076: while (n-->0) {
2077: y[11*(*idx)] += alpha1*(*v);
2078: y[11*(*idx)+1] += alpha2*(*v);
2079: y[11*(*idx)+2] += alpha3*(*v);
2080: y[11*(*idx)+3] += alpha4*(*v);
2081: y[11*(*idx)+4] += alpha5*(*v);
2082: y[11*(*idx)+5] += alpha6*(*v);
2083: y[11*(*idx)+6] += alpha7*(*v);
2084: y[11*(*idx)+7] += alpha8*(*v);
2085: y[11*(*idx)+8] += alpha9*(*v);
2086: y[11*(*idx)+9] += alpha10*(*v);
2087: y[11*(*idx)+10] += alpha11*(*v);
2088: idx++; v++;
2089: }
2090: }
2091: PetscLogFlops(22*a->nz);
2092: VecRestoreArrayRead(xx,&x);
2093: VecRestoreArray(yy,&y);
2094: return(0);
2095: }
2099: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2100: {
2101: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2102: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2103: const PetscScalar *x,*v;
2104: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2105: PetscErrorCode ierr;
2106: const PetscInt m = b->AIJ->rmap->n,*idx;
2107: PetscInt n,i;
2110: if (yy != zz) {VecCopy(yy,zz);}
2111: VecGetArrayRead(xx,&x);
2112: VecGetArray(zz,&y);
2113: for (i=0; i<m; i++) {
2114: idx = a->j + a->i[i] ;
2115: v = a->a + a->i[i] ;
2116: n = a->i[i+1] - a->i[i];
2117: alpha1 = x[11*i];
2118: alpha2 = x[11*i+1];
2119: alpha3 = x[11*i+2];
2120: alpha4 = x[11*i+3];
2121: alpha5 = x[11*i+4];
2122: alpha6 = x[11*i+5];
2123: alpha7 = x[11*i+6];
2124: alpha8 = x[11*i+7];
2125: alpha9 = x[11*i+8];
2126: alpha10 = x[11*i+9];
2127: alpha11 = x[11*i+10];
2128: while (n-->0) {
2129: y[11*(*idx)] += alpha1*(*v);
2130: y[11*(*idx)+1] += alpha2*(*v);
2131: y[11*(*idx)+2] += alpha3*(*v);
2132: y[11*(*idx)+3] += alpha4*(*v);
2133: y[11*(*idx)+4] += alpha5*(*v);
2134: y[11*(*idx)+5] += alpha6*(*v);
2135: y[11*(*idx)+6] += alpha7*(*v);
2136: y[11*(*idx)+7] += alpha8*(*v);
2137: y[11*(*idx)+8] += alpha9*(*v);
2138: y[11*(*idx)+9] += alpha10*(*v);
2139: y[11*(*idx)+10] += alpha11*(*v);
2140: idx++; v++;
2141: }
2142: }
2143: PetscLogFlops(22*a->nz);
2144: VecRestoreArrayRead(xx,&x);
2145: VecRestoreArray(zz,&y);
2146: return(0);
2147: }
2150: /*--------------------------------------------------------------------------------------------*/
2153: PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2154: {
2155: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2156: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2157: const PetscScalar *x,*v;
2158: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2159: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2160: PetscErrorCode ierr;
2161: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2162: PetscInt nonzerorow=0,n,i,jrow,j;
2165: VecGetArrayRead(xx,&x);
2166: VecGetArray(yy,&y);
2167: idx = a->j;
2168: v = a->a;
2169: ii = a->i;
2171: for (i=0; i<m; i++) {
2172: jrow = ii[i];
2173: n = ii[i+1] - jrow;
2174: sum1 = 0.0;
2175: sum2 = 0.0;
2176: sum3 = 0.0;
2177: sum4 = 0.0;
2178: sum5 = 0.0;
2179: sum6 = 0.0;
2180: sum7 = 0.0;
2181: sum8 = 0.0;
2182: sum9 = 0.0;
2183: sum10 = 0.0;
2184: sum11 = 0.0;
2185: sum12 = 0.0;
2186: sum13 = 0.0;
2187: sum14 = 0.0;
2188: sum15 = 0.0;
2189: sum16 = 0.0;
2190: nonzerorow += (n>0);
2191: for (j=0; j<n; j++) {
2192: sum1 += v[jrow]*x[16*idx[jrow]];
2193: sum2 += v[jrow]*x[16*idx[jrow]+1];
2194: sum3 += v[jrow]*x[16*idx[jrow]+2];
2195: sum4 += v[jrow]*x[16*idx[jrow]+3];
2196: sum5 += v[jrow]*x[16*idx[jrow]+4];
2197: sum6 += v[jrow]*x[16*idx[jrow]+5];
2198: sum7 += v[jrow]*x[16*idx[jrow]+6];
2199: sum8 += v[jrow]*x[16*idx[jrow]+7];
2200: sum9 += v[jrow]*x[16*idx[jrow]+8];
2201: sum10 += v[jrow]*x[16*idx[jrow]+9];
2202: sum11 += v[jrow]*x[16*idx[jrow]+10];
2203: sum12 += v[jrow]*x[16*idx[jrow]+11];
2204: sum13 += v[jrow]*x[16*idx[jrow]+12];
2205: sum14 += v[jrow]*x[16*idx[jrow]+13];
2206: sum15 += v[jrow]*x[16*idx[jrow]+14];
2207: sum16 += v[jrow]*x[16*idx[jrow]+15];
2208: jrow++;
2209: }
2210: y[16*i] = sum1;
2211: y[16*i+1] = sum2;
2212: y[16*i+2] = sum3;
2213: y[16*i+3] = sum4;
2214: y[16*i+4] = sum5;
2215: y[16*i+5] = sum6;
2216: y[16*i+6] = sum7;
2217: y[16*i+7] = sum8;
2218: y[16*i+8] = sum9;
2219: y[16*i+9] = sum10;
2220: y[16*i+10] = sum11;
2221: y[16*i+11] = sum12;
2222: y[16*i+12] = sum13;
2223: y[16*i+13] = sum14;
2224: y[16*i+14] = sum15;
2225: y[16*i+15] = sum16;
2226: }
2228: PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);
2229: VecRestoreArrayRead(xx,&x);
2230: VecRestoreArray(yy,&y);
2231: return(0);
2232: }
2236: PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2237: {
2238: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2239: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2240: const PetscScalar *x,*v;
2241: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2242: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2243: PetscErrorCode ierr;
2244: const PetscInt m = b->AIJ->rmap->n,*idx;
2245: PetscInt n,i;
2248: VecSet(yy,0.0);
2249: VecGetArrayRead(xx,&x);
2250: VecGetArray(yy,&y);
2252: for (i=0; i<m; i++) {
2253: idx = a->j + a->i[i] ;
2254: v = a->a + a->i[i] ;
2255: n = a->i[i+1] - a->i[i];
2256: alpha1 = x[16*i];
2257: alpha2 = x[16*i+1];
2258: alpha3 = x[16*i+2];
2259: alpha4 = x[16*i+3];
2260: alpha5 = x[16*i+4];
2261: alpha6 = x[16*i+5];
2262: alpha7 = x[16*i+6];
2263: alpha8 = x[16*i+7];
2264: alpha9 = x[16*i+8];
2265: alpha10 = x[16*i+9];
2266: alpha11 = x[16*i+10];
2267: alpha12 = x[16*i+11];
2268: alpha13 = x[16*i+12];
2269: alpha14 = x[16*i+13];
2270: alpha15 = x[16*i+14];
2271: alpha16 = x[16*i+15];
2272: while (n-->0) {
2273: y[16*(*idx)] += alpha1*(*v);
2274: y[16*(*idx)+1] += alpha2*(*v);
2275: y[16*(*idx)+2] += alpha3*(*v);
2276: y[16*(*idx)+3] += alpha4*(*v);
2277: y[16*(*idx)+4] += alpha5*(*v);
2278: y[16*(*idx)+5] += alpha6*(*v);
2279: y[16*(*idx)+6] += alpha7*(*v);
2280: y[16*(*idx)+7] += alpha8*(*v);
2281: y[16*(*idx)+8] += alpha9*(*v);
2282: y[16*(*idx)+9] += alpha10*(*v);
2283: y[16*(*idx)+10] += alpha11*(*v);
2284: y[16*(*idx)+11] += alpha12*(*v);
2285: y[16*(*idx)+12] += alpha13*(*v);
2286: y[16*(*idx)+13] += alpha14*(*v);
2287: y[16*(*idx)+14] += alpha15*(*v);
2288: y[16*(*idx)+15] += alpha16*(*v);
2289: idx++; v++;
2290: }
2291: }
2292: PetscLogFlops(32.0*a->nz);
2293: VecRestoreArrayRead(xx,&x);
2294: VecRestoreArray(yy,&y);
2295: return(0);
2296: }
2300: PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2301: {
2302: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2303: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2304: const PetscScalar *x,*v;
2305: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2306: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2307: PetscErrorCode ierr;
2308: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2309: PetscInt n,i,jrow,j;
2312: if (yy != zz) {VecCopy(yy,zz);}
2313: VecGetArrayRead(xx,&x);
2314: VecGetArray(zz,&y);
2315: idx = a->j;
2316: v = a->a;
2317: ii = a->i;
2319: for (i=0; i<m; i++) {
2320: jrow = ii[i];
2321: n = ii[i+1] - jrow;
2322: sum1 = 0.0;
2323: sum2 = 0.0;
2324: sum3 = 0.0;
2325: sum4 = 0.0;
2326: sum5 = 0.0;
2327: sum6 = 0.0;
2328: sum7 = 0.0;
2329: sum8 = 0.0;
2330: sum9 = 0.0;
2331: sum10 = 0.0;
2332: sum11 = 0.0;
2333: sum12 = 0.0;
2334: sum13 = 0.0;
2335: sum14 = 0.0;
2336: sum15 = 0.0;
2337: sum16 = 0.0;
2338: for (j=0; j<n; j++) {
2339: sum1 += v[jrow]*x[16*idx[jrow]];
2340: sum2 += v[jrow]*x[16*idx[jrow]+1];
2341: sum3 += v[jrow]*x[16*idx[jrow]+2];
2342: sum4 += v[jrow]*x[16*idx[jrow]+3];
2343: sum5 += v[jrow]*x[16*idx[jrow]+4];
2344: sum6 += v[jrow]*x[16*idx[jrow]+5];
2345: sum7 += v[jrow]*x[16*idx[jrow]+6];
2346: sum8 += v[jrow]*x[16*idx[jrow]+7];
2347: sum9 += v[jrow]*x[16*idx[jrow]+8];
2348: sum10 += v[jrow]*x[16*idx[jrow]+9];
2349: sum11 += v[jrow]*x[16*idx[jrow]+10];
2350: sum12 += v[jrow]*x[16*idx[jrow]+11];
2351: sum13 += v[jrow]*x[16*idx[jrow]+12];
2352: sum14 += v[jrow]*x[16*idx[jrow]+13];
2353: sum15 += v[jrow]*x[16*idx[jrow]+14];
2354: sum16 += v[jrow]*x[16*idx[jrow]+15];
2355: jrow++;
2356: }
2357: y[16*i] += sum1;
2358: y[16*i+1] += sum2;
2359: y[16*i+2] += sum3;
2360: y[16*i+3] += sum4;
2361: y[16*i+4] += sum5;
2362: y[16*i+5] += sum6;
2363: y[16*i+6] += sum7;
2364: y[16*i+7] += sum8;
2365: y[16*i+8] += sum9;
2366: y[16*i+9] += sum10;
2367: y[16*i+10] += sum11;
2368: y[16*i+11] += sum12;
2369: y[16*i+12] += sum13;
2370: y[16*i+13] += sum14;
2371: y[16*i+14] += sum15;
2372: y[16*i+15] += sum16;
2373: }
2375: PetscLogFlops(32.0*a->nz);
2376: VecRestoreArrayRead(xx,&x);
2377: VecRestoreArray(zz,&y);
2378: return(0);
2379: }
2383: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2384: {
2385: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2386: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2387: const PetscScalar *x,*v;
2388: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2389: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2390: PetscErrorCode ierr;
2391: const PetscInt m = b->AIJ->rmap->n,*idx;
2392: PetscInt n,i;
2395: if (yy != zz) {VecCopy(yy,zz);}
2396: VecGetArrayRead(xx,&x);
2397: VecGetArray(zz,&y);
2398: for (i=0; i<m; i++) {
2399: idx = a->j + a->i[i] ;
2400: v = a->a + a->i[i] ;
2401: n = a->i[i+1] - a->i[i];
2402: alpha1 = x[16*i];
2403: alpha2 = x[16*i+1];
2404: alpha3 = x[16*i+2];
2405: alpha4 = x[16*i+3];
2406: alpha5 = x[16*i+4];
2407: alpha6 = x[16*i+5];
2408: alpha7 = x[16*i+6];
2409: alpha8 = x[16*i+7];
2410: alpha9 = x[16*i+8];
2411: alpha10 = x[16*i+9];
2412: alpha11 = x[16*i+10];
2413: alpha12 = x[16*i+11];
2414: alpha13 = x[16*i+12];
2415: alpha14 = x[16*i+13];
2416: alpha15 = x[16*i+14];
2417: alpha16 = x[16*i+15];
2418: while (n-->0) {
2419: y[16*(*idx)] += alpha1*(*v);
2420: y[16*(*idx)+1] += alpha2*(*v);
2421: y[16*(*idx)+2] += alpha3*(*v);
2422: y[16*(*idx)+3] += alpha4*(*v);
2423: y[16*(*idx)+4] += alpha5*(*v);
2424: y[16*(*idx)+5] += alpha6*(*v);
2425: y[16*(*idx)+6] += alpha7*(*v);
2426: y[16*(*idx)+7] += alpha8*(*v);
2427: y[16*(*idx)+8] += alpha9*(*v);
2428: y[16*(*idx)+9] += alpha10*(*v);
2429: y[16*(*idx)+10] += alpha11*(*v);
2430: y[16*(*idx)+11] += alpha12*(*v);
2431: y[16*(*idx)+12] += alpha13*(*v);
2432: y[16*(*idx)+13] += alpha14*(*v);
2433: y[16*(*idx)+14] += alpha15*(*v);
2434: y[16*(*idx)+15] += alpha16*(*v);
2435: idx++; v++;
2436: }
2437: }
2438: PetscLogFlops(32.0*a->nz);
2439: VecRestoreArrayRead(xx,&x);
2440: VecRestoreArray(zz,&y);
2441: return(0);
2442: }
2444: /*--------------------------------------------------------------------------------------------*/
2447: PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2448: {
2449: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2450: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2451: const PetscScalar *x,*v;
2452: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2453: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2454: PetscErrorCode ierr;
2455: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2456: PetscInt nonzerorow=0,n,i,jrow,j;
2459: VecGetArrayRead(xx,&x);
2460: VecGetArray(yy,&y);
2461: idx = a->j;
2462: v = a->a;
2463: ii = a->i;
2465: for (i=0; i<m; i++) {
2466: jrow = ii[i];
2467: n = ii[i+1] - jrow;
2468: sum1 = 0.0;
2469: sum2 = 0.0;
2470: sum3 = 0.0;
2471: sum4 = 0.0;
2472: sum5 = 0.0;
2473: sum6 = 0.0;
2474: sum7 = 0.0;
2475: sum8 = 0.0;
2476: sum9 = 0.0;
2477: sum10 = 0.0;
2478: sum11 = 0.0;
2479: sum12 = 0.0;
2480: sum13 = 0.0;
2481: sum14 = 0.0;
2482: sum15 = 0.0;
2483: sum16 = 0.0;
2484: sum17 = 0.0;
2485: sum18 = 0.0;
2486: nonzerorow += (n>0);
2487: for (j=0; j<n; j++) {
2488: sum1 += v[jrow]*x[18*idx[jrow]];
2489: sum2 += v[jrow]*x[18*idx[jrow]+1];
2490: sum3 += v[jrow]*x[18*idx[jrow]+2];
2491: sum4 += v[jrow]*x[18*idx[jrow]+3];
2492: sum5 += v[jrow]*x[18*idx[jrow]+4];
2493: sum6 += v[jrow]*x[18*idx[jrow]+5];
2494: sum7 += v[jrow]*x[18*idx[jrow]+6];
2495: sum8 += v[jrow]*x[18*idx[jrow]+7];
2496: sum9 += v[jrow]*x[18*idx[jrow]+8];
2497: sum10 += v[jrow]*x[18*idx[jrow]+9];
2498: sum11 += v[jrow]*x[18*idx[jrow]+10];
2499: sum12 += v[jrow]*x[18*idx[jrow]+11];
2500: sum13 += v[jrow]*x[18*idx[jrow]+12];
2501: sum14 += v[jrow]*x[18*idx[jrow]+13];
2502: sum15 += v[jrow]*x[18*idx[jrow]+14];
2503: sum16 += v[jrow]*x[18*idx[jrow]+15];
2504: sum17 += v[jrow]*x[18*idx[jrow]+16];
2505: sum18 += v[jrow]*x[18*idx[jrow]+17];
2506: jrow++;
2507: }
2508: y[18*i] = sum1;
2509: y[18*i+1] = sum2;
2510: y[18*i+2] = sum3;
2511: y[18*i+3] = sum4;
2512: y[18*i+4] = sum5;
2513: y[18*i+5] = sum6;
2514: y[18*i+6] = sum7;
2515: y[18*i+7] = sum8;
2516: y[18*i+8] = sum9;
2517: y[18*i+9] = sum10;
2518: y[18*i+10] = sum11;
2519: y[18*i+11] = sum12;
2520: y[18*i+12] = sum13;
2521: y[18*i+13] = sum14;
2522: y[18*i+14] = sum15;
2523: y[18*i+15] = sum16;
2524: y[18*i+16] = sum17;
2525: y[18*i+17] = sum18;
2526: }
2528: PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);
2529: VecRestoreArrayRead(xx,&x);
2530: VecRestoreArray(yy,&y);
2531: return(0);
2532: }
2536: PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2537: {
2538: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2539: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2540: const PetscScalar *x,*v;
2541: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2542: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2543: PetscErrorCode ierr;
2544: const PetscInt m = b->AIJ->rmap->n,*idx;
2545: PetscInt n,i;
2548: VecSet(yy,0.0);
2549: VecGetArrayRead(xx,&x);
2550: VecGetArray(yy,&y);
2552: for (i=0; i<m; i++) {
2553: idx = a->j + a->i[i] ;
2554: v = a->a + a->i[i] ;
2555: n = a->i[i+1] - a->i[i];
2556: alpha1 = x[18*i];
2557: alpha2 = x[18*i+1];
2558: alpha3 = x[18*i+2];
2559: alpha4 = x[18*i+3];
2560: alpha5 = x[18*i+4];
2561: alpha6 = x[18*i+5];
2562: alpha7 = x[18*i+6];
2563: alpha8 = x[18*i+7];
2564: alpha9 = x[18*i+8];
2565: alpha10 = x[18*i+9];
2566: alpha11 = x[18*i+10];
2567: alpha12 = x[18*i+11];
2568: alpha13 = x[18*i+12];
2569: alpha14 = x[18*i+13];
2570: alpha15 = x[18*i+14];
2571: alpha16 = x[18*i+15];
2572: alpha17 = x[18*i+16];
2573: alpha18 = x[18*i+17];
2574: while (n-->0) {
2575: y[18*(*idx)] += alpha1*(*v);
2576: y[18*(*idx)+1] += alpha2*(*v);
2577: y[18*(*idx)+2] += alpha3*(*v);
2578: y[18*(*idx)+3] += alpha4*(*v);
2579: y[18*(*idx)+4] += alpha5*(*v);
2580: y[18*(*idx)+5] += alpha6*(*v);
2581: y[18*(*idx)+6] += alpha7*(*v);
2582: y[18*(*idx)+7] += alpha8*(*v);
2583: y[18*(*idx)+8] += alpha9*(*v);
2584: y[18*(*idx)+9] += alpha10*(*v);
2585: y[18*(*idx)+10] += alpha11*(*v);
2586: y[18*(*idx)+11] += alpha12*(*v);
2587: y[18*(*idx)+12] += alpha13*(*v);
2588: y[18*(*idx)+13] += alpha14*(*v);
2589: y[18*(*idx)+14] += alpha15*(*v);
2590: y[18*(*idx)+15] += alpha16*(*v);
2591: y[18*(*idx)+16] += alpha17*(*v);
2592: y[18*(*idx)+17] += alpha18*(*v);
2593: idx++; v++;
2594: }
2595: }
2596: PetscLogFlops(36.0*a->nz);
2597: VecRestoreArrayRead(xx,&x);
2598: VecRestoreArray(yy,&y);
2599: return(0);
2600: }
2604: PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2605: {
2606: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2607: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2608: const PetscScalar *x,*v;
2609: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2610: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2611: PetscErrorCode ierr;
2612: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2613: PetscInt n,i,jrow,j;
2616: if (yy != zz) {VecCopy(yy,zz);}
2617: VecGetArrayRead(xx,&x);
2618: VecGetArray(zz,&y);
2619: idx = a->j;
2620: v = a->a;
2621: ii = a->i;
2623: for (i=0; i<m; i++) {
2624: jrow = ii[i];
2625: n = ii[i+1] - jrow;
2626: sum1 = 0.0;
2627: sum2 = 0.0;
2628: sum3 = 0.0;
2629: sum4 = 0.0;
2630: sum5 = 0.0;
2631: sum6 = 0.0;
2632: sum7 = 0.0;
2633: sum8 = 0.0;
2634: sum9 = 0.0;
2635: sum10 = 0.0;
2636: sum11 = 0.0;
2637: sum12 = 0.0;
2638: sum13 = 0.0;
2639: sum14 = 0.0;
2640: sum15 = 0.0;
2641: sum16 = 0.0;
2642: sum17 = 0.0;
2643: sum18 = 0.0;
2644: for (j=0; j<n; j++) {
2645: sum1 += v[jrow]*x[18*idx[jrow]];
2646: sum2 += v[jrow]*x[18*idx[jrow]+1];
2647: sum3 += v[jrow]*x[18*idx[jrow]+2];
2648: sum4 += v[jrow]*x[18*idx[jrow]+3];
2649: sum5 += v[jrow]*x[18*idx[jrow]+4];
2650: sum6 += v[jrow]*x[18*idx[jrow]+5];
2651: sum7 += v[jrow]*x[18*idx[jrow]+6];
2652: sum8 += v[jrow]*x[18*idx[jrow]+7];
2653: sum9 += v[jrow]*x[18*idx[jrow]+8];
2654: sum10 += v[jrow]*x[18*idx[jrow]+9];
2655: sum11 += v[jrow]*x[18*idx[jrow]+10];
2656: sum12 += v[jrow]*x[18*idx[jrow]+11];
2657: sum13 += v[jrow]*x[18*idx[jrow]+12];
2658: sum14 += v[jrow]*x[18*idx[jrow]+13];
2659: sum15 += v[jrow]*x[18*idx[jrow]+14];
2660: sum16 += v[jrow]*x[18*idx[jrow]+15];
2661: sum17 += v[jrow]*x[18*idx[jrow]+16];
2662: sum18 += v[jrow]*x[18*idx[jrow]+17];
2663: jrow++;
2664: }
2665: y[18*i] += sum1;
2666: y[18*i+1] += sum2;
2667: y[18*i+2] += sum3;
2668: y[18*i+3] += sum4;
2669: y[18*i+4] += sum5;
2670: y[18*i+5] += sum6;
2671: y[18*i+6] += sum7;
2672: y[18*i+7] += sum8;
2673: y[18*i+8] += sum9;
2674: y[18*i+9] += sum10;
2675: y[18*i+10] += sum11;
2676: y[18*i+11] += sum12;
2677: y[18*i+12] += sum13;
2678: y[18*i+13] += sum14;
2679: y[18*i+14] += sum15;
2680: y[18*i+15] += sum16;
2681: y[18*i+16] += sum17;
2682: y[18*i+17] += sum18;
2683: }
2685: PetscLogFlops(36.0*a->nz);
2686: VecRestoreArrayRead(xx,&x);
2687: VecRestoreArray(zz,&y);
2688: return(0);
2689: }
2693: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2694: {
2695: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2696: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2697: const PetscScalar *x,*v;
2698: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2699: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2700: PetscErrorCode ierr;
2701: const PetscInt m = b->AIJ->rmap->n,*idx;
2702: PetscInt n,i;
2705: if (yy != zz) {VecCopy(yy,zz);}
2706: VecGetArrayRead(xx,&x);
2707: VecGetArray(zz,&y);
2708: for (i=0; i<m; i++) {
2709: idx = a->j + a->i[i] ;
2710: v = a->a + a->i[i] ;
2711: n = a->i[i+1] - a->i[i];
2712: alpha1 = x[18*i];
2713: alpha2 = x[18*i+1];
2714: alpha3 = x[18*i+2];
2715: alpha4 = x[18*i+3];
2716: alpha5 = x[18*i+4];
2717: alpha6 = x[18*i+5];
2718: alpha7 = x[18*i+6];
2719: alpha8 = x[18*i+7];
2720: alpha9 = x[18*i+8];
2721: alpha10 = x[18*i+9];
2722: alpha11 = x[18*i+10];
2723: alpha12 = x[18*i+11];
2724: alpha13 = x[18*i+12];
2725: alpha14 = x[18*i+13];
2726: alpha15 = x[18*i+14];
2727: alpha16 = x[18*i+15];
2728: alpha17 = x[18*i+16];
2729: alpha18 = x[18*i+17];
2730: while (n-->0) {
2731: y[18*(*idx)] += alpha1*(*v);
2732: y[18*(*idx)+1] += alpha2*(*v);
2733: y[18*(*idx)+2] += alpha3*(*v);
2734: y[18*(*idx)+3] += alpha4*(*v);
2735: y[18*(*idx)+4] += alpha5*(*v);
2736: y[18*(*idx)+5] += alpha6*(*v);
2737: y[18*(*idx)+6] += alpha7*(*v);
2738: y[18*(*idx)+7] += alpha8*(*v);
2739: y[18*(*idx)+8] += alpha9*(*v);
2740: y[18*(*idx)+9] += alpha10*(*v);
2741: y[18*(*idx)+10] += alpha11*(*v);
2742: y[18*(*idx)+11] += alpha12*(*v);
2743: y[18*(*idx)+12] += alpha13*(*v);
2744: y[18*(*idx)+13] += alpha14*(*v);
2745: y[18*(*idx)+14] += alpha15*(*v);
2746: y[18*(*idx)+15] += alpha16*(*v);
2747: y[18*(*idx)+16] += alpha17*(*v);
2748: y[18*(*idx)+17] += alpha18*(*v);
2749: idx++; v++;
2750: }
2751: }
2752: PetscLogFlops(36.0*a->nz);
2753: VecRestoreArrayRead(xx,&x);
2754: VecRestoreArray(zz,&y);
2755: return(0);
2756: }
2760: PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2761: {
2762: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2763: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2764: const PetscScalar *x,*v;
2765: PetscScalar *y,*sums;
2766: PetscErrorCode ierr;
2767: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2768: PetscInt n,i,jrow,j,dof = b->dof,k;
2771: VecGetArrayRead(xx,&x);
2772: VecSet(yy,0.0);
2773: VecGetArray(yy,&y);
2774: idx = a->j;
2775: v = a->a;
2776: ii = a->i;
2778: for (i=0; i<m; i++) {
2779: jrow = ii[i];
2780: n = ii[i+1] - jrow;
2781: sums = y + dof*i;
2782: for (j=0; j<n; j++) {
2783: for (k=0; k<dof; k++) {
2784: sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2785: }
2786: jrow++;
2787: }
2788: }
2790: PetscLogFlops(2.0*dof*a->nz);
2791: VecRestoreArrayRead(xx,&x);
2792: VecRestoreArray(yy,&y);
2793: return(0);
2794: }
2798: PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2799: {
2800: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2801: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2802: const PetscScalar *x,*v;
2803: PetscScalar *y,*sums;
2804: PetscErrorCode ierr;
2805: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2806: PetscInt n,i,jrow,j,dof = b->dof,k;
2809: if (yy != zz) {VecCopy(yy,zz);}
2810: VecGetArrayRead(xx,&x);
2811: VecGetArray(zz,&y);
2812: idx = a->j;
2813: v = a->a;
2814: ii = a->i;
2816: for (i=0; i<m; i++) {
2817: jrow = ii[i];
2818: n = ii[i+1] - jrow;
2819: sums = y + dof*i;
2820: for (j=0; j<n; j++) {
2821: for (k=0; k<dof; k++) {
2822: sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2823: }
2824: jrow++;
2825: }
2826: }
2828: PetscLogFlops(2.0*dof*a->nz);
2829: VecRestoreArrayRead(xx,&x);
2830: VecRestoreArray(zz,&y);
2831: return(0);
2832: }
2836: PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2837: {
2838: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2839: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2840: const PetscScalar *x,*v,*alpha;
2841: PetscScalar *y;
2842: PetscErrorCode ierr;
2843: const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof;
2844: PetscInt n,i,k;
2847: VecGetArrayRead(xx,&x);
2848: VecSet(yy,0.0);
2849: VecGetArray(yy,&y);
2850: for (i=0; i<m; i++) {
2851: idx = a->j + a->i[i] ;
2852: v = a->a + a->i[i] ;
2853: n = a->i[i+1] - a->i[i];
2854: alpha = x + dof*i;
2855: while (n-->0) {
2856: for (k=0; k<dof; k++) {
2857: y[dof*(*idx)+k] += alpha[k]*(*v);
2858: }
2859: idx++; v++;
2860: }
2861: }
2862: PetscLogFlops(2.0*dof*a->nz);
2863: VecRestoreArrayRead(xx,&x);
2864: VecRestoreArray(yy,&y);
2865: return(0);
2866: }
2870: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2871: {
2872: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2873: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2874: const PetscScalar *x,*v,*alpha;
2875: PetscScalar *y;
2876: PetscErrorCode ierr;
2877: const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof;
2878: PetscInt n,i,k;
2881: if (yy != zz) {VecCopy(yy,zz);}
2882: VecGetArrayRead(xx,&x);
2883: VecGetArray(zz,&y);
2884: for (i=0; i<m; i++) {
2885: idx = a->j + a->i[i] ;
2886: v = a->a + a->i[i] ;
2887: n = a->i[i+1] - a->i[i];
2888: alpha = x + dof*i;
2889: while (n-->0) {
2890: for (k=0; k<dof; k++) {
2891: y[dof*(*idx)+k] += alpha[k]*(*v);
2892: }
2893: idx++; v++;
2894: }
2895: }
2896: PetscLogFlops(2.0*dof*a->nz);
2897: VecRestoreArrayRead(xx,&x);
2898: VecRestoreArray(zz,&y);
2899: return(0);
2900: }
2902: /*===================================================================================*/
2905: PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2906: {
2907: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2911: /* start the scatter */
2912: VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2913: (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
2914: VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2915: (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
2916: return(0);
2917: }
2921: PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2922: {
2923: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2927: (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2928: (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
2929: VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2930: VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2931: return(0);
2932: }
2936: PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2937: {
2938: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2942: /* start the scatter */
2943: VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2944: (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
2945: VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2946: (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);
2947: return(0);
2948: }
2952: PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2953: {
2954: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2958: (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2959: VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2960: (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
2961: VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2962: return(0);
2963: }
2965: /* ----------------------------------------------------------------*/
2968: PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2969: {
2970: /* This routine requires testing -- but it's getting better. */
2971: PetscErrorCode ierr;
2972: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2973: Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data;
2974: Mat P=pp->AIJ;
2975: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2976: PetscInt *pti,*ptj,*ptJ;
2977: PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2978: const PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
2979: PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
2980: MatScalar *ca;
2981: const PetscInt *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;
2984: /* Start timer */
2985: PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);
2987: /* Get ij structure of P^T */
2988: MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
2990: cn = pn*ppdof;
2991: /* Allocate ci array, arrays for fill computation and */
2992: /* free space for accumulating nonzero column info */
2993: PetscMalloc((cn+1)*sizeof(PetscInt),&ci);
2994: ci[0] = 0;
2996: /* Work arrays for rows of P^T*A */
2997: PetscMalloc4(an,PetscInt,&ptadenserow,an,PetscInt,&ptasparserow,cn,PetscInt,&denserow,cn,PetscInt,&sparserow);
2998: PetscMemzero(ptadenserow,an*sizeof(PetscInt));
2999: PetscMemzero(denserow,cn*sizeof(PetscInt));
3001: /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
3002: /* This should be reasonable if sparsity of PtAP is similar to that of A. */
3003: /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
3004: PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);
3005: current_space = free_space;
3007: /* Determine symbolic info for each row of C: */
3008: for (i=0;i<pn;i++) {
3009: ptnzi = pti[i+1] - pti[i];
3010: ptJ = ptj + pti[i];
3011: for (dof=0;dof<ppdof;dof++) {
3012: ptanzi = 0;
3013: /* Determine symbolic row of PtA: */
3014: for (j=0;j<ptnzi;j++) {
3015: /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
3016: arow = ptJ[j]*ppdof + dof;
3017: /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
3018: anzj = ai[arow+1] - ai[arow];
3019: ajj = aj + ai[arow];
3020: for (k=0;k<anzj;k++) {
3021: if (!ptadenserow[ajj[k]]) {
3022: ptadenserow[ajj[k]] = -1;
3023: ptasparserow[ptanzi++] = ajj[k];
3024: }
3025: }
3026: }
3027: /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
3028: ptaj = ptasparserow;
3029: cnzi = 0;
3030: for (j=0;j<ptanzi;j++) {
3031: /* Get offset within block of P */
3032: pshift = *ptaj%ppdof;
3033: /* Get block row of P */
3034: prow = (*ptaj++)/ppdof; /* integer division */
3035: /* P has same number of nonzeros per row as the compressed form */
3036: pnzj = pi[prow+1] - pi[prow];
3037: pjj = pj + pi[prow];
3038: for (k=0;k<pnzj;k++) {
3039: /* Locations in C are shifted by the offset within the block */
3040: /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
3041: if (!denserow[pjj[k]*ppdof+pshift]) {
3042: denserow[pjj[k]*ppdof+pshift] = -1;
3043: sparserow[cnzi++] = pjj[k]*ppdof+pshift;
3044: }
3045: }
3046: }
3048: /* sort sparserow */
3049: PetscSortInt(cnzi,sparserow);
3050:
3051: /* If free space is not available, make more free space */
3052: /* Double the amount of total space in the list */
3053: if (current_space->local_remaining<cnzi) {
3054: PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);
3055: }
3057: /* Copy data into free space, and zero out denserows */
3058: PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));
3059: current_space->array += cnzi;
3060: current_space->local_used += cnzi;
3061: current_space->local_remaining -= cnzi;
3063: for (j=0;j<ptanzi;j++) {
3064: ptadenserow[ptasparserow[j]] = 0;
3065: }
3066: for (j=0;j<cnzi;j++) {
3067: denserow[sparserow[j]] = 0;
3068: }
3069: /* Aside: Perhaps we should save the pta info for the numerical factorization. */
3070: /* For now, we will recompute what is needed. */
3071: ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
3072: }
3073: }
3074: /* nnz is now stored in ci[ptm], column indices are in the list of free space */
3075: /* Allocate space for cj, initialize cj, and */
3076: /* destroy list of free space and other temporary array(s) */
3077: PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);
3078: PetscFreeSpaceContiguous(&free_space,cj);
3079: PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);
3080:
3081: /* Allocate space for ca */
3082: PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);
3083: PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));
3084:
3085: /* put together the new matrix */
3086: MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,cn,cn,ci,cj,ca,C);
3088: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3089: /* Since these are PETSc arrays, change flags to free them as necessary. */
3090: c = (Mat_SeqAIJ *)((*C)->data);
3091: c->free_a = PETSC_TRUE;
3092: c->free_ij = PETSC_TRUE;
3093: c->nonew = 0;
3095: /* Clean up. */
3096: MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
3098: PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);
3099: return(0);
3100: }
3104: PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
3105: {
3106: /* This routine requires testing -- first draft only */
3107: PetscErrorCode ierr;
3108: Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data;
3109: Mat P=pp->AIJ;
3110: Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
3111: Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data;
3112: Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data;
3113: const PetscInt *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
3114: const PetscInt *ci=c->i,*cj=c->j,*cjj;
3115: const PetscInt am=A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3116: PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3117: const MatScalar *aa=a->a,*pa=p->a,*pA=p->a,*paj;
3118: MatScalar *ca=c->a,*caj,*apa;
3121: /* Allocate temporary array for storage of one row of A*P */
3122: PetscMalloc3(cn,MatScalar,&apa,cn,PetscInt,&apj,cn,PetscInt,&apjdense);
3123: PetscMemzero(apa,cn*sizeof(MatScalar));
3124: PetscMemzero(apj,cn*sizeof(PetscInt));
3125: PetscMemzero(apjdense,cn*sizeof(PetscInt));
3127: /* Clear old values in C */
3128: PetscMemzero(ca,ci[cm]*sizeof(MatScalar));
3130: for (i=0;i<am;i++) {
3131: /* Form sparse row of A*P */
3132: anzi = ai[i+1] - ai[i];
3133: apnzj = 0;
3134: for (j=0;j<anzi;j++) {
3135: /* Get offset within block of P */
3136: pshift = *aj%ppdof;
3137: /* Get block row of P */
3138: prow = *aj++/ppdof; /* integer division */
3139: pnzj = pi[prow+1] - pi[prow];
3140: pjj = pj + pi[prow];
3141: paj = pa + pi[prow];
3142: for (k=0;k<pnzj;k++) {
3143: poffset = pjj[k]*ppdof+pshift;
3144: if (!apjdense[poffset]) {
3145: apjdense[poffset] = -1;
3146: apj[apnzj++] = poffset;
3147: }
3148: apa[poffset] += (*aa)*paj[k];
3149: }
3150: PetscLogFlops(2.0*pnzj);
3151: aa++;
3152: }
3154: /* Sort the j index array for quick sparse axpy. */
3155: /* Note: a array does not need sorting as it is in dense storage locations. */
3156: PetscSortInt(apnzj,apj);
3158: /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
3159: prow = i/ppdof; /* integer division */
3160: pshift = i%ppdof;
3161: poffset = pi[prow];
3162: pnzi = pi[prow+1] - poffset;
3163: /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
3164: pJ = pj+poffset;
3165: pA = pa+poffset;
3166: for (j=0;j<pnzi;j++) {
3167: crow = (*pJ)*ppdof+pshift;
3168: cjj = cj + ci[crow];
3169: caj = ca + ci[crow];
3170: pJ++;
3171: /* Perform sparse axpy operation. Note cjj includes apj. */
3172: for (k=0,nextap=0;nextap<apnzj;k++) {
3173: if (cjj[k]==apj[nextap]) {
3174: caj[k] += (*pA)*apa[apj[nextap++]];
3175: }
3176: }
3177: PetscLogFlops(2.0*apnzj);
3178: pA++;
3179: }
3181: /* Zero the current row info for A*P */
3182: for (j=0;j<apnzj;j++) {
3183: apa[apj[j]] = 0.;
3184: apjdense[apj[j]] = 0;
3185: }
3186: }
3188: /* Assemble the final matrix and clean up */
3189: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
3190: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
3191: PetscFree3(apa,apj,apjdense);
3192: return(0);
3193: }
3197: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
3198: {
3199: PetscErrorCode ierr;
3202: /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */
3203: MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);
3204: ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);
3205: return(0);
3206: }
3210: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
3211: {
3213: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
3214: return(0);
3215: }
3217: EXTERN_C_BEGIN
3220: PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3221: {
3222: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
3223: Mat a = b->AIJ,B;
3224: Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data;
3225: PetscErrorCode ierr;
3226: PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3227: PetscInt *cols;
3228: PetscScalar *vals;
3231: MatGetSize(a,&m,&n);
3232: PetscMalloc(dof*m*sizeof(PetscInt),&ilen);
3233: for (i=0; i<m; i++) {
3234: nmax = PetscMax(nmax,aij->ilen[i]);
3235: for (j=0; j<dof; j++) {
3236: ilen[dof*i+j] = aij->ilen[i];
3237: }
3238: }
3239: MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);
3240: PetscFree(ilen);
3241: PetscMalloc(nmax*sizeof(PetscInt),&icols);
3242: ii = 0;
3243: for (i=0; i<m; i++) {
3244: MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3245: for (j=0; j<dof; j++) {
3246: for (k=0; k<ncols; k++) {
3247: icols[k] = dof*cols[k]+j;
3248: }
3249: MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3250: ii++;
3251: }
3252: MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3253: }
3254: PetscFree(icols);
3255: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3256: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3258: if (reuse == MAT_REUSE_MATRIX) {
3259: MatHeaderReplace(A,B);
3260: } else {
3261: *newmat = B;
3262: }
3263: return(0);
3264: }
3265: EXTERN_C_END
3267: #include <../src/mat/impls/aij/mpi/mpiaij.h>
3269: EXTERN_C_BEGIN
3272: PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3273: {
3274: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data;
3275: Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
3276: Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
3277: Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
3278: Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
3279: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data;
3280: PetscInt dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0;
3281: PetscInt *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL;
3282: PetscInt rstart,cstart,*garray,ii,k;
3283: PetscErrorCode ierr;
3284: PetscScalar *vals,*ovals;
3287: PetscMalloc2(A->rmap->n,PetscInt,&dnz,A->rmap->n,PetscInt,&onz);
3288: for (i=0; i<A->rmap->n/dof; i++) {
3289: nmax = PetscMax(nmax,AIJ->ilen[i]);
3290: onmax = PetscMax(onmax,OAIJ->ilen[i]);
3291: for (j=0; j<dof; j++) {
3292: dnz[dof*i+j] = AIJ->ilen[i];
3293: onz[dof*i+j] = OAIJ->ilen[i];
3294: }
3295: }
3296: MatCreateAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);
3297: PetscFree2(dnz,onz);
3299: PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);
3300: rstart = dof*maij->A->rmap->rstart;
3301: cstart = dof*maij->A->cmap->rstart;
3302: garray = mpiaij->garray;
3304: ii = rstart;
3305: for (i=0; i<A->rmap->n/dof; i++) {
3306: MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3307: MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3308: for (j=0; j<dof; j++) {
3309: for (k=0; k<ncols; k++) {
3310: icols[k] = cstart + dof*cols[k]+j;
3311: }
3312: for (k=0; k<oncols; k++) {
3313: oicols[k] = dof*garray[ocols[k]]+j;
3314: }
3315: MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3316: MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);
3317: ii++;
3318: }
3319: MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3320: MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3321: }
3322: PetscFree2(icols,oicols);
3324: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3325: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3327: if (reuse == MAT_REUSE_MATRIX) {
3328: PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
3329: ((PetscObject)A)->refct = 1;
3330: MatHeaderReplace(A,B);
3331: ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3332: } else {
3333: *newmat = B;
3334: }
3335: return(0);
3336: }
3337: EXTERN_C_END
3341: PetscErrorCode MatGetSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
3342: {
3344: Mat A;
3347: MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);
3348: MatGetSubMatrix(A,isrow,iscol,cll,newmat);
3349: MatDestroy(&A);
3350: return(0);
3351: }
3353: /* ---------------------------------------------------------------------------------- */
3356: /*@C
3357: MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
3358: operations for multicomponent problems. It interpolates each component the same
3359: way independently. The matrix type is based on MATSEQAIJ for sequential matrices,
3360: and MATMPIAIJ for distributed matrices.
3362: Collective
3364: Input Parameters:
3365: + A - the AIJ matrix describing the action on blocks
3366: - dof - the block size (number of components per node)
3368: Output Parameter:
3369: . maij - the new MAIJ matrix
3371: Operations provided:
3372: + MatMult
3373: . MatMultTranspose
3374: . MatMultAdd
3375: . MatMultTransposeAdd
3376: - MatView
3378: Level: advanced
3380: .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ
3381: @*/
3382: PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
3383: {
3385: PetscMPIInt size;
3386: PetscInt n;
3387: Mat B;
3390: PetscObjectReference((PetscObject)A);
3392: if (dof == 1) {
3393: *maij = A;
3394: } else {
3395: MatCreate(((PetscObject)A)->comm,&B);
3396: MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);
3397: PetscLayoutSetBlockSize(B->rmap,dof);
3398: PetscLayoutSetBlockSize(B->cmap,dof);
3399: PetscLayoutSetUp(B->rmap);
3400: PetscLayoutSetUp(B->cmap);
3401: B->assembled = PETSC_TRUE;
3403: MPI_Comm_size(((PetscObject)A)->comm,&size);
3404: if (size == 1) {
3405: Mat_SeqMAIJ *b;
3407: MatSetType(B,MATSEQMAIJ);
3408: B->ops->setup = PETSC_NULL;
3409: B->ops->destroy = MatDestroy_SeqMAIJ;
3410: B->ops->view = MatView_SeqMAIJ;
3411: b = (Mat_SeqMAIJ*)B->data;
3412: b->dof = dof;
3413: b->AIJ = A;
3414: if (dof == 2) {
3415: B->ops->mult = MatMult_SeqMAIJ_2;
3416: B->ops->multadd = MatMultAdd_SeqMAIJ_2;
3417: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2;
3418: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3419: } else if (dof == 3) {
3420: B->ops->mult = MatMult_SeqMAIJ_3;
3421: B->ops->multadd = MatMultAdd_SeqMAIJ_3;
3422: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3;
3423: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3424: } else if (dof == 4) {
3425: B->ops->mult = MatMult_SeqMAIJ_4;
3426: B->ops->multadd = MatMultAdd_SeqMAIJ_4;
3427: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4;
3428: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3429: } else if (dof == 5) {
3430: B->ops->mult = MatMult_SeqMAIJ_5;
3431: B->ops->multadd = MatMultAdd_SeqMAIJ_5;
3432: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5;
3433: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
3434: } else if (dof == 6) {
3435: B->ops->mult = MatMult_SeqMAIJ_6;
3436: B->ops->multadd = MatMultAdd_SeqMAIJ_6;
3437: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6;
3438: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3439: } else if (dof == 7) {
3440: B->ops->mult = MatMult_SeqMAIJ_7;
3441: B->ops->multadd = MatMultAdd_SeqMAIJ_7;
3442: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7;
3443: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
3444: } else if (dof == 8) {
3445: B->ops->mult = MatMult_SeqMAIJ_8;
3446: B->ops->multadd = MatMultAdd_SeqMAIJ_8;
3447: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8;
3448: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
3449: } else if (dof == 9) {
3450: B->ops->mult = MatMult_SeqMAIJ_9;
3451: B->ops->multadd = MatMultAdd_SeqMAIJ_9;
3452: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9;
3453: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3454: } else if (dof == 10) {
3455: B->ops->mult = MatMult_SeqMAIJ_10;
3456: B->ops->multadd = MatMultAdd_SeqMAIJ_10;
3457: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10;
3458: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3459: } else if (dof == 11) {
3460: B->ops->mult = MatMult_SeqMAIJ_11;
3461: B->ops->multadd = MatMultAdd_SeqMAIJ_11;
3462: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11;
3463: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
3464: } else if (dof == 16) {
3465: B->ops->mult = MatMult_SeqMAIJ_16;
3466: B->ops->multadd = MatMultAdd_SeqMAIJ_16;
3467: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16;
3468: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3469: } else if (dof == 18) {
3470: B->ops->mult = MatMult_SeqMAIJ_18;
3471: B->ops->multadd = MatMultAdd_SeqMAIJ_18;
3472: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18;
3473: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
3474: } else {
3475: B->ops->mult = MatMult_SeqMAIJ_N;
3476: B->ops->multadd = MatMultAdd_SeqMAIJ_N;
3477: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N;
3478: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
3479: }
3480: B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ;
3481: B->ops->ptapnumeric_seqaij = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
3482: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);
3483: } else {
3484: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
3485: Mat_MPIMAIJ *b;
3486: IS from,to;
3487: Vec gvec;
3489: MatSetType(B,MATMPIMAIJ);
3490: B->ops->setup = PETSC_NULL;
3491: B->ops->destroy = MatDestroy_MPIMAIJ;
3492: B->ops->view = MatView_MPIMAIJ;
3493: b = (Mat_MPIMAIJ*)B->data;
3494: b->dof = dof;
3495: b->A = A;
3496: MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);
3497: MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);
3499: VecGetSize(mpiaij->lvec,&n);
3500: VecCreate(PETSC_COMM_SELF,&b->w);
3501: VecSetSizes(b->w,n*dof,n*dof);
3502: VecSetBlockSize(b->w,dof);
3503: VecSetType(b->w,VECSEQ);
3505: /* create two temporary Index sets for build scatter gather */
3506: ISCreateBlock(((PetscObject)A)->comm,dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);
3507: ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);
3509: /* create temporary global vector to generate scatter context */
3510: VecCreateMPIWithArray(((PetscObject)A)->comm,dof,dof*A->cmap->n,dof*A->cmap->N,PETSC_NULL,&gvec);
3512: /* generate the scatter context */
3513: VecScatterCreate(gvec,from,b->w,to,&b->ctx);
3515: ISDestroy(&from);
3516: ISDestroy(&to);
3517: VecDestroy(&gvec);
3519: B->ops->mult = MatMult_MPIMAIJ_dof;
3520: B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof;
3521: B->ops->multadd = MatMultAdd_MPIMAIJ_dof;
3522: B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
3523: B->ops->ptapsymbolic_mpiaij = MatPtAPSymbolic_MPIAIJ_MPIMAIJ;
3524: B->ops->ptapnumeric_mpiaij = MatPtAPNumeric_MPIAIJ_MPIMAIJ;
3525: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);
3526: }
3527: B->ops->getsubmatrix = MatGetSubMatrix_MAIJ;
3528: MatSetUp(B);
3529: *maij = B;
3530: MatView_Private(B);
3531: }
3532: return(0);
3533: }