Actual source code: maij.c
petsc-3.13.6 2020-09-29
2: /*
3: Defines the basic matrix operations for the MAIJ matrix storage format.
4: This format is used for restriction and interpolation operations for
5: multicomponent problems. It interpolates each component the same way
6: independently.
8: We provide:
9: MatMult()
10: MatMultTranspose()
11: MatMultTransposeAdd()
12: MatMultAdd()
13: and
14: MatCreateMAIJ(Mat,dof,Mat*)
16: This single directory handles both the sequential and parallel codes
17: */
19: #include <../src/mat/impls/maij/maij.h>
20: #include <../src/mat/utils/freespace.h>
22: /*@
23: MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix
25: Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel
27: Input Parameter:
28: . A - the MAIJ matrix
30: Output Parameter:
31: . B - the AIJ matrix
33: Level: advanced
35: Notes:
36: The reference count on the AIJ matrix is not increased so you should not destroy it.
38: .seealso: MatCreateMAIJ()
39: @*/
40: PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B)
41: {
43: PetscBool ismpimaij,isseqmaij;
46: PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
47: PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
48: if (ismpimaij) {
49: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
51: *B = b->A;
52: } else if (isseqmaij) {
53: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
55: *B = b->AIJ;
56: } else {
57: *B = A;
58: }
59: return(0);
60: }
62: /*@
63: MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size
65: Logically Collective
67: Input Parameter:
68: + A - the MAIJ matrix
69: - dof - the block size for the new matrix
71: Output Parameter:
72: . B - the new MAIJ matrix
74: Level: advanced
76: .seealso: MatCreateMAIJ()
77: @*/
78: PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
79: {
81: Mat Aij = NULL;
85: MatMAIJGetAIJ(A,&Aij);
86: MatCreateMAIJ(Aij,dof,B);
87: return(0);
88: }
90: PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
91: {
93: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
96: MatDestroy(&b->AIJ);
97: PetscFree(A->data);
98: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL);
99: PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaij_seqmaij_C",NULL);
100: return(0);
101: }
103: PetscErrorCode MatSetUp_MAIJ(Mat A)
104: {
106: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices");
107: return(0);
108: }
110: PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
111: {
113: Mat B;
116: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
117: MatView(B,viewer);
118: MatDestroy(&B);
119: return(0);
120: }
122: PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
123: {
125: Mat B;
128: MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
129: MatView(B,viewer);
130: MatDestroy(&B);
131: return(0);
132: }
134: PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
135: {
137: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
140: MatDestroy(&b->AIJ);
141: MatDestroy(&b->OAIJ);
142: MatDestroy(&b->A);
143: VecScatterDestroy(&b->ctx);
144: VecDestroy(&b->w);
145: PetscFree(A->data);
146: PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL);
147: PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_mpiaij_mpimaij_C",NULL);
148: PetscObjectChangeTypeName((PetscObject)A,0);
149: return(0);
150: }
152: /*MC
153: MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
154: multicomponent problems, interpolating or restricting each component the same way independently.
155: The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
157: Operations provided:
158: + MatMult
159: . MatMultTranspose
160: . MatMultAdd
161: - MatMultTransposeAdd
163: Level: advanced
165: .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ()
166: M*/
168: PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
169: {
171: Mat_MPIMAIJ *b;
172: PetscMPIInt size;
175: PetscNewLog(A,&b);
176: A->data = (void*)b;
178: PetscMemzero(A->ops,sizeof(struct _MatOps));
180: A->ops->setup = MatSetUp_MAIJ;
182: b->AIJ = 0;
183: b->dof = 0;
184: b->OAIJ = 0;
185: b->ctx = 0;
186: b->w = 0;
187: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
188: if (size == 1) {
189: PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);
190: } else {
191: PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);
192: }
193: A->preallocated = PETSC_TRUE;
194: A->assembled = PETSC_TRUE;
195: return(0);
196: }
198: /* --------------------------------------------------------------------------------------*/
199: PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
200: {
201: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
202: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
203: const PetscScalar *x,*v;
204: PetscScalar *y, sum1, sum2;
205: PetscErrorCode ierr;
206: PetscInt nonzerorow=0,n,i,jrow,j;
207: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
210: VecGetArrayRead(xx,&x);
211: VecGetArray(yy,&y);
212: idx = a->j;
213: v = a->a;
214: ii = a->i;
216: for (i=0; i<m; i++) {
217: jrow = ii[i];
218: n = ii[i+1] - jrow;
219: sum1 = 0.0;
220: sum2 = 0.0;
222: nonzerorow += (n>0);
223: for (j=0; j<n; j++) {
224: sum1 += v[jrow]*x[2*idx[jrow]];
225: sum2 += v[jrow]*x[2*idx[jrow]+1];
226: jrow++;
227: }
228: y[2*i] = sum1;
229: y[2*i+1] = sum2;
230: }
232: PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);
233: VecRestoreArrayRead(xx,&x);
234: VecRestoreArray(yy,&y);
235: return(0);
236: }
238: PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
239: {
240: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
241: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
242: const PetscScalar *x,*v;
243: PetscScalar *y,alpha1,alpha2;
244: PetscErrorCode ierr;
245: PetscInt n,i;
246: const PetscInt m = b->AIJ->rmap->n,*idx;
249: VecSet(yy,0.0);
250: VecGetArrayRead(xx,&x);
251: VecGetArray(yy,&y);
253: for (i=0; i<m; i++) {
254: idx = a->j + a->i[i];
255: v = a->a + a->i[i];
256: n = a->i[i+1] - a->i[i];
257: alpha1 = x[2*i];
258: alpha2 = x[2*i+1];
259: while (n-->0) {
260: y[2*(*idx)] += alpha1*(*v);
261: y[2*(*idx)+1] += alpha2*(*v);
262: idx++; v++;
263: }
264: }
265: PetscLogFlops(4.0*a->nz);
266: VecRestoreArrayRead(xx,&x);
267: VecRestoreArray(yy,&y);
268: return(0);
269: }
271: PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
272: {
273: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
274: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
275: const PetscScalar *x,*v;
276: PetscScalar *y,sum1, sum2;
277: PetscErrorCode ierr;
278: PetscInt n,i,jrow,j;
279: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
282: if (yy != zz) {VecCopy(yy,zz);}
283: VecGetArrayRead(xx,&x);
284: VecGetArray(zz,&y);
285: idx = a->j;
286: v = a->a;
287: ii = a->i;
289: for (i=0; i<m; i++) {
290: jrow = ii[i];
291: n = ii[i+1] - jrow;
292: sum1 = 0.0;
293: sum2 = 0.0;
294: for (j=0; j<n; j++) {
295: sum1 += v[jrow]*x[2*idx[jrow]];
296: sum2 += v[jrow]*x[2*idx[jrow]+1];
297: jrow++;
298: }
299: y[2*i] += sum1;
300: y[2*i+1] += sum2;
301: }
303: PetscLogFlops(4.0*a->nz);
304: VecRestoreArrayRead(xx,&x);
305: VecRestoreArray(zz,&y);
306: return(0);
307: }
308: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
309: {
310: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
311: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
312: const PetscScalar *x,*v;
313: PetscScalar *y,alpha1,alpha2;
314: PetscErrorCode ierr;
315: PetscInt n,i;
316: const PetscInt m = b->AIJ->rmap->n,*idx;
319: if (yy != zz) {VecCopy(yy,zz);}
320: VecGetArrayRead(xx,&x);
321: VecGetArray(zz,&y);
323: for (i=0; i<m; i++) {
324: idx = a->j + a->i[i];
325: v = a->a + a->i[i];
326: n = a->i[i+1] - a->i[i];
327: alpha1 = x[2*i];
328: alpha2 = x[2*i+1];
329: while (n-->0) {
330: y[2*(*idx)] += alpha1*(*v);
331: y[2*(*idx)+1] += alpha2*(*v);
332: idx++; v++;
333: }
334: }
335: PetscLogFlops(4.0*a->nz);
336: VecRestoreArrayRead(xx,&x);
337: VecRestoreArray(zz,&y);
338: return(0);
339: }
340: /* --------------------------------------------------------------------------------------*/
341: PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
342: {
343: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
344: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
345: const PetscScalar *x,*v;
346: PetscScalar *y,sum1, sum2, sum3;
347: PetscErrorCode ierr;
348: PetscInt nonzerorow=0,n,i,jrow,j;
349: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
352: VecGetArrayRead(xx,&x);
353: VecGetArray(yy,&y);
354: idx = a->j;
355: v = a->a;
356: ii = a->i;
358: for (i=0; i<m; i++) {
359: jrow = ii[i];
360: n = ii[i+1] - jrow;
361: sum1 = 0.0;
362: sum2 = 0.0;
363: sum3 = 0.0;
365: nonzerorow += (n>0);
366: for (j=0; j<n; j++) {
367: sum1 += v[jrow]*x[3*idx[jrow]];
368: sum2 += v[jrow]*x[3*idx[jrow]+1];
369: sum3 += v[jrow]*x[3*idx[jrow]+2];
370: jrow++;
371: }
372: y[3*i] = sum1;
373: y[3*i+1] = sum2;
374: y[3*i+2] = sum3;
375: }
377: PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);
378: VecRestoreArrayRead(xx,&x);
379: VecRestoreArray(yy,&y);
380: return(0);
381: }
383: PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
384: {
385: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
386: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
387: const PetscScalar *x,*v;
388: PetscScalar *y,alpha1,alpha2,alpha3;
389: PetscErrorCode ierr;
390: PetscInt n,i;
391: const PetscInt m = b->AIJ->rmap->n,*idx;
394: VecSet(yy,0.0);
395: VecGetArrayRead(xx,&x);
396: VecGetArray(yy,&y);
398: for (i=0; i<m; i++) {
399: idx = a->j + a->i[i];
400: v = a->a + a->i[i];
401: n = a->i[i+1] - a->i[i];
402: alpha1 = x[3*i];
403: alpha2 = x[3*i+1];
404: alpha3 = x[3*i+2];
405: while (n-->0) {
406: y[3*(*idx)] += alpha1*(*v);
407: y[3*(*idx)+1] += alpha2*(*v);
408: y[3*(*idx)+2] += alpha3*(*v);
409: idx++; v++;
410: }
411: }
412: PetscLogFlops(6.0*a->nz);
413: VecRestoreArrayRead(xx,&x);
414: VecRestoreArray(yy,&y);
415: return(0);
416: }
418: PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
419: {
420: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
421: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
422: const PetscScalar *x,*v;
423: PetscScalar *y,sum1, sum2, sum3;
424: PetscErrorCode ierr;
425: PetscInt n,i,jrow,j;
426: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
429: if (yy != zz) {VecCopy(yy,zz);}
430: VecGetArrayRead(xx,&x);
431: VecGetArray(zz,&y);
432: idx = a->j;
433: v = a->a;
434: ii = a->i;
436: for (i=0; i<m; i++) {
437: jrow = ii[i];
438: n = ii[i+1] - jrow;
439: sum1 = 0.0;
440: sum2 = 0.0;
441: sum3 = 0.0;
442: for (j=0; j<n; j++) {
443: sum1 += v[jrow]*x[3*idx[jrow]];
444: sum2 += v[jrow]*x[3*idx[jrow]+1];
445: sum3 += v[jrow]*x[3*idx[jrow]+2];
446: jrow++;
447: }
448: y[3*i] += sum1;
449: y[3*i+1] += sum2;
450: y[3*i+2] += sum3;
451: }
453: PetscLogFlops(6.0*a->nz);
454: VecRestoreArrayRead(xx,&x);
455: VecRestoreArray(zz,&y);
456: return(0);
457: }
458: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
459: {
460: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
461: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
462: const PetscScalar *x,*v;
463: PetscScalar *y,alpha1,alpha2,alpha3;
464: PetscErrorCode ierr;
465: PetscInt n,i;
466: const PetscInt m = b->AIJ->rmap->n,*idx;
469: if (yy != zz) {VecCopy(yy,zz);}
470: VecGetArrayRead(xx,&x);
471: VecGetArray(zz,&y);
472: for (i=0; i<m; i++) {
473: idx = a->j + a->i[i];
474: v = a->a + a->i[i];
475: n = a->i[i+1] - a->i[i];
476: alpha1 = x[3*i];
477: alpha2 = x[3*i+1];
478: alpha3 = x[3*i+2];
479: while (n-->0) {
480: y[3*(*idx)] += alpha1*(*v);
481: y[3*(*idx)+1] += alpha2*(*v);
482: y[3*(*idx)+2] += alpha3*(*v);
483: idx++; v++;
484: }
485: }
486: PetscLogFlops(6.0*a->nz);
487: VecRestoreArrayRead(xx,&x);
488: VecRestoreArray(zz,&y);
489: return(0);
490: }
492: /* ------------------------------------------------------------------------------*/
493: PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
494: {
495: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
496: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
497: const PetscScalar *x,*v;
498: PetscScalar *y,sum1, sum2, sum3, sum4;
499: PetscErrorCode ierr;
500: PetscInt nonzerorow=0,n,i,jrow,j;
501: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
504: VecGetArrayRead(xx,&x);
505: VecGetArray(yy,&y);
506: idx = a->j;
507: v = a->a;
508: ii = a->i;
510: for (i=0; i<m; i++) {
511: jrow = ii[i];
512: n = ii[i+1] - jrow;
513: sum1 = 0.0;
514: sum2 = 0.0;
515: sum3 = 0.0;
516: sum4 = 0.0;
517: nonzerorow += (n>0);
518: for (j=0; j<n; j++) {
519: sum1 += v[jrow]*x[4*idx[jrow]];
520: sum2 += v[jrow]*x[4*idx[jrow]+1];
521: sum3 += v[jrow]*x[4*idx[jrow]+2];
522: sum4 += v[jrow]*x[4*idx[jrow]+3];
523: jrow++;
524: }
525: y[4*i] = sum1;
526: y[4*i+1] = sum2;
527: y[4*i+2] = sum3;
528: y[4*i+3] = sum4;
529: }
531: PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);
532: VecRestoreArrayRead(xx,&x);
533: VecRestoreArray(yy,&y);
534: return(0);
535: }
537: PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
538: {
539: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
540: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
541: const PetscScalar *x,*v;
542: PetscScalar *y,alpha1,alpha2,alpha3,alpha4;
543: PetscErrorCode ierr;
544: PetscInt n,i;
545: const PetscInt m = b->AIJ->rmap->n,*idx;
548: VecSet(yy,0.0);
549: VecGetArrayRead(xx,&x);
550: VecGetArray(yy,&y);
551: for (i=0; i<m; i++) {
552: idx = a->j + a->i[i];
553: v = a->a + a->i[i];
554: n = a->i[i+1] - a->i[i];
555: alpha1 = x[4*i];
556: alpha2 = x[4*i+1];
557: alpha3 = x[4*i+2];
558: alpha4 = x[4*i+3];
559: while (n-->0) {
560: y[4*(*idx)] += alpha1*(*v);
561: y[4*(*idx)+1] += alpha2*(*v);
562: y[4*(*idx)+2] += alpha3*(*v);
563: y[4*(*idx)+3] += alpha4*(*v);
564: idx++; v++;
565: }
566: }
567: PetscLogFlops(8.0*a->nz);
568: VecRestoreArrayRead(xx,&x);
569: VecRestoreArray(yy,&y);
570: return(0);
571: }
573: PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
574: {
575: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
576: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
577: const PetscScalar *x,*v;
578: PetscScalar *y,sum1, sum2, sum3, sum4;
579: PetscErrorCode ierr;
580: PetscInt n,i,jrow,j;
581: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
584: if (yy != zz) {VecCopy(yy,zz);}
585: VecGetArrayRead(xx,&x);
586: VecGetArray(zz,&y);
587: idx = a->j;
588: v = a->a;
589: ii = a->i;
591: for (i=0; i<m; i++) {
592: jrow = ii[i];
593: n = ii[i+1] - jrow;
594: sum1 = 0.0;
595: sum2 = 0.0;
596: sum3 = 0.0;
597: sum4 = 0.0;
598: for (j=0; j<n; j++) {
599: sum1 += v[jrow]*x[4*idx[jrow]];
600: sum2 += v[jrow]*x[4*idx[jrow]+1];
601: sum3 += v[jrow]*x[4*idx[jrow]+2];
602: sum4 += v[jrow]*x[4*idx[jrow]+3];
603: jrow++;
604: }
605: y[4*i] += sum1;
606: y[4*i+1] += sum2;
607: y[4*i+2] += sum3;
608: y[4*i+3] += sum4;
609: }
611: PetscLogFlops(8.0*a->nz);
612: VecRestoreArrayRead(xx,&x);
613: VecRestoreArray(zz,&y);
614: return(0);
615: }
616: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
617: {
618: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
619: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
620: const PetscScalar *x,*v;
621: PetscScalar *y,alpha1,alpha2,alpha3,alpha4;
622: PetscErrorCode ierr;
623: PetscInt n,i;
624: const PetscInt m = b->AIJ->rmap->n,*idx;
627: if (yy != zz) {VecCopy(yy,zz);}
628: VecGetArrayRead(xx,&x);
629: VecGetArray(zz,&y);
631: for (i=0; i<m; i++) {
632: idx = a->j + a->i[i];
633: v = a->a + a->i[i];
634: n = a->i[i+1] - a->i[i];
635: alpha1 = x[4*i];
636: alpha2 = x[4*i+1];
637: alpha3 = x[4*i+2];
638: alpha4 = x[4*i+3];
639: while (n-->0) {
640: y[4*(*idx)] += alpha1*(*v);
641: y[4*(*idx)+1] += alpha2*(*v);
642: y[4*(*idx)+2] += alpha3*(*v);
643: y[4*(*idx)+3] += alpha4*(*v);
644: idx++; v++;
645: }
646: }
647: PetscLogFlops(8.0*a->nz);
648: VecRestoreArrayRead(xx,&x);
649: VecRestoreArray(zz,&y);
650: return(0);
651: }
652: /* ------------------------------------------------------------------------------*/
654: PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
655: {
656: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
657: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
658: const PetscScalar *x,*v;
659: PetscScalar *y,sum1, sum2, sum3, sum4, sum5;
660: PetscErrorCode ierr;
661: PetscInt nonzerorow=0,n,i,jrow,j;
662: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
665: VecGetArrayRead(xx,&x);
666: VecGetArray(yy,&y);
667: idx = a->j;
668: v = a->a;
669: ii = a->i;
671: for (i=0; i<m; i++) {
672: jrow = ii[i];
673: n = ii[i+1] - jrow;
674: sum1 = 0.0;
675: sum2 = 0.0;
676: sum3 = 0.0;
677: sum4 = 0.0;
678: sum5 = 0.0;
680: nonzerorow += (n>0);
681: for (j=0; j<n; j++) {
682: sum1 += v[jrow]*x[5*idx[jrow]];
683: sum2 += v[jrow]*x[5*idx[jrow]+1];
684: sum3 += v[jrow]*x[5*idx[jrow]+2];
685: sum4 += v[jrow]*x[5*idx[jrow]+3];
686: sum5 += v[jrow]*x[5*idx[jrow]+4];
687: jrow++;
688: }
689: y[5*i] = sum1;
690: y[5*i+1] = sum2;
691: y[5*i+2] = sum3;
692: y[5*i+3] = sum4;
693: y[5*i+4] = sum5;
694: }
696: PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);
697: VecRestoreArrayRead(xx,&x);
698: VecRestoreArray(yy,&y);
699: return(0);
700: }
702: PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
703: {
704: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
705: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
706: const PetscScalar *x,*v;
707: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5;
708: PetscErrorCode ierr;
709: PetscInt n,i;
710: const PetscInt m = b->AIJ->rmap->n,*idx;
713: VecSet(yy,0.0);
714: VecGetArrayRead(xx,&x);
715: VecGetArray(yy,&y);
717: for (i=0; i<m; i++) {
718: idx = a->j + a->i[i];
719: v = a->a + a->i[i];
720: n = a->i[i+1] - a->i[i];
721: alpha1 = x[5*i];
722: alpha2 = x[5*i+1];
723: alpha3 = x[5*i+2];
724: alpha4 = x[5*i+3];
725: alpha5 = x[5*i+4];
726: while (n-->0) {
727: y[5*(*idx)] += alpha1*(*v);
728: y[5*(*idx)+1] += alpha2*(*v);
729: y[5*(*idx)+2] += alpha3*(*v);
730: y[5*(*idx)+3] += alpha4*(*v);
731: y[5*(*idx)+4] += alpha5*(*v);
732: idx++; v++;
733: }
734: }
735: PetscLogFlops(10.0*a->nz);
736: VecRestoreArrayRead(xx,&x);
737: VecRestoreArray(yy,&y);
738: return(0);
739: }
741: PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
742: {
743: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
744: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
745: const PetscScalar *x,*v;
746: PetscScalar *y,sum1, sum2, sum3, sum4, sum5;
747: PetscErrorCode ierr;
748: PetscInt n,i,jrow,j;
749: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
752: if (yy != zz) {VecCopy(yy,zz);}
753: VecGetArrayRead(xx,&x);
754: VecGetArray(zz,&y);
755: idx = a->j;
756: v = a->a;
757: ii = a->i;
759: for (i=0; i<m; i++) {
760: jrow = ii[i];
761: n = ii[i+1] - jrow;
762: sum1 = 0.0;
763: sum2 = 0.0;
764: sum3 = 0.0;
765: sum4 = 0.0;
766: sum5 = 0.0;
767: for (j=0; j<n; j++) {
768: sum1 += v[jrow]*x[5*idx[jrow]];
769: sum2 += v[jrow]*x[5*idx[jrow]+1];
770: sum3 += v[jrow]*x[5*idx[jrow]+2];
771: sum4 += v[jrow]*x[5*idx[jrow]+3];
772: sum5 += v[jrow]*x[5*idx[jrow]+4];
773: jrow++;
774: }
775: y[5*i] += sum1;
776: y[5*i+1] += sum2;
777: y[5*i+2] += sum3;
778: y[5*i+3] += sum4;
779: y[5*i+4] += sum5;
780: }
782: PetscLogFlops(10.0*a->nz);
783: VecRestoreArrayRead(xx,&x);
784: VecRestoreArray(zz,&y);
785: return(0);
786: }
788: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
789: {
790: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
791: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
792: const PetscScalar *x,*v;
793: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5;
794: PetscErrorCode ierr;
795: PetscInt n,i;
796: const PetscInt m = b->AIJ->rmap->n,*idx;
799: if (yy != zz) {VecCopy(yy,zz);}
800: VecGetArrayRead(xx,&x);
801: VecGetArray(zz,&y);
803: for (i=0; i<m; i++) {
804: idx = a->j + a->i[i];
805: v = a->a + a->i[i];
806: n = a->i[i+1] - a->i[i];
807: alpha1 = x[5*i];
808: alpha2 = x[5*i+1];
809: alpha3 = x[5*i+2];
810: alpha4 = x[5*i+3];
811: alpha5 = x[5*i+4];
812: while (n-->0) {
813: y[5*(*idx)] += alpha1*(*v);
814: y[5*(*idx)+1] += alpha2*(*v);
815: y[5*(*idx)+2] += alpha3*(*v);
816: y[5*(*idx)+3] += alpha4*(*v);
817: y[5*(*idx)+4] += alpha5*(*v);
818: idx++; v++;
819: }
820: }
821: PetscLogFlops(10.0*a->nz);
822: VecRestoreArrayRead(xx,&x);
823: VecRestoreArray(zz,&y);
824: return(0);
825: }
827: /* ------------------------------------------------------------------------------*/
828: PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
829: {
830: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
831: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
832: const PetscScalar *x,*v;
833: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6;
834: PetscErrorCode ierr;
835: PetscInt nonzerorow=0,n,i,jrow,j;
836: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
839: VecGetArrayRead(xx,&x);
840: VecGetArray(yy,&y);
841: idx = a->j;
842: v = a->a;
843: ii = a->i;
845: for (i=0; i<m; i++) {
846: jrow = ii[i];
847: n = ii[i+1] - jrow;
848: sum1 = 0.0;
849: sum2 = 0.0;
850: sum3 = 0.0;
851: sum4 = 0.0;
852: sum5 = 0.0;
853: sum6 = 0.0;
855: nonzerorow += (n>0);
856: for (j=0; j<n; j++) {
857: sum1 += v[jrow]*x[6*idx[jrow]];
858: sum2 += v[jrow]*x[6*idx[jrow]+1];
859: sum3 += v[jrow]*x[6*idx[jrow]+2];
860: sum4 += v[jrow]*x[6*idx[jrow]+3];
861: sum5 += v[jrow]*x[6*idx[jrow]+4];
862: sum6 += v[jrow]*x[6*idx[jrow]+5];
863: jrow++;
864: }
865: y[6*i] = sum1;
866: y[6*i+1] = sum2;
867: y[6*i+2] = sum3;
868: y[6*i+3] = sum4;
869: y[6*i+4] = sum5;
870: y[6*i+5] = sum6;
871: }
873: PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);
874: VecRestoreArrayRead(xx,&x);
875: VecRestoreArray(yy,&y);
876: return(0);
877: }
879: PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
880: {
881: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
882: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
883: const PetscScalar *x,*v;
884: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
885: PetscErrorCode ierr;
886: PetscInt n,i;
887: const PetscInt m = b->AIJ->rmap->n,*idx;
890: VecSet(yy,0.0);
891: VecGetArrayRead(xx,&x);
892: VecGetArray(yy,&y);
894: for (i=0; i<m; i++) {
895: idx = a->j + a->i[i];
896: v = a->a + a->i[i];
897: n = a->i[i+1] - a->i[i];
898: alpha1 = x[6*i];
899: alpha2 = x[6*i+1];
900: alpha3 = x[6*i+2];
901: alpha4 = x[6*i+3];
902: alpha5 = x[6*i+4];
903: alpha6 = x[6*i+5];
904: while (n-->0) {
905: y[6*(*idx)] += alpha1*(*v);
906: y[6*(*idx)+1] += alpha2*(*v);
907: y[6*(*idx)+2] += alpha3*(*v);
908: y[6*(*idx)+3] += alpha4*(*v);
909: y[6*(*idx)+4] += alpha5*(*v);
910: y[6*(*idx)+5] += alpha6*(*v);
911: idx++; v++;
912: }
913: }
914: PetscLogFlops(12.0*a->nz);
915: VecRestoreArrayRead(xx,&x);
916: VecRestoreArray(yy,&y);
917: return(0);
918: }
920: PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
921: {
922: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
923: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
924: const PetscScalar *x,*v;
925: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6;
926: PetscErrorCode ierr;
927: PetscInt n,i,jrow,j;
928: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
931: if (yy != zz) {VecCopy(yy,zz);}
932: VecGetArrayRead(xx,&x);
933: VecGetArray(zz,&y);
934: idx = a->j;
935: v = a->a;
936: ii = a->i;
938: for (i=0; i<m; i++) {
939: jrow = ii[i];
940: n = ii[i+1] - jrow;
941: sum1 = 0.0;
942: sum2 = 0.0;
943: sum3 = 0.0;
944: sum4 = 0.0;
945: sum5 = 0.0;
946: sum6 = 0.0;
947: for (j=0; j<n; j++) {
948: sum1 += v[jrow]*x[6*idx[jrow]];
949: sum2 += v[jrow]*x[6*idx[jrow]+1];
950: sum3 += v[jrow]*x[6*idx[jrow]+2];
951: sum4 += v[jrow]*x[6*idx[jrow]+3];
952: sum5 += v[jrow]*x[6*idx[jrow]+4];
953: sum6 += v[jrow]*x[6*idx[jrow]+5];
954: jrow++;
955: }
956: y[6*i] += sum1;
957: y[6*i+1] += sum2;
958: y[6*i+2] += sum3;
959: y[6*i+3] += sum4;
960: y[6*i+4] += sum5;
961: y[6*i+5] += sum6;
962: }
964: PetscLogFlops(12.0*a->nz);
965: VecRestoreArrayRead(xx,&x);
966: VecRestoreArray(zz,&y);
967: return(0);
968: }
970: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
971: {
972: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
973: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
974: const PetscScalar *x,*v;
975: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
976: PetscErrorCode ierr;
977: PetscInt n,i;
978: const PetscInt m = b->AIJ->rmap->n,*idx;
981: if (yy != zz) {VecCopy(yy,zz);}
982: VecGetArrayRead(xx,&x);
983: VecGetArray(zz,&y);
985: for (i=0; i<m; i++) {
986: idx = a->j + a->i[i];
987: v = a->a + a->i[i];
988: n = a->i[i+1] - a->i[i];
989: alpha1 = x[6*i];
990: alpha2 = x[6*i+1];
991: alpha3 = x[6*i+2];
992: alpha4 = x[6*i+3];
993: alpha5 = x[6*i+4];
994: alpha6 = x[6*i+5];
995: while (n-->0) {
996: y[6*(*idx)] += alpha1*(*v);
997: y[6*(*idx)+1] += alpha2*(*v);
998: y[6*(*idx)+2] += alpha3*(*v);
999: y[6*(*idx)+3] += alpha4*(*v);
1000: y[6*(*idx)+4] += alpha5*(*v);
1001: y[6*(*idx)+5] += alpha6*(*v);
1002: idx++; v++;
1003: }
1004: }
1005: PetscLogFlops(12.0*a->nz);
1006: VecRestoreArrayRead(xx,&x);
1007: VecRestoreArray(zz,&y);
1008: return(0);
1009: }
1011: /* ------------------------------------------------------------------------------*/
1012: PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1013: {
1014: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1015: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1016: const PetscScalar *x,*v;
1017: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1018: PetscErrorCode ierr;
1019: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1020: PetscInt nonzerorow=0,n,i,jrow,j;
1023: VecGetArrayRead(xx,&x);
1024: VecGetArray(yy,&y);
1025: idx = a->j;
1026: v = a->a;
1027: ii = a->i;
1029: for (i=0; i<m; i++) {
1030: jrow = ii[i];
1031: n = ii[i+1] - jrow;
1032: sum1 = 0.0;
1033: sum2 = 0.0;
1034: sum3 = 0.0;
1035: sum4 = 0.0;
1036: sum5 = 0.0;
1037: sum6 = 0.0;
1038: sum7 = 0.0;
1040: nonzerorow += (n>0);
1041: for (j=0; j<n; j++) {
1042: sum1 += v[jrow]*x[7*idx[jrow]];
1043: sum2 += v[jrow]*x[7*idx[jrow]+1];
1044: sum3 += v[jrow]*x[7*idx[jrow]+2];
1045: sum4 += v[jrow]*x[7*idx[jrow]+3];
1046: sum5 += v[jrow]*x[7*idx[jrow]+4];
1047: sum6 += v[jrow]*x[7*idx[jrow]+5];
1048: sum7 += v[jrow]*x[7*idx[jrow]+6];
1049: jrow++;
1050: }
1051: y[7*i] = sum1;
1052: y[7*i+1] = sum2;
1053: y[7*i+2] = sum3;
1054: y[7*i+3] = sum4;
1055: y[7*i+4] = sum5;
1056: y[7*i+5] = sum6;
1057: y[7*i+6] = sum7;
1058: }
1060: PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);
1061: VecRestoreArrayRead(xx,&x);
1062: VecRestoreArray(yy,&y);
1063: return(0);
1064: }
1066: PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1067: {
1068: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1069: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1070: const PetscScalar *x,*v;
1071: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1072: PetscErrorCode ierr;
1073: const PetscInt m = b->AIJ->rmap->n,*idx;
1074: PetscInt n,i;
1077: VecSet(yy,0.0);
1078: VecGetArrayRead(xx,&x);
1079: VecGetArray(yy,&y);
1081: for (i=0; i<m; i++) {
1082: idx = a->j + a->i[i];
1083: v = a->a + a->i[i];
1084: n = a->i[i+1] - a->i[i];
1085: alpha1 = x[7*i];
1086: alpha2 = x[7*i+1];
1087: alpha3 = x[7*i+2];
1088: alpha4 = x[7*i+3];
1089: alpha5 = x[7*i+4];
1090: alpha6 = x[7*i+5];
1091: alpha7 = x[7*i+6];
1092: while (n-->0) {
1093: y[7*(*idx)] += alpha1*(*v);
1094: y[7*(*idx)+1] += alpha2*(*v);
1095: y[7*(*idx)+2] += alpha3*(*v);
1096: y[7*(*idx)+3] += alpha4*(*v);
1097: y[7*(*idx)+4] += alpha5*(*v);
1098: y[7*(*idx)+5] += alpha6*(*v);
1099: y[7*(*idx)+6] += alpha7*(*v);
1100: idx++; v++;
1101: }
1102: }
1103: PetscLogFlops(14.0*a->nz);
1104: VecRestoreArrayRead(xx,&x);
1105: VecRestoreArray(yy,&y);
1106: return(0);
1107: }
1109: PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1110: {
1111: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1112: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1113: const PetscScalar *x,*v;
1114: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1115: PetscErrorCode ierr;
1116: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1117: PetscInt n,i,jrow,j;
1120: if (yy != zz) {VecCopy(yy,zz);}
1121: VecGetArrayRead(xx,&x);
1122: VecGetArray(zz,&y);
1123: idx = a->j;
1124: v = a->a;
1125: ii = a->i;
1127: for (i=0; i<m; i++) {
1128: jrow = ii[i];
1129: n = ii[i+1] - jrow;
1130: sum1 = 0.0;
1131: sum2 = 0.0;
1132: sum3 = 0.0;
1133: sum4 = 0.0;
1134: sum5 = 0.0;
1135: sum6 = 0.0;
1136: sum7 = 0.0;
1137: for (j=0; j<n; j++) {
1138: sum1 += v[jrow]*x[7*idx[jrow]];
1139: sum2 += v[jrow]*x[7*idx[jrow]+1];
1140: sum3 += v[jrow]*x[7*idx[jrow]+2];
1141: sum4 += v[jrow]*x[7*idx[jrow]+3];
1142: sum5 += v[jrow]*x[7*idx[jrow]+4];
1143: sum6 += v[jrow]*x[7*idx[jrow]+5];
1144: sum7 += v[jrow]*x[7*idx[jrow]+6];
1145: jrow++;
1146: }
1147: y[7*i] += sum1;
1148: y[7*i+1] += sum2;
1149: y[7*i+2] += sum3;
1150: y[7*i+3] += sum4;
1151: y[7*i+4] += sum5;
1152: y[7*i+5] += sum6;
1153: y[7*i+6] += sum7;
1154: }
1156: PetscLogFlops(14.0*a->nz);
1157: VecRestoreArrayRead(xx,&x);
1158: VecRestoreArray(zz,&y);
1159: return(0);
1160: }
1162: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1163: {
1164: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1165: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1166: const PetscScalar *x,*v;
1167: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1168: PetscErrorCode ierr;
1169: const PetscInt m = b->AIJ->rmap->n,*idx;
1170: PetscInt n,i;
1173: if (yy != zz) {VecCopy(yy,zz);}
1174: VecGetArrayRead(xx,&x);
1175: VecGetArray(zz,&y);
1176: for (i=0; i<m; i++) {
1177: idx = a->j + a->i[i];
1178: v = a->a + a->i[i];
1179: n = a->i[i+1] - a->i[i];
1180: alpha1 = x[7*i];
1181: alpha2 = x[7*i+1];
1182: alpha3 = x[7*i+2];
1183: alpha4 = x[7*i+3];
1184: alpha5 = x[7*i+4];
1185: alpha6 = x[7*i+5];
1186: alpha7 = x[7*i+6];
1187: while (n-->0) {
1188: y[7*(*idx)] += alpha1*(*v);
1189: y[7*(*idx)+1] += alpha2*(*v);
1190: y[7*(*idx)+2] += alpha3*(*v);
1191: y[7*(*idx)+3] += alpha4*(*v);
1192: y[7*(*idx)+4] += alpha5*(*v);
1193: y[7*(*idx)+5] += alpha6*(*v);
1194: y[7*(*idx)+6] += alpha7*(*v);
1195: idx++; v++;
1196: }
1197: }
1198: PetscLogFlops(14.0*a->nz);
1199: VecRestoreArrayRead(xx,&x);
1200: VecRestoreArray(zz,&y);
1201: return(0);
1202: }
1204: PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1205: {
1206: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1207: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1208: const PetscScalar *x,*v;
1209: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1210: PetscErrorCode ierr;
1211: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1212: PetscInt nonzerorow=0,n,i,jrow,j;
1215: VecGetArrayRead(xx,&x);
1216: VecGetArray(yy,&y);
1217: idx = a->j;
1218: v = a->a;
1219: ii = a->i;
1221: for (i=0; i<m; i++) {
1222: jrow = ii[i];
1223: n = ii[i+1] - jrow;
1224: sum1 = 0.0;
1225: sum2 = 0.0;
1226: sum3 = 0.0;
1227: sum4 = 0.0;
1228: sum5 = 0.0;
1229: sum6 = 0.0;
1230: sum7 = 0.0;
1231: sum8 = 0.0;
1233: nonzerorow += (n>0);
1234: for (j=0; j<n; j++) {
1235: sum1 += v[jrow]*x[8*idx[jrow]];
1236: sum2 += v[jrow]*x[8*idx[jrow]+1];
1237: sum3 += v[jrow]*x[8*idx[jrow]+2];
1238: sum4 += v[jrow]*x[8*idx[jrow]+3];
1239: sum5 += v[jrow]*x[8*idx[jrow]+4];
1240: sum6 += v[jrow]*x[8*idx[jrow]+5];
1241: sum7 += v[jrow]*x[8*idx[jrow]+6];
1242: sum8 += v[jrow]*x[8*idx[jrow]+7];
1243: jrow++;
1244: }
1245: y[8*i] = sum1;
1246: y[8*i+1] = sum2;
1247: y[8*i+2] = sum3;
1248: y[8*i+3] = sum4;
1249: y[8*i+4] = sum5;
1250: y[8*i+5] = sum6;
1251: y[8*i+6] = sum7;
1252: y[8*i+7] = sum8;
1253: }
1255: PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);
1256: VecRestoreArrayRead(xx,&x);
1257: VecRestoreArray(yy,&y);
1258: return(0);
1259: }
1261: PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1262: {
1263: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1264: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1265: const PetscScalar *x,*v;
1266: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1267: PetscErrorCode ierr;
1268: const PetscInt m = b->AIJ->rmap->n,*idx;
1269: PetscInt n,i;
1272: VecSet(yy,0.0);
1273: VecGetArrayRead(xx,&x);
1274: VecGetArray(yy,&y);
1276: for (i=0; i<m; i++) {
1277: idx = a->j + a->i[i];
1278: v = a->a + a->i[i];
1279: n = a->i[i+1] - a->i[i];
1280: alpha1 = x[8*i];
1281: alpha2 = x[8*i+1];
1282: alpha3 = x[8*i+2];
1283: alpha4 = x[8*i+3];
1284: alpha5 = x[8*i+4];
1285: alpha6 = x[8*i+5];
1286: alpha7 = x[8*i+6];
1287: alpha8 = x[8*i+7];
1288: while (n-->0) {
1289: y[8*(*idx)] += alpha1*(*v);
1290: y[8*(*idx)+1] += alpha2*(*v);
1291: y[8*(*idx)+2] += alpha3*(*v);
1292: y[8*(*idx)+3] += alpha4*(*v);
1293: y[8*(*idx)+4] += alpha5*(*v);
1294: y[8*(*idx)+5] += alpha6*(*v);
1295: y[8*(*idx)+6] += alpha7*(*v);
1296: y[8*(*idx)+7] += alpha8*(*v);
1297: idx++; v++;
1298: }
1299: }
1300: PetscLogFlops(16.0*a->nz);
1301: VecRestoreArrayRead(xx,&x);
1302: VecRestoreArray(yy,&y);
1303: return(0);
1304: }
1306: PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1307: {
1308: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1309: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1310: const PetscScalar *x,*v;
1311: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1312: PetscErrorCode ierr;
1313: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1314: PetscInt n,i,jrow,j;
1317: if (yy != zz) {VecCopy(yy,zz);}
1318: VecGetArrayRead(xx,&x);
1319: VecGetArray(zz,&y);
1320: idx = a->j;
1321: v = a->a;
1322: ii = a->i;
1324: for (i=0; i<m; i++) {
1325: jrow = ii[i];
1326: n = ii[i+1] - jrow;
1327: sum1 = 0.0;
1328: sum2 = 0.0;
1329: sum3 = 0.0;
1330: sum4 = 0.0;
1331: sum5 = 0.0;
1332: sum6 = 0.0;
1333: sum7 = 0.0;
1334: sum8 = 0.0;
1335: for (j=0; j<n; j++) {
1336: sum1 += v[jrow]*x[8*idx[jrow]];
1337: sum2 += v[jrow]*x[8*idx[jrow]+1];
1338: sum3 += v[jrow]*x[8*idx[jrow]+2];
1339: sum4 += v[jrow]*x[8*idx[jrow]+3];
1340: sum5 += v[jrow]*x[8*idx[jrow]+4];
1341: sum6 += v[jrow]*x[8*idx[jrow]+5];
1342: sum7 += v[jrow]*x[8*idx[jrow]+6];
1343: sum8 += v[jrow]*x[8*idx[jrow]+7];
1344: jrow++;
1345: }
1346: y[8*i] += sum1;
1347: y[8*i+1] += sum2;
1348: y[8*i+2] += sum3;
1349: y[8*i+3] += sum4;
1350: y[8*i+4] += sum5;
1351: y[8*i+5] += sum6;
1352: y[8*i+6] += sum7;
1353: y[8*i+7] += sum8;
1354: }
1356: PetscLogFlops(16.0*a->nz);
1357: VecRestoreArrayRead(xx,&x);
1358: VecRestoreArray(zz,&y);
1359: return(0);
1360: }
1362: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1363: {
1364: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1365: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1366: const PetscScalar *x,*v;
1367: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1368: PetscErrorCode ierr;
1369: const PetscInt m = b->AIJ->rmap->n,*idx;
1370: PetscInt n,i;
1373: if (yy != zz) {VecCopy(yy,zz);}
1374: VecGetArrayRead(xx,&x);
1375: VecGetArray(zz,&y);
1376: for (i=0; i<m; i++) {
1377: idx = a->j + a->i[i];
1378: v = a->a + a->i[i];
1379: n = a->i[i+1] - a->i[i];
1380: alpha1 = x[8*i];
1381: alpha2 = x[8*i+1];
1382: alpha3 = x[8*i+2];
1383: alpha4 = x[8*i+3];
1384: alpha5 = x[8*i+4];
1385: alpha6 = x[8*i+5];
1386: alpha7 = x[8*i+6];
1387: alpha8 = x[8*i+7];
1388: while (n-->0) {
1389: y[8*(*idx)] += alpha1*(*v);
1390: y[8*(*idx)+1] += alpha2*(*v);
1391: y[8*(*idx)+2] += alpha3*(*v);
1392: y[8*(*idx)+3] += alpha4*(*v);
1393: y[8*(*idx)+4] += alpha5*(*v);
1394: y[8*(*idx)+5] += alpha6*(*v);
1395: y[8*(*idx)+6] += alpha7*(*v);
1396: y[8*(*idx)+7] += alpha8*(*v);
1397: idx++; v++;
1398: }
1399: }
1400: PetscLogFlops(16.0*a->nz);
1401: VecRestoreArrayRead(xx,&x);
1402: VecRestoreArray(zz,&y);
1403: return(0);
1404: }
1406: /* ------------------------------------------------------------------------------*/
1407: PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1408: {
1409: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1410: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1411: const PetscScalar *x,*v;
1412: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1413: PetscErrorCode ierr;
1414: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1415: PetscInt nonzerorow=0,n,i,jrow,j;
1418: VecGetArrayRead(xx,&x);
1419: VecGetArray(yy,&y);
1420: idx = a->j;
1421: v = a->a;
1422: ii = a->i;
1424: for (i=0; i<m; i++) {
1425: jrow = ii[i];
1426: n = ii[i+1] - jrow;
1427: sum1 = 0.0;
1428: sum2 = 0.0;
1429: sum3 = 0.0;
1430: sum4 = 0.0;
1431: sum5 = 0.0;
1432: sum6 = 0.0;
1433: sum7 = 0.0;
1434: sum8 = 0.0;
1435: sum9 = 0.0;
1437: nonzerorow += (n>0);
1438: for (j=0; j<n; j++) {
1439: sum1 += v[jrow]*x[9*idx[jrow]];
1440: sum2 += v[jrow]*x[9*idx[jrow]+1];
1441: sum3 += v[jrow]*x[9*idx[jrow]+2];
1442: sum4 += v[jrow]*x[9*idx[jrow]+3];
1443: sum5 += v[jrow]*x[9*idx[jrow]+4];
1444: sum6 += v[jrow]*x[9*idx[jrow]+5];
1445: sum7 += v[jrow]*x[9*idx[jrow]+6];
1446: sum8 += v[jrow]*x[9*idx[jrow]+7];
1447: sum9 += v[jrow]*x[9*idx[jrow]+8];
1448: jrow++;
1449: }
1450: y[9*i] = sum1;
1451: y[9*i+1] = sum2;
1452: y[9*i+2] = sum3;
1453: y[9*i+3] = sum4;
1454: y[9*i+4] = sum5;
1455: y[9*i+5] = sum6;
1456: y[9*i+6] = sum7;
1457: y[9*i+7] = sum8;
1458: y[9*i+8] = sum9;
1459: }
1461: PetscLogFlops(18.0*a->nz - 9*nonzerorow);
1462: VecRestoreArrayRead(xx,&x);
1463: VecRestoreArray(yy,&y);
1464: return(0);
1465: }
1467: /* ------------------------------------------------------------------------------*/
1469: PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1470: {
1471: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1472: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1473: const PetscScalar *x,*v;
1474: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1475: PetscErrorCode ierr;
1476: const PetscInt m = b->AIJ->rmap->n,*idx;
1477: PetscInt n,i;
1480: VecSet(yy,0.0);
1481: VecGetArrayRead(xx,&x);
1482: VecGetArray(yy,&y);
1484: for (i=0; i<m; i++) {
1485: idx = a->j + a->i[i];
1486: v = a->a + a->i[i];
1487: n = a->i[i+1] - a->i[i];
1488: alpha1 = x[9*i];
1489: alpha2 = x[9*i+1];
1490: alpha3 = x[9*i+2];
1491: alpha4 = x[9*i+3];
1492: alpha5 = x[9*i+4];
1493: alpha6 = x[9*i+5];
1494: alpha7 = x[9*i+6];
1495: alpha8 = x[9*i+7];
1496: alpha9 = x[9*i+8];
1497: while (n-->0) {
1498: y[9*(*idx)] += alpha1*(*v);
1499: y[9*(*idx)+1] += alpha2*(*v);
1500: y[9*(*idx)+2] += alpha3*(*v);
1501: y[9*(*idx)+3] += alpha4*(*v);
1502: y[9*(*idx)+4] += alpha5*(*v);
1503: y[9*(*idx)+5] += alpha6*(*v);
1504: y[9*(*idx)+6] += alpha7*(*v);
1505: y[9*(*idx)+7] += alpha8*(*v);
1506: y[9*(*idx)+8] += alpha9*(*v);
1507: idx++; v++;
1508: }
1509: }
1510: PetscLogFlops(18.0*a->nz);
1511: VecRestoreArrayRead(xx,&x);
1512: VecRestoreArray(yy,&y);
1513: return(0);
1514: }
1516: PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1517: {
1518: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1519: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1520: const PetscScalar *x,*v;
1521: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1522: PetscErrorCode ierr;
1523: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1524: PetscInt n,i,jrow,j;
1527: if (yy != zz) {VecCopy(yy,zz);}
1528: VecGetArrayRead(xx,&x);
1529: VecGetArray(zz,&y);
1530: idx = a->j;
1531: v = a->a;
1532: ii = a->i;
1534: for (i=0; i<m; i++) {
1535: jrow = ii[i];
1536: n = ii[i+1] - jrow;
1537: sum1 = 0.0;
1538: sum2 = 0.0;
1539: sum3 = 0.0;
1540: sum4 = 0.0;
1541: sum5 = 0.0;
1542: sum6 = 0.0;
1543: sum7 = 0.0;
1544: sum8 = 0.0;
1545: sum9 = 0.0;
1546: for (j=0; j<n; j++) {
1547: sum1 += v[jrow]*x[9*idx[jrow]];
1548: sum2 += v[jrow]*x[9*idx[jrow]+1];
1549: sum3 += v[jrow]*x[9*idx[jrow]+2];
1550: sum4 += v[jrow]*x[9*idx[jrow]+3];
1551: sum5 += v[jrow]*x[9*idx[jrow]+4];
1552: sum6 += v[jrow]*x[9*idx[jrow]+5];
1553: sum7 += v[jrow]*x[9*idx[jrow]+6];
1554: sum8 += v[jrow]*x[9*idx[jrow]+7];
1555: sum9 += v[jrow]*x[9*idx[jrow]+8];
1556: jrow++;
1557: }
1558: y[9*i] += sum1;
1559: y[9*i+1] += sum2;
1560: y[9*i+2] += sum3;
1561: y[9*i+3] += sum4;
1562: y[9*i+4] += sum5;
1563: y[9*i+5] += sum6;
1564: y[9*i+6] += sum7;
1565: y[9*i+7] += sum8;
1566: y[9*i+8] += sum9;
1567: }
1569: PetscLogFlops(18.0*a->nz);
1570: VecRestoreArrayRead(xx,&x);
1571: VecRestoreArray(zz,&y);
1572: return(0);
1573: }
1575: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1576: {
1577: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1578: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1579: const PetscScalar *x,*v;
1580: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1581: PetscErrorCode ierr;
1582: const PetscInt m = b->AIJ->rmap->n,*idx;
1583: PetscInt n,i;
1586: if (yy != zz) {VecCopy(yy,zz);}
1587: VecGetArrayRead(xx,&x);
1588: VecGetArray(zz,&y);
1589: for (i=0; i<m; i++) {
1590: idx = a->j + a->i[i];
1591: v = a->a + a->i[i];
1592: n = a->i[i+1] - a->i[i];
1593: alpha1 = x[9*i];
1594: alpha2 = x[9*i+1];
1595: alpha3 = x[9*i+2];
1596: alpha4 = x[9*i+3];
1597: alpha5 = x[9*i+4];
1598: alpha6 = x[9*i+5];
1599: alpha7 = x[9*i+6];
1600: alpha8 = x[9*i+7];
1601: alpha9 = x[9*i+8];
1602: while (n-->0) {
1603: y[9*(*idx)] += alpha1*(*v);
1604: y[9*(*idx)+1] += alpha2*(*v);
1605: y[9*(*idx)+2] += alpha3*(*v);
1606: y[9*(*idx)+3] += alpha4*(*v);
1607: y[9*(*idx)+4] += alpha5*(*v);
1608: y[9*(*idx)+5] += alpha6*(*v);
1609: y[9*(*idx)+6] += alpha7*(*v);
1610: y[9*(*idx)+7] += alpha8*(*v);
1611: y[9*(*idx)+8] += alpha9*(*v);
1612: idx++; v++;
1613: }
1614: }
1615: PetscLogFlops(18.0*a->nz);
1616: VecRestoreArrayRead(xx,&x);
1617: VecRestoreArray(zz,&y);
1618: return(0);
1619: }
1620: PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1621: {
1622: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1623: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1624: const PetscScalar *x,*v;
1625: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1626: PetscErrorCode ierr;
1627: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1628: PetscInt nonzerorow=0,n,i,jrow,j;
1631: VecGetArrayRead(xx,&x);
1632: VecGetArray(yy,&y);
1633: idx = a->j;
1634: v = a->a;
1635: ii = a->i;
1637: for (i=0; i<m; i++) {
1638: jrow = ii[i];
1639: n = ii[i+1] - jrow;
1640: sum1 = 0.0;
1641: sum2 = 0.0;
1642: sum3 = 0.0;
1643: sum4 = 0.0;
1644: sum5 = 0.0;
1645: sum6 = 0.0;
1646: sum7 = 0.0;
1647: sum8 = 0.0;
1648: sum9 = 0.0;
1649: sum10 = 0.0;
1651: nonzerorow += (n>0);
1652: for (j=0; j<n; j++) {
1653: sum1 += v[jrow]*x[10*idx[jrow]];
1654: sum2 += v[jrow]*x[10*idx[jrow]+1];
1655: sum3 += v[jrow]*x[10*idx[jrow]+2];
1656: sum4 += v[jrow]*x[10*idx[jrow]+3];
1657: sum5 += v[jrow]*x[10*idx[jrow]+4];
1658: sum6 += v[jrow]*x[10*idx[jrow]+5];
1659: sum7 += v[jrow]*x[10*idx[jrow]+6];
1660: sum8 += v[jrow]*x[10*idx[jrow]+7];
1661: sum9 += v[jrow]*x[10*idx[jrow]+8];
1662: sum10 += v[jrow]*x[10*idx[jrow]+9];
1663: jrow++;
1664: }
1665: y[10*i] = sum1;
1666: y[10*i+1] = sum2;
1667: y[10*i+2] = sum3;
1668: y[10*i+3] = sum4;
1669: y[10*i+4] = sum5;
1670: y[10*i+5] = sum6;
1671: y[10*i+6] = sum7;
1672: y[10*i+7] = sum8;
1673: y[10*i+8] = sum9;
1674: y[10*i+9] = sum10;
1675: }
1677: PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);
1678: VecRestoreArrayRead(xx,&x);
1679: VecRestoreArray(yy,&y);
1680: return(0);
1681: }
1683: PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1684: {
1685: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1686: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1687: const PetscScalar *x,*v;
1688: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1689: PetscErrorCode ierr;
1690: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1691: PetscInt n,i,jrow,j;
1694: if (yy != zz) {VecCopy(yy,zz);}
1695: VecGetArrayRead(xx,&x);
1696: VecGetArray(zz,&y);
1697: idx = a->j;
1698: v = a->a;
1699: ii = a->i;
1701: for (i=0; i<m; i++) {
1702: jrow = ii[i];
1703: n = ii[i+1] - jrow;
1704: sum1 = 0.0;
1705: sum2 = 0.0;
1706: sum3 = 0.0;
1707: sum4 = 0.0;
1708: sum5 = 0.0;
1709: sum6 = 0.0;
1710: sum7 = 0.0;
1711: sum8 = 0.0;
1712: sum9 = 0.0;
1713: sum10 = 0.0;
1714: for (j=0; j<n; j++) {
1715: sum1 += v[jrow]*x[10*idx[jrow]];
1716: sum2 += v[jrow]*x[10*idx[jrow]+1];
1717: sum3 += v[jrow]*x[10*idx[jrow]+2];
1718: sum4 += v[jrow]*x[10*idx[jrow]+3];
1719: sum5 += v[jrow]*x[10*idx[jrow]+4];
1720: sum6 += v[jrow]*x[10*idx[jrow]+5];
1721: sum7 += v[jrow]*x[10*idx[jrow]+6];
1722: sum8 += v[jrow]*x[10*idx[jrow]+7];
1723: sum9 += v[jrow]*x[10*idx[jrow]+8];
1724: sum10 += v[jrow]*x[10*idx[jrow]+9];
1725: jrow++;
1726: }
1727: y[10*i] += sum1;
1728: y[10*i+1] += sum2;
1729: y[10*i+2] += sum3;
1730: y[10*i+3] += sum4;
1731: y[10*i+4] += sum5;
1732: y[10*i+5] += sum6;
1733: y[10*i+6] += sum7;
1734: y[10*i+7] += sum8;
1735: y[10*i+8] += sum9;
1736: y[10*i+9] += sum10;
1737: }
1739: PetscLogFlops(20.0*a->nz);
1740: VecRestoreArrayRead(xx,&x);
1741: VecRestoreArray(yy,&y);
1742: return(0);
1743: }
1745: PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1746: {
1747: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1748: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1749: const PetscScalar *x,*v;
1750: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1751: PetscErrorCode ierr;
1752: const PetscInt m = b->AIJ->rmap->n,*idx;
1753: PetscInt n,i;
1756: VecSet(yy,0.0);
1757: VecGetArrayRead(xx,&x);
1758: VecGetArray(yy,&y);
1760: for (i=0; i<m; i++) {
1761: idx = a->j + a->i[i];
1762: v = a->a + a->i[i];
1763: n = a->i[i+1] - a->i[i];
1764: alpha1 = x[10*i];
1765: alpha2 = x[10*i+1];
1766: alpha3 = x[10*i+2];
1767: alpha4 = x[10*i+3];
1768: alpha5 = x[10*i+4];
1769: alpha6 = x[10*i+5];
1770: alpha7 = x[10*i+6];
1771: alpha8 = x[10*i+7];
1772: alpha9 = x[10*i+8];
1773: alpha10 = x[10*i+9];
1774: while (n-->0) {
1775: y[10*(*idx)] += alpha1*(*v);
1776: y[10*(*idx)+1] += alpha2*(*v);
1777: y[10*(*idx)+2] += alpha3*(*v);
1778: y[10*(*idx)+3] += alpha4*(*v);
1779: y[10*(*idx)+4] += alpha5*(*v);
1780: y[10*(*idx)+5] += alpha6*(*v);
1781: y[10*(*idx)+6] += alpha7*(*v);
1782: y[10*(*idx)+7] += alpha8*(*v);
1783: y[10*(*idx)+8] += alpha9*(*v);
1784: y[10*(*idx)+9] += alpha10*(*v);
1785: idx++; v++;
1786: }
1787: }
1788: PetscLogFlops(20.0*a->nz);
1789: VecRestoreArrayRead(xx,&x);
1790: VecRestoreArray(yy,&y);
1791: return(0);
1792: }
1794: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1795: {
1796: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1797: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1798: const PetscScalar *x,*v;
1799: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1800: PetscErrorCode ierr;
1801: const PetscInt m = b->AIJ->rmap->n,*idx;
1802: PetscInt n,i;
1805: if (yy != zz) {VecCopy(yy,zz);}
1806: VecGetArrayRead(xx,&x);
1807: VecGetArray(zz,&y);
1808: for (i=0; i<m; i++) {
1809: idx = a->j + a->i[i];
1810: v = a->a + a->i[i];
1811: n = a->i[i+1] - a->i[i];
1812: alpha1 = x[10*i];
1813: alpha2 = x[10*i+1];
1814: alpha3 = x[10*i+2];
1815: alpha4 = x[10*i+3];
1816: alpha5 = x[10*i+4];
1817: alpha6 = x[10*i+5];
1818: alpha7 = x[10*i+6];
1819: alpha8 = x[10*i+7];
1820: alpha9 = x[10*i+8];
1821: alpha10 = x[10*i+9];
1822: while (n-->0) {
1823: y[10*(*idx)] += alpha1*(*v);
1824: y[10*(*idx)+1] += alpha2*(*v);
1825: y[10*(*idx)+2] += alpha3*(*v);
1826: y[10*(*idx)+3] += alpha4*(*v);
1827: y[10*(*idx)+4] += alpha5*(*v);
1828: y[10*(*idx)+5] += alpha6*(*v);
1829: y[10*(*idx)+6] += alpha7*(*v);
1830: y[10*(*idx)+7] += alpha8*(*v);
1831: y[10*(*idx)+8] += alpha9*(*v);
1832: y[10*(*idx)+9] += alpha10*(*v);
1833: idx++; v++;
1834: }
1835: }
1836: PetscLogFlops(20.0*a->nz);
1837: VecRestoreArrayRead(xx,&x);
1838: VecRestoreArray(zz,&y);
1839: return(0);
1840: }
1843: /*--------------------------------------------------------------------------------------------*/
1844: PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1845: {
1846: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1847: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1848: const PetscScalar *x,*v;
1849: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1850: PetscErrorCode ierr;
1851: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1852: PetscInt nonzerorow=0,n,i,jrow,j;
1855: VecGetArrayRead(xx,&x);
1856: VecGetArray(yy,&y);
1857: idx = a->j;
1858: v = a->a;
1859: ii = a->i;
1861: for (i=0; i<m; i++) {
1862: jrow = ii[i];
1863: n = ii[i+1] - jrow;
1864: sum1 = 0.0;
1865: sum2 = 0.0;
1866: sum3 = 0.0;
1867: sum4 = 0.0;
1868: sum5 = 0.0;
1869: sum6 = 0.0;
1870: sum7 = 0.0;
1871: sum8 = 0.0;
1872: sum9 = 0.0;
1873: sum10 = 0.0;
1874: sum11 = 0.0;
1876: nonzerorow += (n>0);
1877: for (j=0; j<n; j++) {
1878: sum1 += v[jrow]*x[11*idx[jrow]];
1879: sum2 += v[jrow]*x[11*idx[jrow]+1];
1880: sum3 += v[jrow]*x[11*idx[jrow]+2];
1881: sum4 += v[jrow]*x[11*idx[jrow]+3];
1882: sum5 += v[jrow]*x[11*idx[jrow]+4];
1883: sum6 += v[jrow]*x[11*idx[jrow]+5];
1884: sum7 += v[jrow]*x[11*idx[jrow]+6];
1885: sum8 += v[jrow]*x[11*idx[jrow]+7];
1886: sum9 += v[jrow]*x[11*idx[jrow]+8];
1887: sum10 += v[jrow]*x[11*idx[jrow]+9];
1888: sum11 += v[jrow]*x[11*idx[jrow]+10];
1889: jrow++;
1890: }
1891: y[11*i] = sum1;
1892: y[11*i+1] = sum2;
1893: y[11*i+2] = sum3;
1894: y[11*i+3] = sum4;
1895: y[11*i+4] = sum5;
1896: y[11*i+5] = sum6;
1897: y[11*i+6] = sum7;
1898: y[11*i+7] = sum8;
1899: y[11*i+8] = sum9;
1900: y[11*i+9] = sum10;
1901: y[11*i+10] = sum11;
1902: }
1904: PetscLogFlops(22.0*a->nz - 11*nonzerorow);
1905: VecRestoreArrayRead(xx,&x);
1906: VecRestoreArray(yy,&y);
1907: return(0);
1908: }
1910: PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1911: {
1912: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1913: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1914: const PetscScalar *x,*v;
1915: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1916: PetscErrorCode ierr;
1917: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1918: PetscInt n,i,jrow,j;
1921: if (yy != zz) {VecCopy(yy,zz);}
1922: VecGetArrayRead(xx,&x);
1923: VecGetArray(zz,&y);
1924: idx = a->j;
1925: v = a->a;
1926: ii = a->i;
1928: for (i=0; i<m; i++) {
1929: jrow = ii[i];
1930: n = ii[i+1] - jrow;
1931: sum1 = 0.0;
1932: sum2 = 0.0;
1933: sum3 = 0.0;
1934: sum4 = 0.0;
1935: sum5 = 0.0;
1936: sum6 = 0.0;
1937: sum7 = 0.0;
1938: sum8 = 0.0;
1939: sum9 = 0.0;
1940: sum10 = 0.0;
1941: sum11 = 0.0;
1942: for (j=0; j<n; j++) {
1943: sum1 += v[jrow]*x[11*idx[jrow]];
1944: sum2 += v[jrow]*x[11*idx[jrow]+1];
1945: sum3 += v[jrow]*x[11*idx[jrow]+2];
1946: sum4 += v[jrow]*x[11*idx[jrow]+3];
1947: sum5 += v[jrow]*x[11*idx[jrow]+4];
1948: sum6 += v[jrow]*x[11*idx[jrow]+5];
1949: sum7 += v[jrow]*x[11*idx[jrow]+6];
1950: sum8 += v[jrow]*x[11*idx[jrow]+7];
1951: sum9 += v[jrow]*x[11*idx[jrow]+8];
1952: sum10 += v[jrow]*x[11*idx[jrow]+9];
1953: sum11 += v[jrow]*x[11*idx[jrow]+10];
1954: jrow++;
1955: }
1956: y[11*i] += sum1;
1957: y[11*i+1] += sum2;
1958: y[11*i+2] += sum3;
1959: y[11*i+3] += sum4;
1960: y[11*i+4] += sum5;
1961: y[11*i+5] += sum6;
1962: y[11*i+6] += sum7;
1963: y[11*i+7] += sum8;
1964: y[11*i+8] += sum9;
1965: y[11*i+9] += sum10;
1966: y[11*i+10] += sum11;
1967: }
1969: PetscLogFlops(22.0*a->nz);
1970: VecRestoreArrayRead(xx,&x);
1971: VecRestoreArray(yy,&y);
1972: return(0);
1973: }
1975: PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1976: {
1977: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1978: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1979: const PetscScalar *x,*v;
1980: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
1981: PetscErrorCode ierr;
1982: const PetscInt m = b->AIJ->rmap->n,*idx;
1983: PetscInt n,i;
1986: VecSet(yy,0.0);
1987: VecGetArrayRead(xx,&x);
1988: VecGetArray(yy,&y);
1990: for (i=0; i<m; i++) {
1991: idx = a->j + a->i[i];
1992: v = a->a + a->i[i];
1993: n = a->i[i+1] - a->i[i];
1994: alpha1 = x[11*i];
1995: alpha2 = x[11*i+1];
1996: alpha3 = x[11*i+2];
1997: alpha4 = x[11*i+3];
1998: alpha5 = x[11*i+4];
1999: alpha6 = x[11*i+5];
2000: alpha7 = x[11*i+6];
2001: alpha8 = x[11*i+7];
2002: alpha9 = x[11*i+8];
2003: alpha10 = x[11*i+9];
2004: alpha11 = x[11*i+10];
2005: while (n-->0) {
2006: y[11*(*idx)] += alpha1*(*v);
2007: y[11*(*idx)+1] += alpha2*(*v);
2008: y[11*(*idx)+2] += alpha3*(*v);
2009: y[11*(*idx)+3] += alpha4*(*v);
2010: y[11*(*idx)+4] += alpha5*(*v);
2011: y[11*(*idx)+5] += alpha6*(*v);
2012: y[11*(*idx)+6] += alpha7*(*v);
2013: y[11*(*idx)+7] += alpha8*(*v);
2014: y[11*(*idx)+8] += alpha9*(*v);
2015: y[11*(*idx)+9] += alpha10*(*v);
2016: y[11*(*idx)+10] += alpha11*(*v);
2017: idx++; v++;
2018: }
2019: }
2020: PetscLogFlops(22.0*a->nz);
2021: VecRestoreArrayRead(xx,&x);
2022: VecRestoreArray(yy,&y);
2023: return(0);
2024: }
2026: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2027: {
2028: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2029: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2030: const PetscScalar *x,*v;
2031: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2032: PetscErrorCode ierr;
2033: const PetscInt m = b->AIJ->rmap->n,*idx;
2034: PetscInt n,i;
2037: if (yy != zz) {VecCopy(yy,zz);}
2038: VecGetArrayRead(xx,&x);
2039: VecGetArray(zz,&y);
2040: for (i=0; i<m; i++) {
2041: idx = a->j + a->i[i];
2042: v = a->a + a->i[i];
2043: n = a->i[i+1] - a->i[i];
2044: alpha1 = x[11*i];
2045: alpha2 = x[11*i+1];
2046: alpha3 = x[11*i+2];
2047: alpha4 = x[11*i+3];
2048: alpha5 = x[11*i+4];
2049: alpha6 = x[11*i+5];
2050: alpha7 = x[11*i+6];
2051: alpha8 = x[11*i+7];
2052: alpha9 = x[11*i+8];
2053: alpha10 = x[11*i+9];
2054: alpha11 = x[11*i+10];
2055: while (n-->0) {
2056: y[11*(*idx)] += alpha1*(*v);
2057: y[11*(*idx)+1] += alpha2*(*v);
2058: y[11*(*idx)+2] += alpha3*(*v);
2059: y[11*(*idx)+3] += alpha4*(*v);
2060: y[11*(*idx)+4] += alpha5*(*v);
2061: y[11*(*idx)+5] += alpha6*(*v);
2062: y[11*(*idx)+6] += alpha7*(*v);
2063: y[11*(*idx)+7] += alpha8*(*v);
2064: y[11*(*idx)+8] += alpha9*(*v);
2065: y[11*(*idx)+9] += alpha10*(*v);
2066: y[11*(*idx)+10] += alpha11*(*v);
2067: idx++; v++;
2068: }
2069: }
2070: PetscLogFlops(22.0*a->nz);
2071: VecRestoreArrayRead(xx,&x);
2072: VecRestoreArray(zz,&y);
2073: return(0);
2074: }
2077: /*--------------------------------------------------------------------------------------------*/
2078: PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2079: {
2080: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2081: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2082: const PetscScalar *x,*v;
2083: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2084: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2085: PetscErrorCode ierr;
2086: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2087: PetscInt nonzerorow=0,n,i,jrow,j;
2090: VecGetArrayRead(xx,&x);
2091: VecGetArray(yy,&y);
2092: idx = a->j;
2093: v = a->a;
2094: ii = a->i;
2096: for (i=0; i<m; i++) {
2097: jrow = ii[i];
2098: n = ii[i+1] - jrow;
2099: sum1 = 0.0;
2100: sum2 = 0.0;
2101: sum3 = 0.0;
2102: sum4 = 0.0;
2103: sum5 = 0.0;
2104: sum6 = 0.0;
2105: sum7 = 0.0;
2106: sum8 = 0.0;
2107: sum9 = 0.0;
2108: sum10 = 0.0;
2109: sum11 = 0.0;
2110: sum12 = 0.0;
2111: sum13 = 0.0;
2112: sum14 = 0.0;
2113: sum15 = 0.0;
2114: sum16 = 0.0;
2116: nonzerorow += (n>0);
2117: for (j=0; j<n; j++) {
2118: sum1 += v[jrow]*x[16*idx[jrow]];
2119: sum2 += v[jrow]*x[16*idx[jrow]+1];
2120: sum3 += v[jrow]*x[16*idx[jrow]+2];
2121: sum4 += v[jrow]*x[16*idx[jrow]+3];
2122: sum5 += v[jrow]*x[16*idx[jrow]+4];
2123: sum6 += v[jrow]*x[16*idx[jrow]+5];
2124: sum7 += v[jrow]*x[16*idx[jrow]+6];
2125: sum8 += v[jrow]*x[16*idx[jrow]+7];
2126: sum9 += v[jrow]*x[16*idx[jrow]+8];
2127: sum10 += v[jrow]*x[16*idx[jrow]+9];
2128: sum11 += v[jrow]*x[16*idx[jrow]+10];
2129: sum12 += v[jrow]*x[16*idx[jrow]+11];
2130: sum13 += v[jrow]*x[16*idx[jrow]+12];
2131: sum14 += v[jrow]*x[16*idx[jrow]+13];
2132: sum15 += v[jrow]*x[16*idx[jrow]+14];
2133: sum16 += v[jrow]*x[16*idx[jrow]+15];
2134: jrow++;
2135: }
2136: y[16*i] = sum1;
2137: y[16*i+1] = sum2;
2138: y[16*i+2] = sum3;
2139: y[16*i+3] = sum4;
2140: y[16*i+4] = sum5;
2141: y[16*i+5] = sum6;
2142: y[16*i+6] = sum7;
2143: y[16*i+7] = sum8;
2144: y[16*i+8] = sum9;
2145: y[16*i+9] = sum10;
2146: y[16*i+10] = sum11;
2147: y[16*i+11] = sum12;
2148: y[16*i+12] = sum13;
2149: y[16*i+13] = sum14;
2150: y[16*i+14] = sum15;
2151: y[16*i+15] = sum16;
2152: }
2154: PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);
2155: VecRestoreArrayRead(xx,&x);
2156: VecRestoreArray(yy,&y);
2157: return(0);
2158: }
2160: PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2161: {
2162: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2163: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2164: const PetscScalar *x,*v;
2165: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2166: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2167: PetscErrorCode ierr;
2168: const PetscInt m = b->AIJ->rmap->n,*idx;
2169: PetscInt n,i;
2172: VecSet(yy,0.0);
2173: VecGetArrayRead(xx,&x);
2174: VecGetArray(yy,&y);
2176: for (i=0; i<m; i++) {
2177: idx = a->j + a->i[i];
2178: v = a->a + a->i[i];
2179: n = a->i[i+1] - a->i[i];
2180: alpha1 = x[16*i];
2181: alpha2 = x[16*i+1];
2182: alpha3 = x[16*i+2];
2183: alpha4 = x[16*i+3];
2184: alpha5 = x[16*i+4];
2185: alpha6 = x[16*i+5];
2186: alpha7 = x[16*i+6];
2187: alpha8 = x[16*i+7];
2188: alpha9 = x[16*i+8];
2189: alpha10 = x[16*i+9];
2190: alpha11 = x[16*i+10];
2191: alpha12 = x[16*i+11];
2192: alpha13 = x[16*i+12];
2193: alpha14 = x[16*i+13];
2194: alpha15 = x[16*i+14];
2195: alpha16 = x[16*i+15];
2196: while (n-->0) {
2197: y[16*(*idx)] += alpha1*(*v);
2198: y[16*(*idx)+1] += alpha2*(*v);
2199: y[16*(*idx)+2] += alpha3*(*v);
2200: y[16*(*idx)+3] += alpha4*(*v);
2201: y[16*(*idx)+4] += alpha5*(*v);
2202: y[16*(*idx)+5] += alpha6*(*v);
2203: y[16*(*idx)+6] += alpha7*(*v);
2204: y[16*(*idx)+7] += alpha8*(*v);
2205: y[16*(*idx)+8] += alpha9*(*v);
2206: y[16*(*idx)+9] += alpha10*(*v);
2207: y[16*(*idx)+10] += alpha11*(*v);
2208: y[16*(*idx)+11] += alpha12*(*v);
2209: y[16*(*idx)+12] += alpha13*(*v);
2210: y[16*(*idx)+13] += alpha14*(*v);
2211: y[16*(*idx)+14] += alpha15*(*v);
2212: y[16*(*idx)+15] += alpha16*(*v);
2213: idx++; v++;
2214: }
2215: }
2216: PetscLogFlops(32.0*a->nz);
2217: VecRestoreArrayRead(xx,&x);
2218: VecRestoreArray(yy,&y);
2219: return(0);
2220: }
2222: PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2223: {
2224: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2225: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2226: const PetscScalar *x,*v;
2227: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2228: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2229: PetscErrorCode ierr;
2230: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2231: PetscInt n,i,jrow,j;
2234: if (yy != zz) {VecCopy(yy,zz);}
2235: VecGetArrayRead(xx,&x);
2236: VecGetArray(zz,&y);
2237: idx = a->j;
2238: v = a->a;
2239: ii = a->i;
2241: for (i=0; i<m; i++) {
2242: jrow = ii[i];
2243: n = ii[i+1] - jrow;
2244: sum1 = 0.0;
2245: sum2 = 0.0;
2246: sum3 = 0.0;
2247: sum4 = 0.0;
2248: sum5 = 0.0;
2249: sum6 = 0.0;
2250: sum7 = 0.0;
2251: sum8 = 0.0;
2252: sum9 = 0.0;
2253: sum10 = 0.0;
2254: sum11 = 0.0;
2255: sum12 = 0.0;
2256: sum13 = 0.0;
2257: sum14 = 0.0;
2258: sum15 = 0.0;
2259: sum16 = 0.0;
2260: for (j=0; j<n; j++) {
2261: sum1 += v[jrow]*x[16*idx[jrow]];
2262: sum2 += v[jrow]*x[16*idx[jrow]+1];
2263: sum3 += v[jrow]*x[16*idx[jrow]+2];
2264: sum4 += v[jrow]*x[16*idx[jrow]+3];
2265: sum5 += v[jrow]*x[16*idx[jrow]+4];
2266: sum6 += v[jrow]*x[16*idx[jrow]+5];
2267: sum7 += v[jrow]*x[16*idx[jrow]+6];
2268: sum8 += v[jrow]*x[16*idx[jrow]+7];
2269: sum9 += v[jrow]*x[16*idx[jrow]+8];
2270: sum10 += v[jrow]*x[16*idx[jrow]+9];
2271: sum11 += v[jrow]*x[16*idx[jrow]+10];
2272: sum12 += v[jrow]*x[16*idx[jrow]+11];
2273: sum13 += v[jrow]*x[16*idx[jrow]+12];
2274: sum14 += v[jrow]*x[16*idx[jrow]+13];
2275: sum15 += v[jrow]*x[16*idx[jrow]+14];
2276: sum16 += v[jrow]*x[16*idx[jrow]+15];
2277: jrow++;
2278: }
2279: y[16*i] += sum1;
2280: y[16*i+1] += sum2;
2281: y[16*i+2] += sum3;
2282: y[16*i+3] += sum4;
2283: y[16*i+4] += sum5;
2284: y[16*i+5] += sum6;
2285: y[16*i+6] += sum7;
2286: y[16*i+7] += sum8;
2287: y[16*i+8] += sum9;
2288: y[16*i+9] += sum10;
2289: y[16*i+10] += sum11;
2290: y[16*i+11] += sum12;
2291: y[16*i+12] += sum13;
2292: y[16*i+13] += sum14;
2293: y[16*i+14] += sum15;
2294: y[16*i+15] += sum16;
2295: }
2297: PetscLogFlops(32.0*a->nz);
2298: VecRestoreArrayRead(xx,&x);
2299: VecRestoreArray(zz,&y);
2300: return(0);
2301: }
2303: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2304: {
2305: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2306: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2307: const PetscScalar *x,*v;
2308: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2309: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2310: PetscErrorCode ierr;
2311: const PetscInt m = b->AIJ->rmap->n,*idx;
2312: PetscInt n,i;
2315: if (yy != zz) {VecCopy(yy,zz);}
2316: VecGetArrayRead(xx,&x);
2317: VecGetArray(zz,&y);
2318: for (i=0; i<m; i++) {
2319: idx = a->j + a->i[i];
2320: v = a->a + a->i[i];
2321: n = a->i[i+1] - a->i[i];
2322: alpha1 = x[16*i];
2323: alpha2 = x[16*i+1];
2324: alpha3 = x[16*i+2];
2325: alpha4 = x[16*i+3];
2326: alpha5 = x[16*i+4];
2327: alpha6 = x[16*i+5];
2328: alpha7 = x[16*i+6];
2329: alpha8 = x[16*i+7];
2330: alpha9 = x[16*i+8];
2331: alpha10 = x[16*i+9];
2332: alpha11 = x[16*i+10];
2333: alpha12 = x[16*i+11];
2334: alpha13 = x[16*i+12];
2335: alpha14 = x[16*i+13];
2336: alpha15 = x[16*i+14];
2337: alpha16 = x[16*i+15];
2338: while (n-->0) {
2339: y[16*(*idx)] += alpha1*(*v);
2340: y[16*(*idx)+1] += alpha2*(*v);
2341: y[16*(*idx)+2] += alpha3*(*v);
2342: y[16*(*idx)+3] += alpha4*(*v);
2343: y[16*(*idx)+4] += alpha5*(*v);
2344: y[16*(*idx)+5] += alpha6*(*v);
2345: y[16*(*idx)+6] += alpha7*(*v);
2346: y[16*(*idx)+7] += alpha8*(*v);
2347: y[16*(*idx)+8] += alpha9*(*v);
2348: y[16*(*idx)+9] += alpha10*(*v);
2349: y[16*(*idx)+10] += alpha11*(*v);
2350: y[16*(*idx)+11] += alpha12*(*v);
2351: y[16*(*idx)+12] += alpha13*(*v);
2352: y[16*(*idx)+13] += alpha14*(*v);
2353: y[16*(*idx)+14] += alpha15*(*v);
2354: y[16*(*idx)+15] += alpha16*(*v);
2355: idx++; v++;
2356: }
2357: }
2358: PetscLogFlops(32.0*a->nz);
2359: VecRestoreArrayRead(xx,&x);
2360: VecRestoreArray(zz,&y);
2361: return(0);
2362: }
2364: /*--------------------------------------------------------------------------------------------*/
2365: PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2366: {
2367: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2368: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2369: const PetscScalar *x,*v;
2370: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2371: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2372: PetscErrorCode ierr;
2373: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2374: PetscInt nonzerorow=0,n,i,jrow,j;
2377: VecGetArrayRead(xx,&x);
2378: VecGetArray(yy,&y);
2379: idx = a->j;
2380: v = a->a;
2381: ii = a->i;
2383: for (i=0; i<m; i++) {
2384: jrow = ii[i];
2385: n = ii[i+1] - jrow;
2386: sum1 = 0.0;
2387: sum2 = 0.0;
2388: sum3 = 0.0;
2389: sum4 = 0.0;
2390: sum5 = 0.0;
2391: sum6 = 0.0;
2392: sum7 = 0.0;
2393: sum8 = 0.0;
2394: sum9 = 0.0;
2395: sum10 = 0.0;
2396: sum11 = 0.0;
2397: sum12 = 0.0;
2398: sum13 = 0.0;
2399: sum14 = 0.0;
2400: sum15 = 0.0;
2401: sum16 = 0.0;
2402: sum17 = 0.0;
2403: sum18 = 0.0;
2405: nonzerorow += (n>0);
2406: for (j=0; j<n; j++) {
2407: sum1 += v[jrow]*x[18*idx[jrow]];
2408: sum2 += v[jrow]*x[18*idx[jrow]+1];
2409: sum3 += v[jrow]*x[18*idx[jrow]+2];
2410: sum4 += v[jrow]*x[18*idx[jrow]+3];
2411: sum5 += v[jrow]*x[18*idx[jrow]+4];
2412: sum6 += v[jrow]*x[18*idx[jrow]+5];
2413: sum7 += v[jrow]*x[18*idx[jrow]+6];
2414: sum8 += v[jrow]*x[18*idx[jrow]+7];
2415: sum9 += v[jrow]*x[18*idx[jrow]+8];
2416: sum10 += v[jrow]*x[18*idx[jrow]+9];
2417: sum11 += v[jrow]*x[18*idx[jrow]+10];
2418: sum12 += v[jrow]*x[18*idx[jrow]+11];
2419: sum13 += v[jrow]*x[18*idx[jrow]+12];
2420: sum14 += v[jrow]*x[18*idx[jrow]+13];
2421: sum15 += v[jrow]*x[18*idx[jrow]+14];
2422: sum16 += v[jrow]*x[18*idx[jrow]+15];
2423: sum17 += v[jrow]*x[18*idx[jrow]+16];
2424: sum18 += v[jrow]*x[18*idx[jrow]+17];
2425: jrow++;
2426: }
2427: y[18*i] = sum1;
2428: y[18*i+1] = sum2;
2429: y[18*i+2] = sum3;
2430: y[18*i+3] = sum4;
2431: y[18*i+4] = sum5;
2432: y[18*i+5] = sum6;
2433: y[18*i+6] = sum7;
2434: y[18*i+7] = sum8;
2435: y[18*i+8] = sum9;
2436: y[18*i+9] = sum10;
2437: y[18*i+10] = sum11;
2438: y[18*i+11] = sum12;
2439: y[18*i+12] = sum13;
2440: y[18*i+13] = sum14;
2441: y[18*i+14] = sum15;
2442: y[18*i+15] = sum16;
2443: y[18*i+16] = sum17;
2444: y[18*i+17] = sum18;
2445: }
2447: PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);
2448: VecRestoreArrayRead(xx,&x);
2449: VecRestoreArray(yy,&y);
2450: return(0);
2451: }
2453: PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2454: {
2455: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2456: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2457: const PetscScalar *x,*v;
2458: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2459: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2460: PetscErrorCode ierr;
2461: const PetscInt m = b->AIJ->rmap->n,*idx;
2462: PetscInt n,i;
2465: VecSet(yy,0.0);
2466: VecGetArrayRead(xx,&x);
2467: VecGetArray(yy,&y);
2469: for (i=0; i<m; i++) {
2470: idx = a->j + a->i[i];
2471: v = a->a + a->i[i];
2472: n = a->i[i+1] - a->i[i];
2473: alpha1 = x[18*i];
2474: alpha2 = x[18*i+1];
2475: alpha3 = x[18*i+2];
2476: alpha4 = x[18*i+3];
2477: alpha5 = x[18*i+4];
2478: alpha6 = x[18*i+5];
2479: alpha7 = x[18*i+6];
2480: alpha8 = x[18*i+7];
2481: alpha9 = x[18*i+8];
2482: alpha10 = x[18*i+9];
2483: alpha11 = x[18*i+10];
2484: alpha12 = x[18*i+11];
2485: alpha13 = x[18*i+12];
2486: alpha14 = x[18*i+13];
2487: alpha15 = x[18*i+14];
2488: alpha16 = x[18*i+15];
2489: alpha17 = x[18*i+16];
2490: alpha18 = x[18*i+17];
2491: while (n-->0) {
2492: y[18*(*idx)] += alpha1*(*v);
2493: y[18*(*idx)+1] += alpha2*(*v);
2494: y[18*(*idx)+2] += alpha3*(*v);
2495: y[18*(*idx)+3] += alpha4*(*v);
2496: y[18*(*idx)+4] += alpha5*(*v);
2497: y[18*(*idx)+5] += alpha6*(*v);
2498: y[18*(*idx)+6] += alpha7*(*v);
2499: y[18*(*idx)+7] += alpha8*(*v);
2500: y[18*(*idx)+8] += alpha9*(*v);
2501: y[18*(*idx)+9] += alpha10*(*v);
2502: y[18*(*idx)+10] += alpha11*(*v);
2503: y[18*(*idx)+11] += alpha12*(*v);
2504: y[18*(*idx)+12] += alpha13*(*v);
2505: y[18*(*idx)+13] += alpha14*(*v);
2506: y[18*(*idx)+14] += alpha15*(*v);
2507: y[18*(*idx)+15] += alpha16*(*v);
2508: y[18*(*idx)+16] += alpha17*(*v);
2509: y[18*(*idx)+17] += alpha18*(*v);
2510: idx++; v++;
2511: }
2512: }
2513: PetscLogFlops(36.0*a->nz);
2514: VecRestoreArrayRead(xx,&x);
2515: VecRestoreArray(yy,&y);
2516: return(0);
2517: }
2519: PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2520: {
2521: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2522: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2523: const PetscScalar *x,*v;
2524: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2525: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2526: PetscErrorCode ierr;
2527: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2528: PetscInt n,i,jrow,j;
2531: if (yy != zz) {VecCopy(yy,zz);}
2532: VecGetArrayRead(xx,&x);
2533: VecGetArray(zz,&y);
2534: idx = a->j;
2535: v = a->a;
2536: ii = a->i;
2538: for (i=0; i<m; i++) {
2539: jrow = ii[i];
2540: n = ii[i+1] - jrow;
2541: sum1 = 0.0;
2542: sum2 = 0.0;
2543: sum3 = 0.0;
2544: sum4 = 0.0;
2545: sum5 = 0.0;
2546: sum6 = 0.0;
2547: sum7 = 0.0;
2548: sum8 = 0.0;
2549: sum9 = 0.0;
2550: sum10 = 0.0;
2551: sum11 = 0.0;
2552: sum12 = 0.0;
2553: sum13 = 0.0;
2554: sum14 = 0.0;
2555: sum15 = 0.0;
2556: sum16 = 0.0;
2557: sum17 = 0.0;
2558: sum18 = 0.0;
2559: for (j=0; j<n; j++) {
2560: sum1 += v[jrow]*x[18*idx[jrow]];
2561: sum2 += v[jrow]*x[18*idx[jrow]+1];
2562: sum3 += v[jrow]*x[18*idx[jrow]+2];
2563: sum4 += v[jrow]*x[18*idx[jrow]+3];
2564: sum5 += v[jrow]*x[18*idx[jrow]+4];
2565: sum6 += v[jrow]*x[18*idx[jrow]+5];
2566: sum7 += v[jrow]*x[18*idx[jrow]+6];
2567: sum8 += v[jrow]*x[18*idx[jrow]+7];
2568: sum9 += v[jrow]*x[18*idx[jrow]+8];
2569: sum10 += v[jrow]*x[18*idx[jrow]+9];
2570: sum11 += v[jrow]*x[18*idx[jrow]+10];
2571: sum12 += v[jrow]*x[18*idx[jrow]+11];
2572: sum13 += v[jrow]*x[18*idx[jrow]+12];
2573: sum14 += v[jrow]*x[18*idx[jrow]+13];
2574: sum15 += v[jrow]*x[18*idx[jrow]+14];
2575: sum16 += v[jrow]*x[18*idx[jrow]+15];
2576: sum17 += v[jrow]*x[18*idx[jrow]+16];
2577: sum18 += v[jrow]*x[18*idx[jrow]+17];
2578: jrow++;
2579: }
2580: y[18*i] += sum1;
2581: y[18*i+1] += sum2;
2582: y[18*i+2] += sum3;
2583: y[18*i+3] += sum4;
2584: y[18*i+4] += sum5;
2585: y[18*i+5] += sum6;
2586: y[18*i+6] += sum7;
2587: y[18*i+7] += sum8;
2588: y[18*i+8] += sum9;
2589: y[18*i+9] += sum10;
2590: y[18*i+10] += sum11;
2591: y[18*i+11] += sum12;
2592: y[18*i+12] += sum13;
2593: y[18*i+13] += sum14;
2594: y[18*i+14] += sum15;
2595: y[18*i+15] += sum16;
2596: y[18*i+16] += sum17;
2597: y[18*i+17] += sum18;
2598: }
2600: PetscLogFlops(36.0*a->nz);
2601: VecRestoreArrayRead(xx,&x);
2602: VecRestoreArray(zz,&y);
2603: return(0);
2604: }
2606: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2607: {
2608: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2609: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2610: const PetscScalar *x,*v;
2611: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2612: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2613: PetscErrorCode ierr;
2614: const PetscInt m = b->AIJ->rmap->n,*idx;
2615: PetscInt n,i;
2618: if (yy != zz) {VecCopy(yy,zz);}
2619: VecGetArrayRead(xx,&x);
2620: VecGetArray(zz,&y);
2621: for (i=0; i<m; i++) {
2622: idx = a->j + a->i[i];
2623: v = a->a + a->i[i];
2624: n = a->i[i+1] - a->i[i];
2625: alpha1 = x[18*i];
2626: alpha2 = x[18*i+1];
2627: alpha3 = x[18*i+2];
2628: alpha4 = x[18*i+3];
2629: alpha5 = x[18*i+4];
2630: alpha6 = x[18*i+5];
2631: alpha7 = x[18*i+6];
2632: alpha8 = x[18*i+7];
2633: alpha9 = x[18*i+8];
2634: alpha10 = x[18*i+9];
2635: alpha11 = x[18*i+10];
2636: alpha12 = x[18*i+11];
2637: alpha13 = x[18*i+12];
2638: alpha14 = x[18*i+13];
2639: alpha15 = x[18*i+14];
2640: alpha16 = x[18*i+15];
2641: alpha17 = x[18*i+16];
2642: alpha18 = x[18*i+17];
2643: while (n-->0) {
2644: y[18*(*idx)] += alpha1*(*v);
2645: y[18*(*idx)+1] += alpha2*(*v);
2646: y[18*(*idx)+2] += alpha3*(*v);
2647: y[18*(*idx)+3] += alpha4*(*v);
2648: y[18*(*idx)+4] += alpha5*(*v);
2649: y[18*(*idx)+5] += alpha6*(*v);
2650: y[18*(*idx)+6] += alpha7*(*v);
2651: y[18*(*idx)+7] += alpha8*(*v);
2652: y[18*(*idx)+8] += alpha9*(*v);
2653: y[18*(*idx)+9] += alpha10*(*v);
2654: y[18*(*idx)+10] += alpha11*(*v);
2655: y[18*(*idx)+11] += alpha12*(*v);
2656: y[18*(*idx)+12] += alpha13*(*v);
2657: y[18*(*idx)+13] += alpha14*(*v);
2658: y[18*(*idx)+14] += alpha15*(*v);
2659: y[18*(*idx)+15] += alpha16*(*v);
2660: y[18*(*idx)+16] += alpha17*(*v);
2661: y[18*(*idx)+17] += alpha18*(*v);
2662: idx++; v++;
2663: }
2664: }
2665: PetscLogFlops(36.0*a->nz);
2666: VecRestoreArrayRead(xx,&x);
2667: VecRestoreArray(zz,&y);
2668: return(0);
2669: }
2671: PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2672: {
2673: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2674: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2675: const PetscScalar *x,*v;
2676: PetscScalar *y,*sums;
2677: PetscErrorCode ierr;
2678: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2679: PetscInt n,i,jrow,j,dof = b->dof,k;
2682: VecGetArrayRead(xx,&x);
2683: VecSet(yy,0.0);
2684: VecGetArray(yy,&y);
2685: idx = a->j;
2686: v = a->a;
2687: ii = a->i;
2689: for (i=0; i<m; i++) {
2690: jrow = ii[i];
2691: n = ii[i+1] - jrow;
2692: sums = y + dof*i;
2693: for (j=0; j<n; j++) {
2694: for (k=0; k<dof; k++) {
2695: sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2696: }
2697: jrow++;
2698: }
2699: }
2701: PetscLogFlops(2.0*dof*a->nz);
2702: VecRestoreArrayRead(xx,&x);
2703: VecRestoreArray(yy,&y);
2704: return(0);
2705: }
2707: PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2708: {
2709: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2710: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2711: const PetscScalar *x,*v;
2712: PetscScalar *y,*sums;
2713: PetscErrorCode ierr;
2714: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2715: PetscInt n,i,jrow,j,dof = b->dof,k;
2718: if (yy != zz) {VecCopy(yy,zz);}
2719: VecGetArrayRead(xx,&x);
2720: VecGetArray(zz,&y);
2721: idx = a->j;
2722: v = a->a;
2723: ii = a->i;
2725: for (i=0; i<m; i++) {
2726: jrow = ii[i];
2727: n = ii[i+1] - jrow;
2728: sums = y + dof*i;
2729: for (j=0; j<n; j++) {
2730: for (k=0; k<dof; k++) {
2731: sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2732: }
2733: jrow++;
2734: }
2735: }
2737: PetscLogFlops(2.0*dof*a->nz);
2738: VecRestoreArrayRead(xx,&x);
2739: VecRestoreArray(zz,&y);
2740: return(0);
2741: }
2743: PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2744: {
2745: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2746: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2747: const PetscScalar *x,*v,*alpha;
2748: PetscScalar *y;
2749: PetscErrorCode ierr;
2750: const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof;
2751: PetscInt n,i,k;
2754: VecGetArrayRead(xx,&x);
2755: VecSet(yy,0.0);
2756: VecGetArray(yy,&y);
2757: for (i=0; i<m; i++) {
2758: idx = a->j + a->i[i];
2759: v = a->a + a->i[i];
2760: n = a->i[i+1] - a->i[i];
2761: alpha = x + dof*i;
2762: while (n-->0) {
2763: for (k=0; k<dof; k++) {
2764: y[dof*(*idx)+k] += alpha[k]*(*v);
2765: }
2766: idx++; v++;
2767: }
2768: }
2769: PetscLogFlops(2.0*dof*a->nz);
2770: VecRestoreArrayRead(xx,&x);
2771: VecRestoreArray(yy,&y);
2772: return(0);
2773: }
2775: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2776: {
2777: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2778: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2779: const PetscScalar *x,*v,*alpha;
2780: PetscScalar *y;
2781: PetscErrorCode ierr;
2782: const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof;
2783: PetscInt n,i,k;
2786: if (yy != zz) {VecCopy(yy,zz);}
2787: VecGetArrayRead(xx,&x);
2788: VecGetArray(zz,&y);
2789: for (i=0; i<m; i++) {
2790: idx = a->j + a->i[i];
2791: v = a->a + a->i[i];
2792: n = a->i[i+1] - a->i[i];
2793: alpha = x + dof*i;
2794: while (n-->0) {
2795: for (k=0; k<dof; k++) {
2796: y[dof*(*idx)+k] += alpha[k]*(*v);
2797: }
2798: idx++; v++;
2799: }
2800: }
2801: PetscLogFlops(2.0*dof*a->nz);
2802: VecRestoreArrayRead(xx,&x);
2803: VecRestoreArray(zz,&y);
2804: return(0);
2805: }
2807: /*===================================================================================*/
2808: PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2809: {
2810: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2814: /* start the scatter */
2815: VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2816: (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
2817: VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2818: (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
2819: return(0);
2820: }
2822: PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2823: {
2824: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2828: (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2829: (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
2830: VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2831: VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2832: return(0);
2833: }
2835: PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2836: {
2837: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2841: /* start the scatter */
2842: VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2843: (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
2844: VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2845: (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);
2846: return(0);
2847: }
2849: PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2850: {
2851: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2855: (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2856: (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
2857: VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2858: VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2859: return(0);
2860: }
2862: /* ----------------------------------------------------------------*/
2863: PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C)
2864: {
2865: Mat_Product *product = C->product;
2868: if (product->type == MATPRODUCT_PtAP) {
2869: C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ;
2870: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices",MatProductTypes[product->type]);
2871: return(0);
2872: }
2874: PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C)
2875: {
2877: Mat_Product *product = C->product;
2878: PetscBool flg = PETSC_FALSE;
2879: Mat A=product->A,P=product->B;
2880: PetscInt alg=1; /* set default algorithm */
2881: #if !defined(PETSC_HAVE_HYPRE)
2882: const char *algTypes[4] = {"scalable","nonscalable","allatonce","allatonce_merged"};
2883: PetscInt nalg=4;
2884: #else
2885: const char *algTypes[5] = {"scalable","nonscalable","allatonce","allatonce_merged","hypre"};
2886: PetscInt nalg=5;
2887: #endif
2890: if (product->type != MATPRODUCT_PtAP) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices",MatProductTypes[product->type]);
2892: /* PtAP */
2893: /* Check matrix local sizes */
2894: if (A->rmap->rstart != P->rmap->rstart || A->rmap->rend != P->rmap->rend) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Arow (%D, %D) != Prow (%D,%D)",A->rmap->rstart,A->rmap->rend,P->rmap->rstart,P->rmap->rend);
2895: if (A->cmap->rstart != P->rmap->rstart || A->cmap->rend != P->rmap->rend) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Acol (%D, %D) != Prow (%D,%D)",A->cmap->rstart,A->cmap->rend,P->rmap->rstart,P->rmap->rend);
2897: /* Set the default algorithm */
2898: PetscStrcmp(C->product->alg,"default",&flg);
2899: if (flg) {
2900: MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
2901: }
2903: /* Get runtime option */
2904: PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");
2905: PetscOptionsEList("-matproduct_ptap_via","Algorithmic approach","MatPtAP",algTypes,nalg,algTypes[alg],&alg,&flg);
2906: if (flg) {
2907: MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
2908: }
2909: PetscOptionsEnd();
2911: PetscStrcmp(C->product->alg,"allatonce",&flg);
2912: if (flg) {
2913: C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
2914: return(0);
2915: }
2917: PetscStrcmp(C->product->alg,"allatonce_merged",&flg);
2918: if (flg) {
2919: C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
2920: return(0);
2921: }
2923: /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */
2924: PetscInfo((PetscObject)A,"Converting from MAIJ to AIJ matrix since implementation not available for MAIJ");
2925: MatConvert(P,MATMPIAIJ,MAT_INPLACE_MATRIX,&P);
2926: (C->ops->productsetfromoptions)(C);
2927: return(0);
2928: }
2930: /* ----------------------------------------------------------------*/
2931: PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat C)
2932: {
2933: PetscErrorCode ierr;
2934: PetscFreeSpaceList free_space=NULL,current_space=NULL;
2935: Mat_SeqMAIJ *pp =(Mat_SeqMAIJ*)PP->data;
2936: Mat P =pp->AIJ;
2937: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2938: PetscInt *pti,*ptj,*ptJ;
2939: PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2940: const PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
2941: PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
2942: MatScalar *ca;
2943: const PetscInt *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;
2946: /* Get ij structure of P^T */
2947: MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
2949: cn = pn*ppdof;
2950: /* Allocate ci array, arrays for fill computation and */
2951: /* free space for accumulating nonzero column info */
2952: PetscMalloc1(cn+1,&ci);
2953: ci[0] = 0;
2955: /* Work arrays for rows of P^T*A */
2956: PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow);
2957: PetscArrayzero(ptadenserow,an);
2958: PetscArrayzero(denserow,cn);
2960: /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2961: /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2962: /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2963: PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space);
2964: current_space = free_space;
2966: /* Determine symbolic info for each row of C: */
2967: for (i=0; i<pn; i++) {
2968: ptnzi = pti[i+1] - pti[i];
2969: ptJ = ptj + pti[i];
2970: for (dof=0; dof<ppdof; dof++) {
2971: ptanzi = 0;
2972: /* Determine symbolic row of PtA: */
2973: for (j=0; j<ptnzi; j++) {
2974: /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2975: arow = ptJ[j]*ppdof + dof;
2976: /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2977: anzj = ai[arow+1] - ai[arow];
2978: ajj = aj + ai[arow];
2979: for (k=0; k<anzj; k++) {
2980: if (!ptadenserow[ajj[k]]) {
2981: ptadenserow[ajj[k]] = -1;
2982: ptasparserow[ptanzi++] = ajj[k];
2983: }
2984: }
2985: }
2986: /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2987: ptaj = ptasparserow;
2988: cnzi = 0;
2989: for (j=0; j<ptanzi; j++) {
2990: /* Get offset within block of P */
2991: pshift = *ptaj%ppdof;
2992: /* Get block row of P */
2993: prow = (*ptaj++)/ppdof; /* integer division */
2994: /* P has same number of nonzeros per row as the compressed form */
2995: pnzj = pi[prow+1] - pi[prow];
2996: pjj = pj + pi[prow];
2997: for (k=0;k<pnzj;k++) {
2998: /* Locations in C are shifted by the offset within the block */
2999: /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
3000: if (!denserow[pjj[k]*ppdof+pshift]) {
3001: denserow[pjj[k]*ppdof+pshift] = -1;
3002: sparserow[cnzi++] = pjj[k]*ppdof+pshift;
3003: }
3004: }
3005: }
3007: /* sort sparserow */
3008: PetscSortInt(cnzi,sparserow);
3010: /* If free space is not available, make more free space */
3011: /* Double the amount of total space in the list */
3012: if (current_space->local_remaining<cnzi) {
3013: PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space);
3014: }
3016: /* Copy data into free space, and zero out denserows */
3017: PetscArraycpy(current_space->array,sparserow,cnzi);
3019: current_space->array += cnzi;
3020: current_space->local_used += cnzi;
3021: current_space->local_remaining -= cnzi;
3023: for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
3024: for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0;
3026: /* Aside: Perhaps we should save the pta info for the numerical factorization. */
3027: /* For now, we will recompute what is needed. */
3028: ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
3029: }
3030: }
3031: /* nnz is now stored in ci[ptm], column indices are in the list of free space */
3032: /* Allocate space for cj, initialize cj, and */
3033: /* destroy list of free space and other temporary array(s) */
3034: PetscMalloc1(ci[cn]+1,&cj);
3035: PetscFreeSpaceContiguous(&free_space,cj);
3036: PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);
3038: /* Allocate space for ca */
3039: PetscCalloc1(ci[cn]+1,&ca);
3041: /* put together the new matrix */
3042: MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,NULL,C);
3043: MatSetBlockSize(C,pp->dof);
3045: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3046: /* Since these are PETSc arrays, change flags to free them as necessary. */
3047: c = (Mat_SeqAIJ*)(C->data);
3048: c->free_a = PETSC_TRUE;
3049: c->free_ij = PETSC_TRUE;
3050: c->nonew = 0;
3052: C->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
3053: C->ops->productnumeric = MatProductNumeric_PtAP;
3055: /* Clean up. */
3056: MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
3057: return(0);
3058: }
3060: PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
3061: {
3062: /* This routine requires testing -- first draft only */
3063: PetscErrorCode ierr;
3064: Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data;
3065: Mat P =pp->AIJ;
3066: Mat_SeqAIJ *a = (Mat_SeqAIJ*) A->data;
3067: Mat_SeqAIJ *p = (Mat_SeqAIJ*) P->data;
3068: Mat_SeqAIJ *c = (Mat_SeqAIJ*) C->data;
3069: const PetscInt *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj;
3070: const PetscInt *ci=c->i,*cj=c->j,*cjj;
3071: const PetscInt am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3072: PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3073: const MatScalar *aa=a->a,*pa=p->a,*pA,*paj;
3074: MatScalar *ca=c->a,*caj,*apa;
3077: /* Allocate temporary array for storage of one row of A*P */
3078: PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense);
3080: /* Clear old values in C */
3081: PetscArrayzero(ca,ci[cm]);
3083: for (i=0; i<am; i++) {
3084: /* Form sparse row of A*P */
3085: anzi = ai[i+1] - ai[i];
3086: apnzj = 0;
3087: for (j=0; j<anzi; j++) {
3088: /* Get offset within block of P */
3089: pshift = *aj%ppdof;
3090: /* Get block row of P */
3091: prow = *aj++/ppdof; /* integer division */
3092: pnzj = pi[prow+1] - pi[prow];
3093: pjj = pj + pi[prow];
3094: paj = pa + pi[prow];
3095: for (k=0; k<pnzj; k++) {
3096: poffset = pjj[k]*ppdof+pshift;
3097: if (!apjdense[poffset]) {
3098: apjdense[poffset] = -1;
3099: apj[apnzj++] = poffset;
3100: }
3101: apa[poffset] += (*aa)*paj[k];
3102: }
3103: PetscLogFlops(2.0*pnzj);
3104: aa++;
3105: }
3107: /* Sort the j index array for quick sparse axpy. */
3108: /* Note: a array does not need sorting as it is in dense storage locations. */
3109: PetscSortInt(apnzj,apj);
3111: /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
3112: prow = i/ppdof; /* integer division */
3113: pshift = i%ppdof;
3114: poffset = pi[prow];
3115: pnzi = pi[prow+1] - poffset;
3116: /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
3117: pJ = pj+poffset;
3118: pA = pa+poffset;
3119: for (j=0; j<pnzi; j++) {
3120: crow = (*pJ)*ppdof+pshift;
3121: cjj = cj + ci[crow];
3122: caj = ca + ci[crow];
3123: pJ++;
3124: /* Perform sparse axpy operation. Note cjj includes apj. */
3125: for (k=0,nextap=0; nextap<apnzj; k++) {
3126: if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]];
3127: }
3128: PetscLogFlops(2.0*apnzj);
3129: pA++;
3130: }
3132: /* Zero the current row info for A*P */
3133: for (j=0; j<apnzj; j++) {
3134: apa[apj[j]] = 0.;
3135: apjdense[apj[j]] = 0;
3136: }
3137: }
3139: /* Assemble the final matrix and clean up */
3140: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
3141: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
3142: PetscFree3(apa,apj,apjdense);
3143: return(0);
3144: }
3146: PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C)
3147: {
3148: PetscErrorCode ierr;
3149: Mat_Product *product = C->product;
3150: Mat A=product->A,P=product->B;
3153: MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,product->fill,C);
3154: return(0);
3155: }
3157: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
3158: {
3160: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPSymbolic is not implemented for MPIMAIJ matrix yet");
3161: return(0);
3162: }
3164: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
3165: {
3167: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
3168: return(0);
3169: }
3171: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,Mat);
3173: PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,Mat C)
3174: {
3175: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data;
3176: PetscErrorCode ierr;
3180: MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,C);
3181: return(0);
3182: }
3184: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,PetscReal,Mat);
3186: PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,PetscReal fill,Mat C)
3187: {
3188: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data;
3189: PetscErrorCode ierr;
3192: MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,fill,C);
3193: C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce;
3194: return(0);
3195: }
3197: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,Mat);
3199: PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,Mat C)
3200: {
3201: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data;
3202: PetscErrorCode ierr;
3206: MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,C);
3207: return(0);
3208: }
3210: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,PetscReal,Mat);
3212: PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,PetscReal fill,Mat C)
3213: {
3214: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data;
3215: PetscErrorCode ierr;
3219: MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,fill,C);
3220: C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged;
3221: return(0);
3222: }
3224: PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C)
3225: {
3226: PetscErrorCode ierr;
3227: Mat_Product *product = C->product;
3228: Mat A=product->A,P=product->B;
3229: PetscBool flg;
3232: PetscStrcmp(product->alg,"allatonce",&flg);
3233: if (flg) {
3234: MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A,P,product->fill,C);
3235: C->ops->productnumeric = MatProductNumeric_PtAP;
3236: return(0);
3237: }
3239: PetscStrcmp(product->alg,"allatonce_merged",&flg);
3240: if (flg) {
3241: MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A,P,product->fill,C);
3242: C->ops->productnumeric = MatProductNumeric_PtAP;
3243: return(0);
3244: }
3246: SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"Mat Product Algorithm is not supported");
3247: return(0);
3248: }
3250: PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3251: {
3252: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
3253: Mat a = b->AIJ,B;
3254: Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data;
3256: PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3257: PetscInt *cols;
3258: PetscScalar *vals;
3261: MatGetSize(a,&m,&n);
3262: PetscMalloc1(dof*m,&ilen);
3263: for (i=0; i<m; i++) {
3264: nmax = PetscMax(nmax,aij->ilen[i]);
3265: for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i];
3266: }
3267: MatCreate(PETSC_COMM_SELF,&B);
3268: MatSetSizes(B,dof*m,dof*n,dof*m,dof*n);
3269: MatSetType(B,newtype);
3270: MatSeqAIJSetPreallocation(B,0,ilen);
3271: PetscFree(ilen);
3272: PetscMalloc1(nmax,&icols);
3273: ii = 0;
3274: for (i=0; i<m; i++) {
3275: MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3276: for (j=0; j<dof; j++) {
3277: for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j;
3278: MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3279: ii++;
3280: }
3281: MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3282: }
3283: PetscFree(icols);
3284: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3285: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3287: if (reuse == MAT_INPLACE_MATRIX) {
3288: MatHeaderReplace(A,&B);
3289: } else {
3290: *newmat = B;
3291: }
3292: return(0);
3293: }
3295: #include <../src/mat/impls/aij/mpi/mpiaij.h>
3297: PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3298: {
3299: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data;
3300: Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
3301: Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
3302: Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
3303: Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
3304: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data;
3305: PetscInt dof = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0;
3306: PetscInt *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL;
3307: PetscInt rstart,cstart,*garray,ii,k;
3309: PetscScalar *vals,*ovals;
3312: PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz);
3313: for (i=0; i<A->rmap->n/dof; i++) {
3314: nmax = PetscMax(nmax,AIJ->ilen[i]);
3315: onmax = PetscMax(onmax,OAIJ->ilen[i]);
3316: for (j=0; j<dof; j++) {
3317: dnz[dof*i+j] = AIJ->ilen[i];
3318: onz[dof*i+j] = OAIJ->ilen[i];
3319: }
3320: }
3321: MatCreate(PetscObjectComm((PetscObject)A),&B);
3322: MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
3323: MatSetType(B,newtype);
3324: MatMPIAIJSetPreallocation(B,0,dnz,0,onz);
3325: MatSetBlockSize(B,dof);
3326: PetscFree2(dnz,onz);
3328: PetscMalloc2(nmax,&icols,onmax,&oicols);
3329: rstart = dof*maij->A->rmap->rstart;
3330: cstart = dof*maij->A->cmap->rstart;
3331: garray = mpiaij->garray;
3333: ii = rstart;
3334: for (i=0; i<A->rmap->n/dof; i++) {
3335: MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3336: MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3337: for (j=0; j<dof; j++) {
3338: for (k=0; k<ncols; k++) {
3339: icols[k] = cstart + dof*cols[k]+j;
3340: }
3341: for (k=0; k<oncols; k++) {
3342: oicols[k] = dof*garray[ocols[k]]+j;
3343: }
3344: MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3345: MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);
3346: ii++;
3347: }
3348: MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3349: MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3350: }
3351: PetscFree2(icols,oicols);
3353: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3354: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3356: if (reuse == MAT_INPLACE_MATRIX) {
3357: PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
3358: ((PetscObject)A)->refct = 1;
3360: MatHeaderReplace(A,&B);
3362: ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3363: } else {
3364: *newmat = B;
3365: }
3366: return(0);
3367: }
3369: PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
3370: {
3372: Mat A;
3375: MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);
3376: MatCreateSubMatrix(A,isrow,iscol,cll,newmat);
3377: MatDestroy(&A);
3378: return(0);
3379: }
3381: PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *submat[])
3382: {
3384: Mat A;
3387: MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);
3388: MatCreateSubMatrices(A,n,irow,icol,scall,submat);
3389: MatDestroy(&A);
3390: return(0);
3391: }
3393: /* ---------------------------------------------------------------------------------- */
3394: /*@
3395: MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
3396: operations for multicomponent problems. It interpolates each component the same
3397: way independently. The matrix type is based on MATSEQAIJ for sequential matrices,
3398: and MATMPIAIJ for distributed matrices.
3400: Collective
3402: Input Parameters:
3403: + A - the AIJ matrix describing the action on blocks
3404: - dof - the block size (number of components per node)
3406: Output Parameter:
3407: . maij - the new MAIJ matrix
3409: Operations provided:
3410: + MatMult
3411: . MatMultTranspose
3412: . MatMultAdd
3413: . MatMultTransposeAdd
3414: - MatView
3416: Level: advanced
3418: .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ
3419: @*/
3420: PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
3421: {
3423: PetscMPIInt size;
3424: PetscInt n;
3425: Mat B;
3426: #if defined(PETSC_HAVE_CUDA)
3427: /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */
3428: PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE;
3429: #endif
3432: dof = PetscAbs(dof);
3433: PetscObjectReference((PetscObject)A);
3435: if (dof == 1) *maij = A;
3436: else {
3437: MatCreate(PetscObjectComm((PetscObject)A),&B);
3438: MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);
3439: PetscLayoutSetBlockSize(B->rmap,dof);
3440: PetscLayoutSetBlockSize(B->cmap,dof);
3441: PetscLayoutSetUp(B->rmap);
3442: PetscLayoutSetUp(B->cmap);
3444: B->assembled = PETSC_TRUE;
3446: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
3447: if (size == 1) {
3448: Mat_SeqMAIJ *b;
3450: MatSetType(B,MATSEQMAIJ);
3452: B->ops->setup = NULL;
3453: B->ops->destroy = MatDestroy_SeqMAIJ;
3454: B->ops->view = MatView_SeqMAIJ;
3456: b = (Mat_SeqMAIJ*)B->data;
3457: b->dof = dof;
3458: b->AIJ = A;
3460: if (dof == 2) {
3461: B->ops->mult = MatMult_SeqMAIJ_2;
3462: B->ops->multadd = MatMultAdd_SeqMAIJ_2;
3463: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2;
3464: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3465: } else if (dof == 3) {
3466: B->ops->mult = MatMult_SeqMAIJ_3;
3467: B->ops->multadd = MatMultAdd_SeqMAIJ_3;
3468: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3;
3469: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3470: } else if (dof == 4) {
3471: B->ops->mult = MatMult_SeqMAIJ_4;
3472: B->ops->multadd = MatMultAdd_SeqMAIJ_4;
3473: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4;
3474: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3475: } else if (dof == 5) {
3476: B->ops->mult = MatMult_SeqMAIJ_5;
3477: B->ops->multadd = MatMultAdd_SeqMAIJ_5;
3478: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5;
3479: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
3480: } else if (dof == 6) {
3481: B->ops->mult = MatMult_SeqMAIJ_6;
3482: B->ops->multadd = MatMultAdd_SeqMAIJ_6;
3483: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6;
3484: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3485: } else if (dof == 7) {
3486: B->ops->mult = MatMult_SeqMAIJ_7;
3487: B->ops->multadd = MatMultAdd_SeqMAIJ_7;
3488: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7;
3489: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
3490: } else if (dof == 8) {
3491: B->ops->mult = MatMult_SeqMAIJ_8;
3492: B->ops->multadd = MatMultAdd_SeqMAIJ_8;
3493: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8;
3494: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
3495: } else if (dof == 9) {
3496: B->ops->mult = MatMult_SeqMAIJ_9;
3497: B->ops->multadd = MatMultAdd_SeqMAIJ_9;
3498: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9;
3499: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3500: } else if (dof == 10) {
3501: B->ops->mult = MatMult_SeqMAIJ_10;
3502: B->ops->multadd = MatMultAdd_SeqMAIJ_10;
3503: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10;
3504: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3505: } else if (dof == 11) {
3506: B->ops->mult = MatMult_SeqMAIJ_11;
3507: B->ops->multadd = MatMultAdd_SeqMAIJ_11;
3508: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11;
3509: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
3510: } else if (dof == 16) {
3511: B->ops->mult = MatMult_SeqMAIJ_16;
3512: B->ops->multadd = MatMultAdd_SeqMAIJ_16;
3513: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16;
3514: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3515: } else if (dof == 18) {
3516: B->ops->mult = MatMult_SeqMAIJ_18;
3517: B->ops->multadd = MatMultAdd_SeqMAIJ_18;
3518: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18;
3519: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
3520: } else {
3521: B->ops->mult = MatMult_SeqMAIJ_N;
3522: B->ops->multadd = MatMultAdd_SeqMAIJ_N;
3523: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N;
3524: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
3525: }
3526: #if defined(PETSC_HAVE_CUDA)
3527: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaijcusparse_C",MatConvert_SeqMAIJ_SeqAIJ);
3528: #endif
3529: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ);
3530: PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaij_seqmaij_C",MatProductSetFromOptions_SeqAIJ_SeqMAIJ);
3531: } else {
3532: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)A->data;
3533: Mat_MPIMAIJ *b;
3534: IS from,to;
3535: Vec gvec;
3537: MatSetType(B,MATMPIMAIJ);
3539: B->ops->setup = NULL;
3540: B->ops->destroy = MatDestroy_MPIMAIJ;
3541: B->ops->view = MatView_MPIMAIJ;
3543: b = (Mat_MPIMAIJ*)B->data;
3544: b->dof = dof;
3545: b->A = A;
3547: MatCreateMAIJ(mpiaij->A,-dof,&b->AIJ);
3548: MatCreateMAIJ(mpiaij->B,-dof,&b->OAIJ);
3550: VecGetSize(mpiaij->lvec,&n);
3551: VecCreate(PETSC_COMM_SELF,&b->w);
3552: VecSetSizes(b->w,n*dof,n*dof);
3553: VecSetBlockSize(b->w,dof);
3554: VecSetType(b->w,VECSEQ);
3556: /* create two temporary Index sets for build scatter gather */
3557: ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);
3558: ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);
3560: /* create temporary global vector to generate scatter context */
3561: VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);
3563: /* generate the scatter context */
3564: VecScatterCreate(gvec,from,b->w,to,&b->ctx);
3566: ISDestroy(&from);
3567: ISDestroy(&to);
3568: VecDestroy(&gvec);
3570: B->ops->mult = MatMult_MPIMAIJ_dof;
3571: B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof;
3572: B->ops->multadd = MatMultAdd_MPIMAIJ_dof;
3573: B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
3575: #if defined(PETSC_HAVE_CUDA)
3576: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaijcusparse_C",MatConvert_MPIMAIJ_MPIAIJ);
3577: #endif
3578: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ);
3579: PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpimaij_C",MatProductSetFromOptions_MPIAIJ_MPIMAIJ);
3580: }
3581: B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ;
3582: B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ;
3583: MatSetFromOptions(B);
3584: MatSetUp(B);
3585: #if defined(PETSC_HAVE_CUDA)
3586: /* temporary until we have CUDA implementation of MAIJ */
3587: {
3588: PetscBool flg;
3589: if (convert) {
3590: PetscObjectTypeCompareAny((PetscObject)A,&flg,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,MATAIJCUSPARSE,"");
3591: if (flg) {
3592: MatConvert(B,((PetscObject)A)->type_name,MAT_INPLACE_MATRIX,&B);
3593: }
3594: }
3595: }
3596: #endif
3597: *maij = B;
3598: MatViewFromOptions(B,NULL,"-mat_view");
3599: }
3600: return(0);
3601: }