Actual source code: maij.c
petsc-3.12.5 2020-03-29
2: /*
3: Defines the basic matrix operations for the MAIJ matrix storage format.
4: This format is used for restriction and interpolation operations for
5: multicomponent problems. It interpolates each component the same way
6: independently.
8: We provide:
9: MatMult()
10: MatMultTranspose()
11: MatMultTransposeAdd()
12: MatMultAdd()
13: and
14: MatCreateMAIJ(Mat,dof,Mat*)
16: This single directory handles both the sequential and parallel codes
17: */
19: #include <../src/mat/impls/maij/maij.h>
20: #include <../src/mat/utils/freespace.h>
22: /*@
23: MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix
25: Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel
27: Input Parameter:
28: . A - the MAIJ matrix
30: Output Parameter:
31: . B - the AIJ matrix
33: Level: advanced
35: Notes:
36: The reference count on the AIJ matrix is not increased so you should not destroy it.
38: .seealso: MatCreateMAIJ()
39: @*/
40: PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B)
41: {
43: PetscBool ismpimaij,isseqmaij;
46: PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
47: PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
48: if (ismpimaij) {
49: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
51: *B = b->A;
52: } else if (isseqmaij) {
53: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
55: *B = b->AIJ;
56: } else {
57: *B = A;
58: }
59: return(0);
60: }
62: /*@
63: MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size
65: Logically Collective
67: Input Parameter:
68: + A - the MAIJ matrix
69: - dof - the block size for the new matrix
71: Output Parameter:
72: . B - the new MAIJ matrix
74: Level: advanced
76: .seealso: MatCreateMAIJ()
77: @*/
78: PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
79: {
81: Mat Aij = NULL;
85: MatMAIJGetAIJ(A,&Aij);
86: MatCreateMAIJ(Aij,dof,B);
87: return(0);
88: }
90: PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
91: {
93: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
96: MatDestroy(&b->AIJ);
97: PetscFree(A->data);
98: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL);
99: PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaij_seqmaij_C",NULL);
100: PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaijperm_seqmaij_C",NULL);
101: PetscObjectComposeFunction((PetscObject)A,"MatPtAP_seqaijmkl_seqmaij_C",NULL);
102: PetscObjectComposeFunction((PetscObject)A,"MatPtAP_is_seqmaij_C",NULL);
103: return(0);
104: }
106: PetscErrorCode MatSetUp_MAIJ(Mat A)
107: {
109: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices");
110: return(0);
111: }
113: PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
114: {
116: Mat B;
119: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
120: MatView(B,viewer);
121: MatDestroy(&B);
122: return(0);
123: }
125: PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
126: {
128: Mat B;
131: MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
132: MatView(B,viewer);
133: MatDestroy(&B);
134: return(0);
135: }
137: PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
138: {
140: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
143: MatDestroy(&b->AIJ);
144: MatDestroy(&b->OAIJ);
145: MatDestroy(&b->A);
146: VecScatterDestroy(&b->ctx);
147: VecDestroy(&b->w);
148: PetscFree(A->data);
149: PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL);
150: PetscObjectComposeFunction((PetscObject)A,"MatPtAP_mpiaij_mpimaij_C",NULL);
151: PetscObjectComposeFunction((PetscObject)A,"MatPtAP_is_mpimaij_C",NULL);
152: PetscObjectChangeTypeName((PetscObject)A,0);
153: return(0);
154: }
156: /*MC
157: MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
158: multicomponent problems, interpolating or restricting each component the same way independently.
159: The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
161: Operations provided:
162: + MatMult
163: . MatMultTranspose
164: . MatMultAdd
165: - MatMultTransposeAdd
167: Level: advanced
169: .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ()
170: M*/
172: PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
173: {
175: Mat_MPIMAIJ *b;
176: PetscMPIInt size;
179: PetscNewLog(A,&b);
180: A->data = (void*)b;
182: PetscMemzero(A->ops,sizeof(struct _MatOps));
184: A->ops->setup = MatSetUp_MAIJ;
186: b->AIJ = 0;
187: b->dof = 0;
188: b->OAIJ = 0;
189: b->ctx = 0;
190: b->w = 0;
191: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
192: if (size == 1) {
193: PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);
194: } else {
195: PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);
196: }
197: A->preallocated = PETSC_TRUE;
198: A->assembled = PETSC_TRUE;
199: return(0);
200: }
202: /* --------------------------------------------------------------------------------------*/
203: PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
204: {
205: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
206: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
207: const PetscScalar *x,*v;
208: PetscScalar *y, sum1, sum2;
209: PetscErrorCode ierr;
210: PetscInt nonzerorow=0,n,i,jrow,j;
211: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
214: VecGetArrayRead(xx,&x);
215: VecGetArray(yy,&y);
216: idx = a->j;
217: v = a->a;
218: ii = a->i;
220: for (i=0; i<m; i++) {
221: jrow = ii[i];
222: n = ii[i+1] - jrow;
223: sum1 = 0.0;
224: sum2 = 0.0;
226: nonzerorow += (n>0);
227: for (j=0; j<n; j++) {
228: sum1 += v[jrow]*x[2*idx[jrow]];
229: sum2 += v[jrow]*x[2*idx[jrow]+1];
230: jrow++;
231: }
232: y[2*i] = sum1;
233: y[2*i+1] = sum2;
234: }
236: PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);
237: VecRestoreArrayRead(xx,&x);
238: VecRestoreArray(yy,&y);
239: return(0);
240: }
242: PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
243: {
244: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
245: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
246: const PetscScalar *x,*v;
247: PetscScalar *y,alpha1,alpha2;
248: PetscErrorCode ierr;
249: PetscInt n,i;
250: const PetscInt m = b->AIJ->rmap->n,*idx;
253: VecSet(yy,0.0);
254: VecGetArrayRead(xx,&x);
255: VecGetArray(yy,&y);
257: for (i=0; i<m; i++) {
258: idx = a->j + a->i[i];
259: v = a->a + a->i[i];
260: n = a->i[i+1] - a->i[i];
261: alpha1 = x[2*i];
262: alpha2 = x[2*i+1];
263: while (n-->0) {
264: y[2*(*idx)] += alpha1*(*v);
265: y[2*(*idx)+1] += alpha2*(*v);
266: idx++; v++;
267: }
268: }
269: PetscLogFlops(4.0*a->nz);
270: VecRestoreArrayRead(xx,&x);
271: VecRestoreArray(yy,&y);
272: return(0);
273: }
275: PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
276: {
277: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
278: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
279: const PetscScalar *x,*v;
280: PetscScalar *y,sum1, sum2;
281: PetscErrorCode ierr;
282: PetscInt n,i,jrow,j;
283: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
286: if (yy != zz) {VecCopy(yy,zz);}
287: VecGetArrayRead(xx,&x);
288: VecGetArray(zz,&y);
289: idx = a->j;
290: v = a->a;
291: ii = a->i;
293: for (i=0; i<m; i++) {
294: jrow = ii[i];
295: n = ii[i+1] - jrow;
296: sum1 = 0.0;
297: sum2 = 0.0;
298: for (j=0; j<n; j++) {
299: sum1 += v[jrow]*x[2*idx[jrow]];
300: sum2 += v[jrow]*x[2*idx[jrow]+1];
301: jrow++;
302: }
303: y[2*i] += sum1;
304: y[2*i+1] += sum2;
305: }
307: PetscLogFlops(4.0*a->nz);
308: VecRestoreArrayRead(xx,&x);
309: VecRestoreArray(zz,&y);
310: return(0);
311: }
312: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
313: {
314: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
315: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
316: const PetscScalar *x,*v;
317: PetscScalar *y,alpha1,alpha2;
318: PetscErrorCode ierr;
319: PetscInt n,i;
320: const PetscInt m = b->AIJ->rmap->n,*idx;
323: if (yy != zz) {VecCopy(yy,zz);}
324: VecGetArrayRead(xx,&x);
325: VecGetArray(zz,&y);
327: for (i=0; i<m; i++) {
328: idx = a->j + a->i[i];
329: v = a->a + a->i[i];
330: n = a->i[i+1] - a->i[i];
331: alpha1 = x[2*i];
332: alpha2 = x[2*i+1];
333: while (n-->0) {
334: y[2*(*idx)] += alpha1*(*v);
335: y[2*(*idx)+1] += alpha2*(*v);
336: idx++; v++;
337: }
338: }
339: PetscLogFlops(4.0*a->nz);
340: VecRestoreArrayRead(xx,&x);
341: VecRestoreArray(zz,&y);
342: return(0);
343: }
344: /* --------------------------------------------------------------------------------------*/
345: PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
346: {
347: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
348: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
349: const PetscScalar *x,*v;
350: PetscScalar *y,sum1, sum2, sum3;
351: PetscErrorCode ierr;
352: PetscInt nonzerorow=0,n,i,jrow,j;
353: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
356: VecGetArrayRead(xx,&x);
357: VecGetArray(yy,&y);
358: idx = a->j;
359: v = a->a;
360: ii = a->i;
362: for (i=0; i<m; i++) {
363: jrow = ii[i];
364: n = ii[i+1] - jrow;
365: sum1 = 0.0;
366: sum2 = 0.0;
367: sum3 = 0.0;
369: nonzerorow += (n>0);
370: for (j=0; j<n; j++) {
371: sum1 += v[jrow]*x[3*idx[jrow]];
372: sum2 += v[jrow]*x[3*idx[jrow]+1];
373: sum3 += v[jrow]*x[3*idx[jrow]+2];
374: jrow++;
375: }
376: y[3*i] = sum1;
377: y[3*i+1] = sum2;
378: y[3*i+2] = sum3;
379: }
381: PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);
382: VecRestoreArrayRead(xx,&x);
383: VecRestoreArray(yy,&y);
384: return(0);
385: }
387: PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
388: {
389: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
390: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
391: const PetscScalar *x,*v;
392: PetscScalar *y,alpha1,alpha2,alpha3;
393: PetscErrorCode ierr;
394: PetscInt n,i;
395: const PetscInt m = b->AIJ->rmap->n,*idx;
398: VecSet(yy,0.0);
399: VecGetArrayRead(xx,&x);
400: VecGetArray(yy,&y);
402: for (i=0; i<m; i++) {
403: idx = a->j + a->i[i];
404: v = a->a + a->i[i];
405: n = a->i[i+1] - a->i[i];
406: alpha1 = x[3*i];
407: alpha2 = x[3*i+1];
408: alpha3 = x[3*i+2];
409: while (n-->0) {
410: y[3*(*idx)] += alpha1*(*v);
411: y[3*(*idx)+1] += alpha2*(*v);
412: y[3*(*idx)+2] += alpha3*(*v);
413: idx++; v++;
414: }
415: }
416: PetscLogFlops(6.0*a->nz);
417: VecRestoreArrayRead(xx,&x);
418: VecRestoreArray(yy,&y);
419: return(0);
420: }
422: PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
423: {
424: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
425: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
426: const PetscScalar *x,*v;
427: PetscScalar *y,sum1, sum2, sum3;
428: PetscErrorCode ierr;
429: PetscInt n,i,jrow,j;
430: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
433: if (yy != zz) {VecCopy(yy,zz);}
434: VecGetArrayRead(xx,&x);
435: VecGetArray(zz,&y);
436: idx = a->j;
437: v = a->a;
438: ii = a->i;
440: for (i=0; i<m; i++) {
441: jrow = ii[i];
442: n = ii[i+1] - jrow;
443: sum1 = 0.0;
444: sum2 = 0.0;
445: sum3 = 0.0;
446: for (j=0; j<n; j++) {
447: sum1 += v[jrow]*x[3*idx[jrow]];
448: sum2 += v[jrow]*x[3*idx[jrow]+1];
449: sum3 += v[jrow]*x[3*idx[jrow]+2];
450: jrow++;
451: }
452: y[3*i] += sum1;
453: y[3*i+1] += sum2;
454: y[3*i+2] += sum3;
455: }
457: PetscLogFlops(6.0*a->nz);
458: VecRestoreArrayRead(xx,&x);
459: VecRestoreArray(zz,&y);
460: return(0);
461: }
462: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
463: {
464: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
465: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
466: const PetscScalar *x,*v;
467: PetscScalar *y,alpha1,alpha2,alpha3;
468: PetscErrorCode ierr;
469: PetscInt n,i;
470: const PetscInt m = b->AIJ->rmap->n,*idx;
473: if (yy != zz) {VecCopy(yy,zz);}
474: VecGetArrayRead(xx,&x);
475: VecGetArray(zz,&y);
476: for (i=0; i<m; i++) {
477: idx = a->j + a->i[i];
478: v = a->a + a->i[i];
479: n = a->i[i+1] - a->i[i];
480: alpha1 = x[3*i];
481: alpha2 = x[3*i+1];
482: alpha3 = x[3*i+2];
483: while (n-->0) {
484: y[3*(*idx)] += alpha1*(*v);
485: y[3*(*idx)+1] += alpha2*(*v);
486: y[3*(*idx)+2] += alpha3*(*v);
487: idx++; v++;
488: }
489: }
490: PetscLogFlops(6.0*a->nz);
491: VecRestoreArrayRead(xx,&x);
492: VecRestoreArray(zz,&y);
493: return(0);
494: }
496: /* ------------------------------------------------------------------------------*/
497: PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
498: {
499: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
500: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
501: const PetscScalar *x,*v;
502: PetscScalar *y,sum1, sum2, sum3, sum4;
503: PetscErrorCode ierr;
504: PetscInt nonzerorow=0,n,i,jrow,j;
505: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
508: VecGetArrayRead(xx,&x);
509: VecGetArray(yy,&y);
510: idx = a->j;
511: v = a->a;
512: ii = a->i;
514: for (i=0; i<m; i++) {
515: jrow = ii[i];
516: n = ii[i+1] - jrow;
517: sum1 = 0.0;
518: sum2 = 0.0;
519: sum3 = 0.0;
520: sum4 = 0.0;
521: nonzerorow += (n>0);
522: for (j=0; j<n; j++) {
523: sum1 += v[jrow]*x[4*idx[jrow]];
524: sum2 += v[jrow]*x[4*idx[jrow]+1];
525: sum3 += v[jrow]*x[4*idx[jrow]+2];
526: sum4 += v[jrow]*x[4*idx[jrow]+3];
527: jrow++;
528: }
529: y[4*i] = sum1;
530: y[4*i+1] = sum2;
531: y[4*i+2] = sum3;
532: y[4*i+3] = sum4;
533: }
535: PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);
536: VecRestoreArrayRead(xx,&x);
537: VecRestoreArray(yy,&y);
538: return(0);
539: }
541: PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
542: {
543: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
544: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
545: const PetscScalar *x,*v;
546: PetscScalar *y,alpha1,alpha2,alpha3,alpha4;
547: PetscErrorCode ierr;
548: PetscInt n,i;
549: const PetscInt m = b->AIJ->rmap->n,*idx;
552: VecSet(yy,0.0);
553: VecGetArrayRead(xx,&x);
554: VecGetArray(yy,&y);
555: for (i=0; i<m; i++) {
556: idx = a->j + a->i[i];
557: v = a->a + a->i[i];
558: n = a->i[i+1] - a->i[i];
559: alpha1 = x[4*i];
560: alpha2 = x[4*i+1];
561: alpha3 = x[4*i+2];
562: alpha4 = x[4*i+3];
563: while (n-->0) {
564: y[4*(*idx)] += alpha1*(*v);
565: y[4*(*idx)+1] += alpha2*(*v);
566: y[4*(*idx)+2] += alpha3*(*v);
567: y[4*(*idx)+3] += alpha4*(*v);
568: idx++; v++;
569: }
570: }
571: PetscLogFlops(8.0*a->nz);
572: VecRestoreArrayRead(xx,&x);
573: VecRestoreArray(yy,&y);
574: return(0);
575: }
577: PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
578: {
579: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
580: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
581: const PetscScalar *x,*v;
582: PetscScalar *y,sum1, sum2, sum3, sum4;
583: PetscErrorCode ierr;
584: PetscInt n,i,jrow,j;
585: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
588: if (yy != zz) {VecCopy(yy,zz);}
589: VecGetArrayRead(xx,&x);
590: VecGetArray(zz,&y);
591: idx = a->j;
592: v = a->a;
593: ii = a->i;
595: for (i=0; i<m; i++) {
596: jrow = ii[i];
597: n = ii[i+1] - jrow;
598: sum1 = 0.0;
599: sum2 = 0.0;
600: sum3 = 0.0;
601: sum4 = 0.0;
602: for (j=0; j<n; j++) {
603: sum1 += v[jrow]*x[4*idx[jrow]];
604: sum2 += v[jrow]*x[4*idx[jrow]+1];
605: sum3 += v[jrow]*x[4*idx[jrow]+2];
606: sum4 += v[jrow]*x[4*idx[jrow]+3];
607: jrow++;
608: }
609: y[4*i] += sum1;
610: y[4*i+1] += sum2;
611: y[4*i+2] += sum3;
612: y[4*i+3] += sum4;
613: }
615: PetscLogFlops(8.0*a->nz);
616: VecRestoreArrayRead(xx,&x);
617: VecRestoreArray(zz,&y);
618: return(0);
619: }
620: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
621: {
622: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
623: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
624: const PetscScalar *x,*v;
625: PetscScalar *y,alpha1,alpha2,alpha3,alpha4;
626: PetscErrorCode ierr;
627: PetscInt n,i;
628: const PetscInt m = b->AIJ->rmap->n,*idx;
631: if (yy != zz) {VecCopy(yy,zz);}
632: VecGetArrayRead(xx,&x);
633: VecGetArray(zz,&y);
635: for (i=0; i<m; i++) {
636: idx = a->j + a->i[i];
637: v = a->a + a->i[i];
638: n = a->i[i+1] - a->i[i];
639: alpha1 = x[4*i];
640: alpha2 = x[4*i+1];
641: alpha3 = x[4*i+2];
642: alpha4 = x[4*i+3];
643: while (n-->0) {
644: y[4*(*idx)] += alpha1*(*v);
645: y[4*(*idx)+1] += alpha2*(*v);
646: y[4*(*idx)+2] += alpha3*(*v);
647: y[4*(*idx)+3] += alpha4*(*v);
648: idx++; v++;
649: }
650: }
651: PetscLogFlops(8.0*a->nz);
652: VecRestoreArrayRead(xx,&x);
653: VecRestoreArray(zz,&y);
654: return(0);
655: }
656: /* ------------------------------------------------------------------------------*/
658: PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
659: {
660: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
661: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
662: const PetscScalar *x,*v;
663: PetscScalar *y,sum1, sum2, sum3, sum4, sum5;
664: PetscErrorCode ierr;
665: PetscInt nonzerorow=0,n,i,jrow,j;
666: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
669: VecGetArrayRead(xx,&x);
670: VecGetArray(yy,&y);
671: idx = a->j;
672: v = a->a;
673: ii = a->i;
675: for (i=0; i<m; i++) {
676: jrow = ii[i];
677: n = ii[i+1] - jrow;
678: sum1 = 0.0;
679: sum2 = 0.0;
680: sum3 = 0.0;
681: sum4 = 0.0;
682: sum5 = 0.0;
684: nonzerorow += (n>0);
685: for (j=0; j<n; j++) {
686: sum1 += v[jrow]*x[5*idx[jrow]];
687: sum2 += v[jrow]*x[5*idx[jrow]+1];
688: sum3 += v[jrow]*x[5*idx[jrow]+2];
689: sum4 += v[jrow]*x[5*idx[jrow]+3];
690: sum5 += v[jrow]*x[5*idx[jrow]+4];
691: jrow++;
692: }
693: y[5*i] = sum1;
694: y[5*i+1] = sum2;
695: y[5*i+2] = sum3;
696: y[5*i+3] = sum4;
697: y[5*i+4] = sum5;
698: }
700: PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);
701: VecRestoreArrayRead(xx,&x);
702: VecRestoreArray(yy,&y);
703: return(0);
704: }
706: PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
707: {
708: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
709: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
710: const PetscScalar *x,*v;
711: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5;
712: PetscErrorCode ierr;
713: PetscInt n,i;
714: const PetscInt m = b->AIJ->rmap->n,*idx;
717: VecSet(yy,0.0);
718: VecGetArrayRead(xx,&x);
719: VecGetArray(yy,&y);
721: for (i=0; i<m; i++) {
722: idx = a->j + a->i[i];
723: v = a->a + a->i[i];
724: n = a->i[i+1] - a->i[i];
725: alpha1 = x[5*i];
726: alpha2 = x[5*i+1];
727: alpha3 = x[5*i+2];
728: alpha4 = x[5*i+3];
729: alpha5 = x[5*i+4];
730: while (n-->0) {
731: y[5*(*idx)] += alpha1*(*v);
732: y[5*(*idx)+1] += alpha2*(*v);
733: y[5*(*idx)+2] += alpha3*(*v);
734: y[5*(*idx)+3] += alpha4*(*v);
735: y[5*(*idx)+4] += alpha5*(*v);
736: idx++; v++;
737: }
738: }
739: PetscLogFlops(10.0*a->nz);
740: VecRestoreArrayRead(xx,&x);
741: VecRestoreArray(yy,&y);
742: return(0);
743: }
745: PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
746: {
747: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
748: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
749: const PetscScalar *x,*v;
750: PetscScalar *y,sum1, sum2, sum3, sum4, sum5;
751: PetscErrorCode ierr;
752: PetscInt n,i,jrow,j;
753: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
756: if (yy != zz) {VecCopy(yy,zz);}
757: VecGetArrayRead(xx,&x);
758: VecGetArray(zz,&y);
759: idx = a->j;
760: v = a->a;
761: ii = a->i;
763: for (i=0; i<m; i++) {
764: jrow = ii[i];
765: n = ii[i+1] - jrow;
766: sum1 = 0.0;
767: sum2 = 0.0;
768: sum3 = 0.0;
769: sum4 = 0.0;
770: sum5 = 0.0;
771: for (j=0; j<n; j++) {
772: sum1 += v[jrow]*x[5*idx[jrow]];
773: sum2 += v[jrow]*x[5*idx[jrow]+1];
774: sum3 += v[jrow]*x[5*idx[jrow]+2];
775: sum4 += v[jrow]*x[5*idx[jrow]+3];
776: sum5 += v[jrow]*x[5*idx[jrow]+4];
777: jrow++;
778: }
779: y[5*i] += sum1;
780: y[5*i+1] += sum2;
781: y[5*i+2] += sum3;
782: y[5*i+3] += sum4;
783: y[5*i+4] += sum5;
784: }
786: PetscLogFlops(10.0*a->nz);
787: VecRestoreArrayRead(xx,&x);
788: VecRestoreArray(zz,&y);
789: return(0);
790: }
792: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
793: {
794: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
795: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
796: const PetscScalar *x,*v;
797: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5;
798: PetscErrorCode ierr;
799: PetscInt n,i;
800: const PetscInt m = b->AIJ->rmap->n,*idx;
803: if (yy != zz) {VecCopy(yy,zz);}
804: VecGetArrayRead(xx,&x);
805: VecGetArray(zz,&y);
807: for (i=0; i<m; i++) {
808: idx = a->j + a->i[i];
809: v = a->a + a->i[i];
810: n = a->i[i+1] - a->i[i];
811: alpha1 = x[5*i];
812: alpha2 = x[5*i+1];
813: alpha3 = x[5*i+2];
814: alpha4 = x[5*i+3];
815: alpha5 = x[5*i+4];
816: while (n-->0) {
817: y[5*(*idx)] += alpha1*(*v);
818: y[5*(*idx)+1] += alpha2*(*v);
819: y[5*(*idx)+2] += alpha3*(*v);
820: y[5*(*idx)+3] += alpha4*(*v);
821: y[5*(*idx)+4] += alpha5*(*v);
822: idx++; v++;
823: }
824: }
825: PetscLogFlops(10.0*a->nz);
826: VecRestoreArrayRead(xx,&x);
827: VecRestoreArray(zz,&y);
828: return(0);
829: }
831: /* ------------------------------------------------------------------------------*/
832: PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
833: {
834: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
835: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
836: const PetscScalar *x,*v;
837: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6;
838: PetscErrorCode ierr;
839: PetscInt nonzerorow=0,n,i,jrow,j;
840: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
843: VecGetArrayRead(xx,&x);
844: VecGetArray(yy,&y);
845: idx = a->j;
846: v = a->a;
847: ii = a->i;
849: for (i=0; i<m; i++) {
850: jrow = ii[i];
851: n = ii[i+1] - jrow;
852: sum1 = 0.0;
853: sum2 = 0.0;
854: sum3 = 0.0;
855: sum4 = 0.0;
856: sum5 = 0.0;
857: sum6 = 0.0;
859: nonzerorow += (n>0);
860: for (j=0; j<n; j++) {
861: sum1 += v[jrow]*x[6*idx[jrow]];
862: sum2 += v[jrow]*x[6*idx[jrow]+1];
863: sum3 += v[jrow]*x[6*idx[jrow]+2];
864: sum4 += v[jrow]*x[6*idx[jrow]+3];
865: sum5 += v[jrow]*x[6*idx[jrow]+4];
866: sum6 += v[jrow]*x[6*idx[jrow]+5];
867: jrow++;
868: }
869: y[6*i] = sum1;
870: y[6*i+1] = sum2;
871: y[6*i+2] = sum3;
872: y[6*i+3] = sum4;
873: y[6*i+4] = sum5;
874: y[6*i+5] = sum6;
875: }
877: PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);
878: VecRestoreArrayRead(xx,&x);
879: VecRestoreArray(yy,&y);
880: return(0);
881: }
883: PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
884: {
885: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
886: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
887: const PetscScalar *x,*v;
888: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
889: PetscErrorCode ierr;
890: PetscInt n,i;
891: const PetscInt m = b->AIJ->rmap->n,*idx;
894: VecSet(yy,0.0);
895: VecGetArrayRead(xx,&x);
896: VecGetArray(yy,&y);
898: for (i=0; i<m; i++) {
899: idx = a->j + a->i[i];
900: v = a->a + a->i[i];
901: n = a->i[i+1] - a->i[i];
902: alpha1 = x[6*i];
903: alpha2 = x[6*i+1];
904: alpha3 = x[6*i+2];
905: alpha4 = x[6*i+3];
906: alpha5 = x[6*i+4];
907: alpha6 = x[6*i+5];
908: while (n-->0) {
909: y[6*(*idx)] += alpha1*(*v);
910: y[6*(*idx)+1] += alpha2*(*v);
911: y[6*(*idx)+2] += alpha3*(*v);
912: y[6*(*idx)+3] += alpha4*(*v);
913: y[6*(*idx)+4] += alpha5*(*v);
914: y[6*(*idx)+5] += alpha6*(*v);
915: idx++; v++;
916: }
917: }
918: PetscLogFlops(12.0*a->nz);
919: VecRestoreArrayRead(xx,&x);
920: VecRestoreArray(yy,&y);
921: return(0);
922: }
924: PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
925: {
926: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
927: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
928: const PetscScalar *x,*v;
929: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6;
930: PetscErrorCode ierr;
931: PetscInt n,i,jrow,j;
932: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
935: if (yy != zz) {VecCopy(yy,zz);}
936: VecGetArrayRead(xx,&x);
937: VecGetArray(zz,&y);
938: idx = a->j;
939: v = a->a;
940: ii = a->i;
942: for (i=0; i<m; i++) {
943: jrow = ii[i];
944: n = ii[i+1] - jrow;
945: sum1 = 0.0;
946: sum2 = 0.0;
947: sum3 = 0.0;
948: sum4 = 0.0;
949: sum5 = 0.0;
950: sum6 = 0.0;
951: for (j=0; j<n; j++) {
952: sum1 += v[jrow]*x[6*idx[jrow]];
953: sum2 += v[jrow]*x[6*idx[jrow]+1];
954: sum3 += v[jrow]*x[6*idx[jrow]+2];
955: sum4 += v[jrow]*x[6*idx[jrow]+3];
956: sum5 += v[jrow]*x[6*idx[jrow]+4];
957: sum6 += v[jrow]*x[6*idx[jrow]+5];
958: jrow++;
959: }
960: y[6*i] += sum1;
961: y[6*i+1] += sum2;
962: y[6*i+2] += sum3;
963: y[6*i+3] += sum4;
964: y[6*i+4] += sum5;
965: y[6*i+5] += sum6;
966: }
968: PetscLogFlops(12.0*a->nz);
969: VecRestoreArrayRead(xx,&x);
970: VecRestoreArray(zz,&y);
971: return(0);
972: }
974: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
975: {
976: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
977: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
978: const PetscScalar *x,*v;
979: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
980: PetscErrorCode ierr;
981: PetscInt n,i;
982: const PetscInt m = b->AIJ->rmap->n,*idx;
985: if (yy != zz) {VecCopy(yy,zz);}
986: VecGetArrayRead(xx,&x);
987: VecGetArray(zz,&y);
989: for (i=0; i<m; i++) {
990: idx = a->j + a->i[i];
991: v = a->a + a->i[i];
992: n = a->i[i+1] - a->i[i];
993: alpha1 = x[6*i];
994: alpha2 = x[6*i+1];
995: alpha3 = x[6*i+2];
996: alpha4 = x[6*i+3];
997: alpha5 = x[6*i+4];
998: alpha6 = x[6*i+5];
999: while (n-->0) {
1000: y[6*(*idx)] += alpha1*(*v);
1001: y[6*(*idx)+1] += alpha2*(*v);
1002: y[6*(*idx)+2] += alpha3*(*v);
1003: y[6*(*idx)+3] += alpha4*(*v);
1004: y[6*(*idx)+4] += alpha5*(*v);
1005: y[6*(*idx)+5] += alpha6*(*v);
1006: idx++; v++;
1007: }
1008: }
1009: PetscLogFlops(12.0*a->nz);
1010: VecRestoreArrayRead(xx,&x);
1011: VecRestoreArray(zz,&y);
1012: return(0);
1013: }
1015: /* ------------------------------------------------------------------------------*/
1016: PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1017: {
1018: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1019: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1020: const PetscScalar *x,*v;
1021: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1022: PetscErrorCode ierr;
1023: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1024: PetscInt nonzerorow=0,n,i,jrow,j;
1027: VecGetArrayRead(xx,&x);
1028: VecGetArray(yy,&y);
1029: idx = a->j;
1030: v = a->a;
1031: ii = a->i;
1033: for (i=0; i<m; i++) {
1034: jrow = ii[i];
1035: n = ii[i+1] - jrow;
1036: sum1 = 0.0;
1037: sum2 = 0.0;
1038: sum3 = 0.0;
1039: sum4 = 0.0;
1040: sum5 = 0.0;
1041: sum6 = 0.0;
1042: sum7 = 0.0;
1044: nonzerorow += (n>0);
1045: for (j=0; j<n; j++) {
1046: sum1 += v[jrow]*x[7*idx[jrow]];
1047: sum2 += v[jrow]*x[7*idx[jrow]+1];
1048: sum3 += v[jrow]*x[7*idx[jrow]+2];
1049: sum4 += v[jrow]*x[7*idx[jrow]+3];
1050: sum5 += v[jrow]*x[7*idx[jrow]+4];
1051: sum6 += v[jrow]*x[7*idx[jrow]+5];
1052: sum7 += v[jrow]*x[7*idx[jrow]+6];
1053: jrow++;
1054: }
1055: y[7*i] = sum1;
1056: y[7*i+1] = sum2;
1057: y[7*i+2] = sum3;
1058: y[7*i+3] = sum4;
1059: y[7*i+4] = sum5;
1060: y[7*i+5] = sum6;
1061: y[7*i+6] = sum7;
1062: }
1064: PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);
1065: VecRestoreArrayRead(xx,&x);
1066: VecRestoreArray(yy,&y);
1067: return(0);
1068: }
1070: PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1071: {
1072: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1073: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1074: const PetscScalar *x,*v;
1075: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1076: PetscErrorCode ierr;
1077: const PetscInt m = b->AIJ->rmap->n,*idx;
1078: PetscInt n,i;
1081: VecSet(yy,0.0);
1082: VecGetArrayRead(xx,&x);
1083: VecGetArray(yy,&y);
1085: for (i=0; i<m; i++) {
1086: idx = a->j + a->i[i];
1087: v = a->a + a->i[i];
1088: n = a->i[i+1] - a->i[i];
1089: alpha1 = x[7*i];
1090: alpha2 = x[7*i+1];
1091: alpha3 = x[7*i+2];
1092: alpha4 = x[7*i+3];
1093: alpha5 = x[7*i+4];
1094: alpha6 = x[7*i+5];
1095: alpha7 = x[7*i+6];
1096: while (n-->0) {
1097: y[7*(*idx)] += alpha1*(*v);
1098: y[7*(*idx)+1] += alpha2*(*v);
1099: y[7*(*idx)+2] += alpha3*(*v);
1100: y[7*(*idx)+3] += alpha4*(*v);
1101: y[7*(*idx)+4] += alpha5*(*v);
1102: y[7*(*idx)+5] += alpha6*(*v);
1103: y[7*(*idx)+6] += alpha7*(*v);
1104: idx++; v++;
1105: }
1106: }
1107: PetscLogFlops(14.0*a->nz);
1108: VecRestoreArrayRead(xx,&x);
1109: VecRestoreArray(yy,&y);
1110: return(0);
1111: }
1113: PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1114: {
1115: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1116: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1117: const PetscScalar *x,*v;
1118: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1119: PetscErrorCode ierr;
1120: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1121: PetscInt n,i,jrow,j;
1124: if (yy != zz) {VecCopy(yy,zz);}
1125: VecGetArrayRead(xx,&x);
1126: VecGetArray(zz,&y);
1127: idx = a->j;
1128: v = a->a;
1129: ii = a->i;
1131: for (i=0; i<m; i++) {
1132: jrow = ii[i];
1133: n = ii[i+1] - jrow;
1134: sum1 = 0.0;
1135: sum2 = 0.0;
1136: sum3 = 0.0;
1137: sum4 = 0.0;
1138: sum5 = 0.0;
1139: sum6 = 0.0;
1140: sum7 = 0.0;
1141: for (j=0; j<n; j++) {
1142: sum1 += v[jrow]*x[7*idx[jrow]];
1143: sum2 += v[jrow]*x[7*idx[jrow]+1];
1144: sum3 += v[jrow]*x[7*idx[jrow]+2];
1145: sum4 += v[jrow]*x[7*idx[jrow]+3];
1146: sum5 += v[jrow]*x[7*idx[jrow]+4];
1147: sum6 += v[jrow]*x[7*idx[jrow]+5];
1148: sum7 += v[jrow]*x[7*idx[jrow]+6];
1149: jrow++;
1150: }
1151: y[7*i] += sum1;
1152: y[7*i+1] += sum2;
1153: y[7*i+2] += sum3;
1154: y[7*i+3] += sum4;
1155: y[7*i+4] += sum5;
1156: y[7*i+5] += sum6;
1157: y[7*i+6] += sum7;
1158: }
1160: PetscLogFlops(14.0*a->nz);
1161: VecRestoreArrayRead(xx,&x);
1162: VecRestoreArray(zz,&y);
1163: return(0);
1164: }
1166: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1167: {
1168: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1169: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1170: const PetscScalar *x,*v;
1171: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1172: PetscErrorCode ierr;
1173: const PetscInt m = b->AIJ->rmap->n,*idx;
1174: PetscInt n,i;
1177: if (yy != zz) {VecCopy(yy,zz);}
1178: VecGetArrayRead(xx,&x);
1179: VecGetArray(zz,&y);
1180: for (i=0; i<m; i++) {
1181: idx = a->j + a->i[i];
1182: v = a->a + a->i[i];
1183: n = a->i[i+1] - a->i[i];
1184: alpha1 = x[7*i];
1185: alpha2 = x[7*i+1];
1186: alpha3 = x[7*i+2];
1187: alpha4 = x[7*i+3];
1188: alpha5 = x[7*i+4];
1189: alpha6 = x[7*i+5];
1190: alpha7 = x[7*i+6];
1191: while (n-->0) {
1192: y[7*(*idx)] += alpha1*(*v);
1193: y[7*(*idx)+1] += alpha2*(*v);
1194: y[7*(*idx)+2] += alpha3*(*v);
1195: y[7*(*idx)+3] += alpha4*(*v);
1196: y[7*(*idx)+4] += alpha5*(*v);
1197: y[7*(*idx)+5] += alpha6*(*v);
1198: y[7*(*idx)+6] += alpha7*(*v);
1199: idx++; v++;
1200: }
1201: }
1202: PetscLogFlops(14.0*a->nz);
1203: VecRestoreArrayRead(xx,&x);
1204: VecRestoreArray(zz,&y);
1205: return(0);
1206: }
1208: PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1209: {
1210: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1211: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1212: const PetscScalar *x,*v;
1213: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1214: PetscErrorCode ierr;
1215: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1216: PetscInt nonzerorow=0,n,i,jrow,j;
1219: VecGetArrayRead(xx,&x);
1220: VecGetArray(yy,&y);
1221: idx = a->j;
1222: v = a->a;
1223: ii = a->i;
1225: for (i=0; i<m; i++) {
1226: jrow = ii[i];
1227: n = ii[i+1] - jrow;
1228: sum1 = 0.0;
1229: sum2 = 0.0;
1230: sum3 = 0.0;
1231: sum4 = 0.0;
1232: sum5 = 0.0;
1233: sum6 = 0.0;
1234: sum7 = 0.0;
1235: sum8 = 0.0;
1237: nonzerorow += (n>0);
1238: for (j=0; j<n; j++) {
1239: sum1 += v[jrow]*x[8*idx[jrow]];
1240: sum2 += v[jrow]*x[8*idx[jrow]+1];
1241: sum3 += v[jrow]*x[8*idx[jrow]+2];
1242: sum4 += v[jrow]*x[8*idx[jrow]+3];
1243: sum5 += v[jrow]*x[8*idx[jrow]+4];
1244: sum6 += v[jrow]*x[8*idx[jrow]+5];
1245: sum7 += v[jrow]*x[8*idx[jrow]+6];
1246: sum8 += v[jrow]*x[8*idx[jrow]+7];
1247: jrow++;
1248: }
1249: y[8*i] = sum1;
1250: y[8*i+1] = sum2;
1251: y[8*i+2] = sum3;
1252: y[8*i+3] = sum4;
1253: y[8*i+4] = sum5;
1254: y[8*i+5] = sum6;
1255: y[8*i+6] = sum7;
1256: y[8*i+7] = sum8;
1257: }
1259: PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);
1260: VecRestoreArrayRead(xx,&x);
1261: VecRestoreArray(yy,&y);
1262: return(0);
1263: }
1265: PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1266: {
1267: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1268: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1269: const PetscScalar *x,*v;
1270: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1271: PetscErrorCode ierr;
1272: const PetscInt m = b->AIJ->rmap->n,*idx;
1273: PetscInt n,i;
1276: VecSet(yy,0.0);
1277: VecGetArrayRead(xx,&x);
1278: VecGetArray(yy,&y);
1280: for (i=0; i<m; i++) {
1281: idx = a->j + a->i[i];
1282: v = a->a + a->i[i];
1283: n = a->i[i+1] - a->i[i];
1284: alpha1 = x[8*i];
1285: alpha2 = x[8*i+1];
1286: alpha3 = x[8*i+2];
1287: alpha4 = x[8*i+3];
1288: alpha5 = x[8*i+4];
1289: alpha6 = x[8*i+5];
1290: alpha7 = x[8*i+6];
1291: alpha8 = x[8*i+7];
1292: while (n-->0) {
1293: y[8*(*idx)] += alpha1*(*v);
1294: y[8*(*idx)+1] += alpha2*(*v);
1295: y[8*(*idx)+2] += alpha3*(*v);
1296: y[8*(*idx)+3] += alpha4*(*v);
1297: y[8*(*idx)+4] += alpha5*(*v);
1298: y[8*(*idx)+5] += alpha6*(*v);
1299: y[8*(*idx)+6] += alpha7*(*v);
1300: y[8*(*idx)+7] += alpha8*(*v);
1301: idx++; v++;
1302: }
1303: }
1304: PetscLogFlops(16.0*a->nz);
1305: VecRestoreArrayRead(xx,&x);
1306: VecRestoreArray(yy,&y);
1307: return(0);
1308: }
1310: PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1311: {
1312: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1313: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1314: const PetscScalar *x,*v;
1315: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1316: PetscErrorCode ierr;
1317: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1318: PetscInt n,i,jrow,j;
1321: if (yy != zz) {VecCopy(yy,zz);}
1322: VecGetArrayRead(xx,&x);
1323: VecGetArray(zz,&y);
1324: idx = a->j;
1325: v = a->a;
1326: ii = a->i;
1328: for (i=0; i<m; i++) {
1329: jrow = ii[i];
1330: n = ii[i+1] - jrow;
1331: sum1 = 0.0;
1332: sum2 = 0.0;
1333: sum3 = 0.0;
1334: sum4 = 0.0;
1335: sum5 = 0.0;
1336: sum6 = 0.0;
1337: sum7 = 0.0;
1338: sum8 = 0.0;
1339: for (j=0; j<n; j++) {
1340: sum1 += v[jrow]*x[8*idx[jrow]];
1341: sum2 += v[jrow]*x[8*idx[jrow]+1];
1342: sum3 += v[jrow]*x[8*idx[jrow]+2];
1343: sum4 += v[jrow]*x[8*idx[jrow]+3];
1344: sum5 += v[jrow]*x[8*idx[jrow]+4];
1345: sum6 += v[jrow]*x[8*idx[jrow]+5];
1346: sum7 += v[jrow]*x[8*idx[jrow]+6];
1347: sum8 += v[jrow]*x[8*idx[jrow]+7];
1348: jrow++;
1349: }
1350: y[8*i] += sum1;
1351: y[8*i+1] += sum2;
1352: y[8*i+2] += sum3;
1353: y[8*i+3] += sum4;
1354: y[8*i+4] += sum5;
1355: y[8*i+5] += sum6;
1356: y[8*i+6] += sum7;
1357: y[8*i+7] += sum8;
1358: }
1360: PetscLogFlops(16.0*a->nz);
1361: VecRestoreArrayRead(xx,&x);
1362: VecRestoreArray(zz,&y);
1363: return(0);
1364: }
1366: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1367: {
1368: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1369: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1370: const PetscScalar *x,*v;
1371: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1372: PetscErrorCode ierr;
1373: const PetscInt m = b->AIJ->rmap->n,*idx;
1374: PetscInt n,i;
1377: if (yy != zz) {VecCopy(yy,zz);}
1378: VecGetArrayRead(xx,&x);
1379: VecGetArray(zz,&y);
1380: for (i=0; i<m; i++) {
1381: idx = a->j + a->i[i];
1382: v = a->a + a->i[i];
1383: n = a->i[i+1] - a->i[i];
1384: alpha1 = x[8*i];
1385: alpha2 = x[8*i+1];
1386: alpha3 = x[8*i+2];
1387: alpha4 = x[8*i+3];
1388: alpha5 = x[8*i+4];
1389: alpha6 = x[8*i+5];
1390: alpha7 = x[8*i+6];
1391: alpha8 = x[8*i+7];
1392: while (n-->0) {
1393: y[8*(*idx)] += alpha1*(*v);
1394: y[8*(*idx)+1] += alpha2*(*v);
1395: y[8*(*idx)+2] += alpha3*(*v);
1396: y[8*(*idx)+3] += alpha4*(*v);
1397: y[8*(*idx)+4] += alpha5*(*v);
1398: y[8*(*idx)+5] += alpha6*(*v);
1399: y[8*(*idx)+6] += alpha7*(*v);
1400: y[8*(*idx)+7] += alpha8*(*v);
1401: idx++; v++;
1402: }
1403: }
1404: PetscLogFlops(16.0*a->nz);
1405: VecRestoreArrayRead(xx,&x);
1406: VecRestoreArray(zz,&y);
1407: return(0);
1408: }
1410: /* ------------------------------------------------------------------------------*/
1411: PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1412: {
1413: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1414: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1415: const PetscScalar *x,*v;
1416: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1417: PetscErrorCode ierr;
1418: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1419: PetscInt nonzerorow=0,n,i,jrow,j;
1422: VecGetArrayRead(xx,&x);
1423: VecGetArray(yy,&y);
1424: idx = a->j;
1425: v = a->a;
1426: ii = a->i;
1428: for (i=0; i<m; i++) {
1429: jrow = ii[i];
1430: n = ii[i+1] - jrow;
1431: sum1 = 0.0;
1432: sum2 = 0.0;
1433: sum3 = 0.0;
1434: sum4 = 0.0;
1435: sum5 = 0.0;
1436: sum6 = 0.0;
1437: sum7 = 0.0;
1438: sum8 = 0.0;
1439: sum9 = 0.0;
1441: nonzerorow += (n>0);
1442: for (j=0; j<n; j++) {
1443: sum1 += v[jrow]*x[9*idx[jrow]];
1444: sum2 += v[jrow]*x[9*idx[jrow]+1];
1445: sum3 += v[jrow]*x[9*idx[jrow]+2];
1446: sum4 += v[jrow]*x[9*idx[jrow]+3];
1447: sum5 += v[jrow]*x[9*idx[jrow]+4];
1448: sum6 += v[jrow]*x[9*idx[jrow]+5];
1449: sum7 += v[jrow]*x[9*idx[jrow]+6];
1450: sum8 += v[jrow]*x[9*idx[jrow]+7];
1451: sum9 += v[jrow]*x[9*idx[jrow]+8];
1452: jrow++;
1453: }
1454: y[9*i] = sum1;
1455: y[9*i+1] = sum2;
1456: y[9*i+2] = sum3;
1457: y[9*i+3] = sum4;
1458: y[9*i+4] = sum5;
1459: y[9*i+5] = sum6;
1460: y[9*i+6] = sum7;
1461: y[9*i+7] = sum8;
1462: y[9*i+8] = sum9;
1463: }
1465: PetscLogFlops(18.0*a->nz - 9*nonzerorow);
1466: VecRestoreArrayRead(xx,&x);
1467: VecRestoreArray(yy,&y);
1468: return(0);
1469: }
1471: /* ------------------------------------------------------------------------------*/
1473: PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1474: {
1475: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1476: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1477: const PetscScalar *x,*v;
1478: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1479: PetscErrorCode ierr;
1480: const PetscInt m = b->AIJ->rmap->n,*idx;
1481: PetscInt n,i;
1484: VecSet(yy,0.0);
1485: VecGetArrayRead(xx,&x);
1486: VecGetArray(yy,&y);
1488: for (i=0; i<m; i++) {
1489: idx = a->j + a->i[i];
1490: v = a->a + a->i[i];
1491: n = a->i[i+1] - a->i[i];
1492: alpha1 = x[9*i];
1493: alpha2 = x[9*i+1];
1494: alpha3 = x[9*i+2];
1495: alpha4 = x[9*i+3];
1496: alpha5 = x[9*i+4];
1497: alpha6 = x[9*i+5];
1498: alpha7 = x[9*i+6];
1499: alpha8 = x[9*i+7];
1500: alpha9 = x[9*i+8];
1501: while (n-->0) {
1502: y[9*(*idx)] += alpha1*(*v);
1503: y[9*(*idx)+1] += alpha2*(*v);
1504: y[9*(*idx)+2] += alpha3*(*v);
1505: y[9*(*idx)+3] += alpha4*(*v);
1506: y[9*(*idx)+4] += alpha5*(*v);
1507: y[9*(*idx)+5] += alpha6*(*v);
1508: y[9*(*idx)+6] += alpha7*(*v);
1509: y[9*(*idx)+7] += alpha8*(*v);
1510: y[9*(*idx)+8] += alpha9*(*v);
1511: idx++; v++;
1512: }
1513: }
1514: PetscLogFlops(18.0*a->nz);
1515: VecRestoreArrayRead(xx,&x);
1516: VecRestoreArray(yy,&y);
1517: return(0);
1518: }
1520: PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1521: {
1522: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1523: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1524: const PetscScalar *x,*v;
1525: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1526: PetscErrorCode ierr;
1527: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1528: PetscInt n,i,jrow,j;
1531: if (yy != zz) {VecCopy(yy,zz);}
1532: VecGetArrayRead(xx,&x);
1533: VecGetArray(zz,&y);
1534: idx = a->j;
1535: v = a->a;
1536: ii = a->i;
1538: for (i=0; i<m; i++) {
1539: jrow = ii[i];
1540: n = ii[i+1] - jrow;
1541: sum1 = 0.0;
1542: sum2 = 0.0;
1543: sum3 = 0.0;
1544: sum4 = 0.0;
1545: sum5 = 0.0;
1546: sum6 = 0.0;
1547: sum7 = 0.0;
1548: sum8 = 0.0;
1549: sum9 = 0.0;
1550: for (j=0; j<n; j++) {
1551: sum1 += v[jrow]*x[9*idx[jrow]];
1552: sum2 += v[jrow]*x[9*idx[jrow]+1];
1553: sum3 += v[jrow]*x[9*idx[jrow]+2];
1554: sum4 += v[jrow]*x[9*idx[jrow]+3];
1555: sum5 += v[jrow]*x[9*idx[jrow]+4];
1556: sum6 += v[jrow]*x[9*idx[jrow]+5];
1557: sum7 += v[jrow]*x[9*idx[jrow]+6];
1558: sum8 += v[jrow]*x[9*idx[jrow]+7];
1559: sum9 += v[jrow]*x[9*idx[jrow]+8];
1560: jrow++;
1561: }
1562: y[9*i] += sum1;
1563: y[9*i+1] += sum2;
1564: y[9*i+2] += sum3;
1565: y[9*i+3] += sum4;
1566: y[9*i+4] += sum5;
1567: y[9*i+5] += sum6;
1568: y[9*i+6] += sum7;
1569: y[9*i+7] += sum8;
1570: y[9*i+8] += sum9;
1571: }
1573: PetscLogFlops(18*a->nz);
1574: VecRestoreArrayRead(xx,&x);
1575: VecRestoreArray(zz,&y);
1576: return(0);
1577: }
1579: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1580: {
1581: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1582: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1583: const PetscScalar *x,*v;
1584: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1585: PetscErrorCode ierr;
1586: const PetscInt m = b->AIJ->rmap->n,*idx;
1587: PetscInt n,i;
1590: if (yy != zz) {VecCopy(yy,zz);}
1591: VecGetArrayRead(xx,&x);
1592: VecGetArray(zz,&y);
1593: for (i=0; i<m; i++) {
1594: idx = a->j + a->i[i];
1595: v = a->a + a->i[i];
1596: n = a->i[i+1] - a->i[i];
1597: alpha1 = x[9*i];
1598: alpha2 = x[9*i+1];
1599: alpha3 = x[9*i+2];
1600: alpha4 = x[9*i+3];
1601: alpha5 = x[9*i+4];
1602: alpha6 = x[9*i+5];
1603: alpha7 = x[9*i+6];
1604: alpha8 = x[9*i+7];
1605: alpha9 = x[9*i+8];
1606: while (n-->0) {
1607: y[9*(*idx)] += alpha1*(*v);
1608: y[9*(*idx)+1] += alpha2*(*v);
1609: y[9*(*idx)+2] += alpha3*(*v);
1610: y[9*(*idx)+3] += alpha4*(*v);
1611: y[9*(*idx)+4] += alpha5*(*v);
1612: y[9*(*idx)+5] += alpha6*(*v);
1613: y[9*(*idx)+6] += alpha7*(*v);
1614: y[9*(*idx)+7] += alpha8*(*v);
1615: y[9*(*idx)+8] += alpha9*(*v);
1616: idx++; v++;
1617: }
1618: }
1619: PetscLogFlops(18.0*a->nz);
1620: VecRestoreArrayRead(xx,&x);
1621: VecRestoreArray(zz,&y);
1622: return(0);
1623: }
1624: PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1625: {
1626: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1627: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1628: const PetscScalar *x,*v;
1629: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1630: PetscErrorCode ierr;
1631: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1632: PetscInt nonzerorow=0,n,i,jrow,j;
1635: VecGetArrayRead(xx,&x);
1636: VecGetArray(yy,&y);
1637: idx = a->j;
1638: v = a->a;
1639: ii = a->i;
1641: for (i=0; i<m; i++) {
1642: jrow = ii[i];
1643: n = ii[i+1] - jrow;
1644: sum1 = 0.0;
1645: sum2 = 0.0;
1646: sum3 = 0.0;
1647: sum4 = 0.0;
1648: sum5 = 0.0;
1649: sum6 = 0.0;
1650: sum7 = 0.0;
1651: sum8 = 0.0;
1652: sum9 = 0.0;
1653: sum10 = 0.0;
1655: nonzerorow += (n>0);
1656: for (j=0; j<n; j++) {
1657: sum1 += v[jrow]*x[10*idx[jrow]];
1658: sum2 += v[jrow]*x[10*idx[jrow]+1];
1659: sum3 += v[jrow]*x[10*idx[jrow]+2];
1660: sum4 += v[jrow]*x[10*idx[jrow]+3];
1661: sum5 += v[jrow]*x[10*idx[jrow]+4];
1662: sum6 += v[jrow]*x[10*idx[jrow]+5];
1663: sum7 += v[jrow]*x[10*idx[jrow]+6];
1664: sum8 += v[jrow]*x[10*idx[jrow]+7];
1665: sum9 += v[jrow]*x[10*idx[jrow]+8];
1666: sum10 += v[jrow]*x[10*idx[jrow]+9];
1667: jrow++;
1668: }
1669: y[10*i] = sum1;
1670: y[10*i+1] = sum2;
1671: y[10*i+2] = sum3;
1672: y[10*i+3] = sum4;
1673: y[10*i+4] = sum5;
1674: y[10*i+5] = sum6;
1675: y[10*i+6] = sum7;
1676: y[10*i+7] = sum8;
1677: y[10*i+8] = sum9;
1678: y[10*i+9] = sum10;
1679: }
1681: PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);
1682: VecRestoreArrayRead(xx,&x);
1683: VecRestoreArray(yy,&y);
1684: return(0);
1685: }
1687: PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1688: {
1689: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1690: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1691: const PetscScalar *x,*v;
1692: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1693: PetscErrorCode ierr;
1694: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1695: PetscInt n,i,jrow,j;
1698: if (yy != zz) {VecCopy(yy,zz);}
1699: VecGetArrayRead(xx,&x);
1700: VecGetArray(zz,&y);
1701: idx = a->j;
1702: v = a->a;
1703: ii = a->i;
1705: for (i=0; i<m; i++) {
1706: jrow = ii[i];
1707: n = ii[i+1] - jrow;
1708: sum1 = 0.0;
1709: sum2 = 0.0;
1710: sum3 = 0.0;
1711: sum4 = 0.0;
1712: sum5 = 0.0;
1713: sum6 = 0.0;
1714: sum7 = 0.0;
1715: sum8 = 0.0;
1716: sum9 = 0.0;
1717: sum10 = 0.0;
1718: for (j=0; j<n; j++) {
1719: sum1 += v[jrow]*x[10*idx[jrow]];
1720: sum2 += v[jrow]*x[10*idx[jrow]+1];
1721: sum3 += v[jrow]*x[10*idx[jrow]+2];
1722: sum4 += v[jrow]*x[10*idx[jrow]+3];
1723: sum5 += v[jrow]*x[10*idx[jrow]+4];
1724: sum6 += v[jrow]*x[10*idx[jrow]+5];
1725: sum7 += v[jrow]*x[10*idx[jrow]+6];
1726: sum8 += v[jrow]*x[10*idx[jrow]+7];
1727: sum9 += v[jrow]*x[10*idx[jrow]+8];
1728: sum10 += v[jrow]*x[10*idx[jrow]+9];
1729: jrow++;
1730: }
1731: y[10*i] += sum1;
1732: y[10*i+1] += sum2;
1733: y[10*i+2] += sum3;
1734: y[10*i+3] += sum4;
1735: y[10*i+4] += sum5;
1736: y[10*i+5] += sum6;
1737: y[10*i+6] += sum7;
1738: y[10*i+7] += sum8;
1739: y[10*i+8] += sum9;
1740: y[10*i+9] += sum10;
1741: }
1743: PetscLogFlops(20.0*a->nz);
1744: VecRestoreArrayRead(xx,&x);
1745: VecRestoreArray(yy,&y);
1746: return(0);
1747: }
1749: PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1750: {
1751: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1752: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1753: const PetscScalar *x,*v;
1754: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1755: PetscErrorCode ierr;
1756: const PetscInt m = b->AIJ->rmap->n,*idx;
1757: PetscInt n,i;
1760: VecSet(yy,0.0);
1761: VecGetArrayRead(xx,&x);
1762: VecGetArray(yy,&y);
1764: for (i=0; i<m; i++) {
1765: idx = a->j + a->i[i];
1766: v = a->a + a->i[i];
1767: n = a->i[i+1] - a->i[i];
1768: alpha1 = x[10*i];
1769: alpha2 = x[10*i+1];
1770: alpha3 = x[10*i+2];
1771: alpha4 = x[10*i+3];
1772: alpha5 = x[10*i+4];
1773: alpha6 = x[10*i+5];
1774: alpha7 = x[10*i+6];
1775: alpha8 = x[10*i+7];
1776: alpha9 = x[10*i+8];
1777: alpha10 = x[10*i+9];
1778: while (n-->0) {
1779: y[10*(*idx)] += alpha1*(*v);
1780: y[10*(*idx)+1] += alpha2*(*v);
1781: y[10*(*idx)+2] += alpha3*(*v);
1782: y[10*(*idx)+3] += alpha4*(*v);
1783: y[10*(*idx)+4] += alpha5*(*v);
1784: y[10*(*idx)+5] += alpha6*(*v);
1785: y[10*(*idx)+6] += alpha7*(*v);
1786: y[10*(*idx)+7] += alpha8*(*v);
1787: y[10*(*idx)+8] += alpha9*(*v);
1788: y[10*(*idx)+9] += alpha10*(*v);
1789: idx++; v++;
1790: }
1791: }
1792: PetscLogFlops(20.0*a->nz);
1793: VecRestoreArrayRead(xx,&x);
1794: VecRestoreArray(yy,&y);
1795: return(0);
1796: }
1798: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1799: {
1800: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1801: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1802: const PetscScalar *x,*v;
1803: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1804: PetscErrorCode ierr;
1805: const PetscInt m = b->AIJ->rmap->n,*idx;
1806: PetscInt n,i;
1809: if (yy != zz) {VecCopy(yy,zz);}
1810: VecGetArrayRead(xx,&x);
1811: VecGetArray(zz,&y);
1812: for (i=0; i<m; i++) {
1813: idx = a->j + a->i[i];
1814: v = a->a + a->i[i];
1815: n = a->i[i+1] - a->i[i];
1816: alpha1 = x[10*i];
1817: alpha2 = x[10*i+1];
1818: alpha3 = x[10*i+2];
1819: alpha4 = x[10*i+3];
1820: alpha5 = x[10*i+4];
1821: alpha6 = x[10*i+5];
1822: alpha7 = x[10*i+6];
1823: alpha8 = x[10*i+7];
1824: alpha9 = x[10*i+8];
1825: alpha10 = x[10*i+9];
1826: while (n-->0) {
1827: y[10*(*idx)] += alpha1*(*v);
1828: y[10*(*idx)+1] += alpha2*(*v);
1829: y[10*(*idx)+2] += alpha3*(*v);
1830: y[10*(*idx)+3] += alpha4*(*v);
1831: y[10*(*idx)+4] += alpha5*(*v);
1832: y[10*(*idx)+5] += alpha6*(*v);
1833: y[10*(*idx)+6] += alpha7*(*v);
1834: y[10*(*idx)+7] += alpha8*(*v);
1835: y[10*(*idx)+8] += alpha9*(*v);
1836: y[10*(*idx)+9] += alpha10*(*v);
1837: idx++; v++;
1838: }
1839: }
1840: PetscLogFlops(20.0*a->nz);
1841: VecRestoreArrayRead(xx,&x);
1842: VecRestoreArray(zz,&y);
1843: return(0);
1844: }
1847: /*--------------------------------------------------------------------------------------------*/
1848: PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1849: {
1850: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1851: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1852: const PetscScalar *x,*v;
1853: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1854: PetscErrorCode ierr;
1855: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1856: PetscInt nonzerorow=0,n,i,jrow,j;
1859: VecGetArrayRead(xx,&x);
1860: VecGetArray(yy,&y);
1861: idx = a->j;
1862: v = a->a;
1863: ii = a->i;
1865: for (i=0; i<m; i++) {
1866: jrow = ii[i];
1867: n = ii[i+1] - jrow;
1868: sum1 = 0.0;
1869: sum2 = 0.0;
1870: sum3 = 0.0;
1871: sum4 = 0.0;
1872: sum5 = 0.0;
1873: sum6 = 0.0;
1874: sum7 = 0.0;
1875: sum8 = 0.0;
1876: sum9 = 0.0;
1877: sum10 = 0.0;
1878: sum11 = 0.0;
1880: nonzerorow += (n>0);
1881: for (j=0; j<n; j++) {
1882: sum1 += v[jrow]*x[11*idx[jrow]];
1883: sum2 += v[jrow]*x[11*idx[jrow]+1];
1884: sum3 += v[jrow]*x[11*idx[jrow]+2];
1885: sum4 += v[jrow]*x[11*idx[jrow]+3];
1886: sum5 += v[jrow]*x[11*idx[jrow]+4];
1887: sum6 += v[jrow]*x[11*idx[jrow]+5];
1888: sum7 += v[jrow]*x[11*idx[jrow]+6];
1889: sum8 += v[jrow]*x[11*idx[jrow]+7];
1890: sum9 += v[jrow]*x[11*idx[jrow]+8];
1891: sum10 += v[jrow]*x[11*idx[jrow]+9];
1892: sum11 += v[jrow]*x[11*idx[jrow]+10];
1893: jrow++;
1894: }
1895: y[11*i] = sum1;
1896: y[11*i+1] = sum2;
1897: y[11*i+2] = sum3;
1898: y[11*i+3] = sum4;
1899: y[11*i+4] = sum5;
1900: y[11*i+5] = sum6;
1901: y[11*i+6] = sum7;
1902: y[11*i+7] = sum8;
1903: y[11*i+8] = sum9;
1904: y[11*i+9] = sum10;
1905: y[11*i+10] = sum11;
1906: }
1908: PetscLogFlops(22*a->nz - 11*nonzerorow);
1909: VecRestoreArrayRead(xx,&x);
1910: VecRestoreArray(yy,&y);
1911: return(0);
1912: }
1914: PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1915: {
1916: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1917: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1918: const PetscScalar *x,*v;
1919: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1920: PetscErrorCode ierr;
1921: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1922: PetscInt n,i,jrow,j;
1925: if (yy != zz) {VecCopy(yy,zz);}
1926: VecGetArrayRead(xx,&x);
1927: VecGetArray(zz,&y);
1928: idx = a->j;
1929: v = a->a;
1930: ii = a->i;
1932: for (i=0; i<m; i++) {
1933: jrow = ii[i];
1934: n = ii[i+1] - jrow;
1935: sum1 = 0.0;
1936: sum2 = 0.0;
1937: sum3 = 0.0;
1938: sum4 = 0.0;
1939: sum5 = 0.0;
1940: sum6 = 0.0;
1941: sum7 = 0.0;
1942: sum8 = 0.0;
1943: sum9 = 0.0;
1944: sum10 = 0.0;
1945: sum11 = 0.0;
1946: for (j=0; j<n; j++) {
1947: sum1 += v[jrow]*x[11*idx[jrow]];
1948: sum2 += v[jrow]*x[11*idx[jrow]+1];
1949: sum3 += v[jrow]*x[11*idx[jrow]+2];
1950: sum4 += v[jrow]*x[11*idx[jrow]+3];
1951: sum5 += v[jrow]*x[11*idx[jrow]+4];
1952: sum6 += v[jrow]*x[11*idx[jrow]+5];
1953: sum7 += v[jrow]*x[11*idx[jrow]+6];
1954: sum8 += v[jrow]*x[11*idx[jrow]+7];
1955: sum9 += v[jrow]*x[11*idx[jrow]+8];
1956: sum10 += v[jrow]*x[11*idx[jrow]+9];
1957: sum11 += v[jrow]*x[11*idx[jrow]+10];
1958: jrow++;
1959: }
1960: y[11*i] += sum1;
1961: y[11*i+1] += sum2;
1962: y[11*i+2] += sum3;
1963: y[11*i+3] += sum4;
1964: y[11*i+4] += sum5;
1965: y[11*i+5] += sum6;
1966: y[11*i+6] += sum7;
1967: y[11*i+7] += sum8;
1968: y[11*i+8] += sum9;
1969: y[11*i+9] += sum10;
1970: y[11*i+10] += sum11;
1971: }
1973: PetscLogFlops(22*a->nz);
1974: VecRestoreArrayRead(xx,&x);
1975: VecRestoreArray(yy,&y);
1976: return(0);
1977: }
1979: PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1980: {
1981: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1982: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1983: const PetscScalar *x,*v;
1984: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
1985: PetscErrorCode ierr;
1986: const PetscInt m = b->AIJ->rmap->n,*idx;
1987: PetscInt n,i;
1990: VecSet(yy,0.0);
1991: VecGetArrayRead(xx,&x);
1992: VecGetArray(yy,&y);
1994: for (i=0; i<m; i++) {
1995: idx = a->j + a->i[i];
1996: v = a->a + a->i[i];
1997: n = a->i[i+1] - a->i[i];
1998: alpha1 = x[11*i];
1999: alpha2 = x[11*i+1];
2000: alpha3 = x[11*i+2];
2001: alpha4 = x[11*i+3];
2002: alpha5 = x[11*i+4];
2003: alpha6 = x[11*i+5];
2004: alpha7 = x[11*i+6];
2005: alpha8 = x[11*i+7];
2006: alpha9 = x[11*i+8];
2007: alpha10 = x[11*i+9];
2008: alpha11 = x[11*i+10];
2009: while (n-->0) {
2010: y[11*(*idx)] += alpha1*(*v);
2011: y[11*(*idx)+1] += alpha2*(*v);
2012: y[11*(*idx)+2] += alpha3*(*v);
2013: y[11*(*idx)+3] += alpha4*(*v);
2014: y[11*(*idx)+4] += alpha5*(*v);
2015: y[11*(*idx)+5] += alpha6*(*v);
2016: y[11*(*idx)+6] += alpha7*(*v);
2017: y[11*(*idx)+7] += alpha8*(*v);
2018: y[11*(*idx)+8] += alpha9*(*v);
2019: y[11*(*idx)+9] += alpha10*(*v);
2020: y[11*(*idx)+10] += alpha11*(*v);
2021: idx++; v++;
2022: }
2023: }
2024: PetscLogFlops(22*a->nz);
2025: VecRestoreArrayRead(xx,&x);
2026: VecRestoreArray(yy,&y);
2027: return(0);
2028: }
2030: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2031: {
2032: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2033: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2034: const PetscScalar *x,*v;
2035: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2036: PetscErrorCode ierr;
2037: const PetscInt m = b->AIJ->rmap->n,*idx;
2038: PetscInt n,i;
2041: if (yy != zz) {VecCopy(yy,zz);}
2042: VecGetArrayRead(xx,&x);
2043: VecGetArray(zz,&y);
2044: for (i=0; i<m; i++) {
2045: idx = a->j + a->i[i];
2046: v = a->a + a->i[i];
2047: n = a->i[i+1] - a->i[i];
2048: alpha1 = x[11*i];
2049: alpha2 = x[11*i+1];
2050: alpha3 = x[11*i+2];
2051: alpha4 = x[11*i+3];
2052: alpha5 = x[11*i+4];
2053: alpha6 = x[11*i+5];
2054: alpha7 = x[11*i+6];
2055: alpha8 = x[11*i+7];
2056: alpha9 = x[11*i+8];
2057: alpha10 = x[11*i+9];
2058: alpha11 = x[11*i+10];
2059: while (n-->0) {
2060: y[11*(*idx)] += alpha1*(*v);
2061: y[11*(*idx)+1] += alpha2*(*v);
2062: y[11*(*idx)+2] += alpha3*(*v);
2063: y[11*(*idx)+3] += alpha4*(*v);
2064: y[11*(*idx)+4] += alpha5*(*v);
2065: y[11*(*idx)+5] += alpha6*(*v);
2066: y[11*(*idx)+6] += alpha7*(*v);
2067: y[11*(*idx)+7] += alpha8*(*v);
2068: y[11*(*idx)+8] += alpha9*(*v);
2069: y[11*(*idx)+9] += alpha10*(*v);
2070: y[11*(*idx)+10] += alpha11*(*v);
2071: idx++; v++;
2072: }
2073: }
2074: PetscLogFlops(22*a->nz);
2075: VecRestoreArrayRead(xx,&x);
2076: VecRestoreArray(zz,&y);
2077: return(0);
2078: }
2081: /*--------------------------------------------------------------------------------------------*/
2082: PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2083: {
2084: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2085: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2086: const PetscScalar *x,*v;
2087: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2088: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2089: PetscErrorCode ierr;
2090: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2091: PetscInt nonzerorow=0,n,i,jrow,j;
2094: VecGetArrayRead(xx,&x);
2095: VecGetArray(yy,&y);
2096: idx = a->j;
2097: v = a->a;
2098: ii = a->i;
2100: for (i=0; i<m; i++) {
2101: jrow = ii[i];
2102: n = ii[i+1] - jrow;
2103: sum1 = 0.0;
2104: sum2 = 0.0;
2105: sum3 = 0.0;
2106: sum4 = 0.0;
2107: sum5 = 0.0;
2108: sum6 = 0.0;
2109: sum7 = 0.0;
2110: sum8 = 0.0;
2111: sum9 = 0.0;
2112: sum10 = 0.0;
2113: sum11 = 0.0;
2114: sum12 = 0.0;
2115: sum13 = 0.0;
2116: sum14 = 0.0;
2117: sum15 = 0.0;
2118: sum16 = 0.0;
2120: nonzerorow += (n>0);
2121: for (j=0; j<n; j++) {
2122: sum1 += v[jrow]*x[16*idx[jrow]];
2123: sum2 += v[jrow]*x[16*idx[jrow]+1];
2124: sum3 += v[jrow]*x[16*idx[jrow]+2];
2125: sum4 += v[jrow]*x[16*idx[jrow]+3];
2126: sum5 += v[jrow]*x[16*idx[jrow]+4];
2127: sum6 += v[jrow]*x[16*idx[jrow]+5];
2128: sum7 += v[jrow]*x[16*idx[jrow]+6];
2129: sum8 += v[jrow]*x[16*idx[jrow]+7];
2130: sum9 += v[jrow]*x[16*idx[jrow]+8];
2131: sum10 += v[jrow]*x[16*idx[jrow]+9];
2132: sum11 += v[jrow]*x[16*idx[jrow]+10];
2133: sum12 += v[jrow]*x[16*idx[jrow]+11];
2134: sum13 += v[jrow]*x[16*idx[jrow]+12];
2135: sum14 += v[jrow]*x[16*idx[jrow]+13];
2136: sum15 += v[jrow]*x[16*idx[jrow]+14];
2137: sum16 += v[jrow]*x[16*idx[jrow]+15];
2138: jrow++;
2139: }
2140: y[16*i] = sum1;
2141: y[16*i+1] = sum2;
2142: y[16*i+2] = sum3;
2143: y[16*i+3] = sum4;
2144: y[16*i+4] = sum5;
2145: y[16*i+5] = sum6;
2146: y[16*i+6] = sum7;
2147: y[16*i+7] = sum8;
2148: y[16*i+8] = sum9;
2149: y[16*i+9] = sum10;
2150: y[16*i+10] = sum11;
2151: y[16*i+11] = sum12;
2152: y[16*i+12] = sum13;
2153: y[16*i+13] = sum14;
2154: y[16*i+14] = sum15;
2155: y[16*i+15] = sum16;
2156: }
2158: PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);
2159: VecRestoreArrayRead(xx,&x);
2160: VecRestoreArray(yy,&y);
2161: return(0);
2162: }
2164: PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2165: {
2166: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2167: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2168: const PetscScalar *x,*v;
2169: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2170: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2171: PetscErrorCode ierr;
2172: const PetscInt m = b->AIJ->rmap->n,*idx;
2173: PetscInt n,i;
2176: VecSet(yy,0.0);
2177: VecGetArrayRead(xx,&x);
2178: VecGetArray(yy,&y);
2180: for (i=0; i<m; i++) {
2181: idx = a->j + a->i[i];
2182: v = a->a + a->i[i];
2183: n = a->i[i+1] - a->i[i];
2184: alpha1 = x[16*i];
2185: alpha2 = x[16*i+1];
2186: alpha3 = x[16*i+2];
2187: alpha4 = x[16*i+3];
2188: alpha5 = x[16*i+4];
2189: alpha6 = x[16*i+5];
2190: alpha7 = x[16*i+6];
2191: alpha8 = x[16*i+7];
2192: alpha9 = x[16*i+8];
2193: alpha10 = x[16*i+9];
2194: alpha11 = x[16*i+10];
2195: alpha12 = x[16*i+11];
2196: alpha13 = x[16*i+12];
2197: alpha14 = x[16*i+13];
2198: alpha15 = x[16*i+14];
2199: alpha16 = x[16*i+15];
2200: while (n-->0) {
2201: y[16*(*idx)] += alpha1*(*v);
2202: y[16*(*idx)+1] += alpha2*(*v);
2203: y[16*(*idx)+2] += alpha3*(*v);
2204: y[16*(*idx)+3] += alpha4*(*v);
2205: y[16*(*idx)+4] += alpha5*(*v);
2206: y[16*(*idx)+5] += alpha6*(*v);
2207: y[16*(*idx)+6] += alpha7*(*v);
2208: y[16*(*idx)+7] += alpha8*(*v);
2209: y[16*(*idx)+8] += alpha9*(*v);
2210: y[16*(*idx)+9] += alpha10*(*v);
2211: y[16*(*idx)+10] += alpha11*(*v);
2212: y[16*(*idx)+11] += alpha12*(*v);
2213: y[16*(*idx)+12] += alpha13*(*v);
2214: y[16*(*idx)+13] += alpha14*(*v);
2215: y[16*(*idx)+14] += alpha15*(*v);
2216: y[16*(*idx)+15] += alpha16*(*v);
2217: idx++; v++;
2218: }
2219: }
2220: PetscLogFlops(32.0*a->nz);
2221: VecRestoreArrayRead(xx,&x);
2222: VecRestoreArray(yy,&y);
2223: return(0);
2224: }
2226: PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2227: {
2228: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2229: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2230: const PetscScalar *x,*v;
2231: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2232: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2233: PetscErrorCode ierr;
2234: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2235: PetscInt n,i,jrow,j;
2238: if (yy != zz) {VecCopy(yy,zz);}
2239: VecGetArrayRead(xx,&x);
2240: VecGetArray(zz,&y);
2241: idx = a->j;
2242: v = a->a;
2243: ii = a->i;
2245: for (i=0; i<m; i++) {
2246: jrow = ii[i];
2247: n = ii[i+1] - jrow;
2248: sum1 = 0.0;
2249: sum2 = 0.0;
2250: sum3 = 0.0;
2251: sum4 = 0.0;
2252: sum5 = 0.0;
2253: sum6 = 0.0;
2254: sum7 = 0.0;
2255: sum8 = 0.0;
2256: sum9 = 0.0;
2257: sum10 = 0.0;
2258: sum11 = 0.0;
2259: sum12 = 0.0;
2260: sum13 = 0.0;
2261: sum14 = 0.0;
2262: sum15 = 0.0;
2263: sum16 = 0.0;
2264: for (j=0; j<n; j++) {
2265: sum1 += v[jrow]*x[16*idx[jrow]];
2266: sum2 += v[jrow]*x[16*idx[jrow]+1];
2267: sum3 += v[jrow]*x[16*idx[jrow]+2];
2268: sum4 += v[jrow]*x[16*idx[jrow]+3];
2269: sum5 += v[jrow]*x[16*idx[jrow]+4];
2270: sum6 += v[jrow]*x[16*idx[jrow]+5];
2271: sum7 += v[jrow]*x[16*idx[jrow]+6];
2272: sum8 += v[jrow]*x[16*idx[jrow]+7];
2273: sum9 += v[jrow]*x[16*idx[jrow]+8];
2274: sum10 += v[jrow]*x[16*idx[jrow]+9];
2275: sum11 += v[jrow]*x[16*idx[jrow]+10];
2276: sum12 += v[jrow]*x[16*idx[jrow]+11];
2277: sum13 += v[jrow]*x[16*idx[jrow]+12];
2278: sum14 += v[jrow]*x[16*idx[jrow]+13];
2279: sum15 += v[jrow]*x[16*idx[jrow]+14];
2280: sum16 += v[jrow]*x[16*idx[jrow]+15];
2281: jrow++;
2282: }
2283: y[16*i] += sum1;
2284: y[16*i+1] += sum2;
2285: y[16*i+2] += sum3;
2286: y[16*i+3] += sum4;
2287: y[16*i+4] += sum5;
2288: y[16*i+5] += sum6;
2289: y[16*i+6] += sum7;
2290: y[16*i+7] += sum8;
2291: y[16*i+8] += sum9;
2292: y[16*i+9] += sum10;
2293: y[16*i+10] += sum11;
2294: y[16*i+11] += sum12;
2295: y[16*i+12] += sum13;
2296: y[16*i+13] += sum14;
2297: y[16*i+14] += sum15;
2298: y[16*i+15] += sum16;
2299: }
2301: PetscLogFlops(32.0*a->nz);
2302: VecRestoreArrayRead(xx,&x);
2303: VecRestoreArray(zz,&y);
2304: return(0);
2305: }
2307: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2308: {
2309: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2310: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2311: const PetscScalar *x,*v;
2312: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2313: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2314: PetscErrorCode ierr;
2315: const PetscInt m = b->AIJ->rmap->n,*idx;
2316: PetscInt n,i;
2319: if (yy != zz) {VecCopy(yy,zz);}
2320: VecGetArrayRead(xx,&x);
2321: VecGetArray(zz,&y);
2322: for (i=0; i<m; i++) {
2323: idx = a->j + a->i[i];
2324: v = a->a + a->i[i];
2325: n = a->i[i+1] - a->i[i];
2326: alpha1 = x[16*i];
2327: alpha2 = x[16*i+1];
2328: alpha3 = x[16*i+2];
2329: alpha4 = x[16*i+3];
2330: alpha5 = x[16*i+4];
2331: alpha6 = x[16*i+5];
2332: alpha7 = x[16*i+6];
2333: alpha8 = x[16*i+7];
2334: alpha9 = x[16*i+8];
2335: alpha10 = x[16*i+9];
2336: alpha11 = x[16*i+10];
2337: alpha12 = x[16*i+11];
2338: alpha13 = x[16*i+12];
2339: alpha14 = x[16*i+13];
2340: alpha15 = x[16*i+14];
2341: alpha16 = x[16*i+15];
2342: while (n-->0) {
2343: y[16*(*idx)] += alpha1*(*v);
2344: y[16*(*idx)+1] += alpha2*(*v);
2345: y[16*(*idx)+2] += alpha3*(*v);
2346: y[16*(*idx)+3] += alpha4*(*v);
2347: y[16*(*idx)+4] += alpha5*(*v);
2348: y[16*(*idx)+5] += alpha6*(*v);
2349: y[16*(*idx)+6] += alpha7*(*v);
2350: y[16*(*idx)+7] += alpha8*(*v);
2351: y[16*(*idx)+8] += alpha9*(*v);
2352: y[16*(*idx)+9] += alpha10*(*v);
2353: y[16*(*idx)+10] += alpha11*(*v);
2354: y[16*(*idx)+11] += alpha12*(*v);
2355: y[16*(*idx)+12] += alpha13*(*v);
2356: y[16*(*idx)+13] += alpha14*(*v);
2357: y[16*(*idx)+14] += alpha15*(*v);
2358: y[16*(*idx)+15] += alpha16*(*v);
2359: idx++; v++;
2360: }
2361: }
2362: PetscLogFlops(32.0*a->nz);
2363: VecRestoreArrayRead(xx,&x);
2364: VecRestoreArray(zz,&y);
2365: return(0);
2366: }
2368: /*--------------------------------------------------------------------------------------------*/
2369: PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2370: {
2371: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2372: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2373: const PetscScalar *x,*v;
2374: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2375: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2376: PetscErrorCode ierr;
2377: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2378: PetscInt nonzerorow=0,n,i,jrow,j;
2381: VecGetArrayRead(xx,&x);
2382: VecGetArray(yy,&y);
2383: idx = a->j;
2384: v = a->a;
2385: ii = a->i;
2387: for (i=0; i<m; i++) {
2388: jrow = ii[i];
2389: n = ii[i+1] - jrow;
2390: sum1 = 0.0;
2391: sum2 = 0.0;
2392: sum3 = 0.0;
2393: sum4 = 0.0;
2394: sum5 = 0.0;
2395: sum6 = 0.0;
2396: sum7 = 0.0;
2397: sum8 = 0.0;
2398: sum9 = 0.0;
2399: sum10 = 0.0;
2400: sum11 = 0.0;
2401: sum12 = 0.0;
2402: sum13 = 0.0;
2403: sum14 = 0.0;
2404: sum15 = 0.0;
2405: sum16 = 0.0;
2406: sum17 = 0.0;
2407: sum18 = 0.0;
2409: nonzerorow += (n>0);
2410: for (j=0; j<n; j++) {
2411: sum1 += v[jrow]*x[18*idx[jrow]];
2412: sum2 += v[jrow]*x[18*idx[jrow]+1];
2413: sum3 += v[jrow]*x[18*idx[jrow]+2];
2414: sum4 += v[jrow]*x[18*idx[jrow]+3];
2415: sum5 += v[jrow]*x[18*idx[jrow]+4];
2416: sum6 += v[jrow]*x[18*idx[jrow]+5];
2417: sum7 += v[jrow]*x[18*idx[jrow]+6];
2418: sum8 += v[jrow]*x[18*idx[jrow]+7];
2419: sum9 += v[jrow]*x[18*idx[jrow]+8];
2420: sum10 += v[jrow]*x[18*idx[jrow]+9];
2421: sum11 += v[jrow]*x[18*idx[jrow]+10];
2422: sum12 += v[jrow]*x[18*idx[jrow]+11];
2423: sum13 += v[jrow]*x[18*idx[jrow]+12];
2424: sum14 += v[jrow]*x[18*idx[jrow]+13];
2425: sum15 += v[jrow]*x[18*idx[jrow]+14];
2426: sum16 += v[jrow]*x[18*idx[jrow]+15];
2427: sum17 += v[jrow]*x[18*idx[jrow]+16];
2428: sum18 += v[jrow]*x[18*idx[jrow]+17];
2429: jrow++;
2430: }
2431: y[18*i] = sum1;
2432: y[18*i+1] = sum2;
2433: y[18*i+2] = sum3;
2434: y[18*i+3] = sum4;
2435: y[18*i+4] = sum5;
2436: y[18*i+5] = sum6;
2437: y[18*i+6] = sum7;
2438: y[18*i+7] = sum8;
2439: y[18*i+8] = sum9;
2440: y[18*i+9] = sum10;
2441: y[18*i+10] = sum11;
2442: y[18*i+11] = sum12;
2443: y[18*i+12] = sum13;
2444: y[18*i+13] = sum14;
2445: y[18*i+14] = sum15;
2446: y[18*i+15] = sum16;
2447: y[18*i+16] = sum17;
2448: y[18*i+17] = sum18;
2449: }
2451: PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);
2452: VecRestoreArrayRead(xx,&x);
2453: VecRestoreArray(yy,&y);
2454: return(0);
2455: }
2457: PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2458: {
2459: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2460: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2461: const PetscScalar *x,*v;
2462: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2463: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2464: PetscErrorCode ierr;
2465: const PetscInt m = b->AIJ->rmap->n,*idx;
2466: PetscInt n,i;
2469: VecSet(yy,0.0);
2470: VecGetArrayRead(xx,&x);
2471: VecGetArray(yy,&y);
2473: for (i=0; i<m; i++) {
2474: idx = a->j + a->i[i];
2475: v = a->a + a->i[i];
2476: n = a->i[i+1] - a->i[i];
2477: alpha1 = x[18*i];
2478: alpha2 = x[18*i+1];
2479: alpha3 = x[18*i+2];
2480: alpha4 = x[18*i+3];
2481: alpha5 = x[18*i+4];
2482: alpha6 = x[18*i+5];
2483: alpha7 = x[18*i+6];
2484: alpha8 = x[18*i+7];
2485: alpha9 = x[18*i+8];
2486: alpha10 = x[18*i+9];
2487: alpha11 = x[18*i+10];
2488: alpha12 = x[18*i+11];
2489: alpha13 = x[18*i+12];
2490: alpha14 = x[18*i+13];
2491: alpha15 = x[18*i+14];
2492: alpha16 = x[18*i+15];
2493: alpha17 = x[18*i+16];
2494: alpha18 = x[18*i+17];
2495: while (n-->0) {
2496: y[18*(*idx)] += alpha1*(*v);
2497: y[18*(*idx)+1] += alpha2*(*v);
2498: y[18*(*idx)+2] += alpha3*(*v);
2499: y[18*(*idx)+3] += alpha4*(*v);
2500: y[18*(*idx)+4] += alpha5*(*v);
2501: y[18*(*idx)+5] += alpha6*(*v);
2502: y[18*(*idx)+6] += alpha7*(*v);
2503: y[18*(*idx)+7] += alpha8*(*v);
2504: y[18*(*idx)+8] += alpha9*(*v);
2505: y[18*(*idx)+9] += alpha10*(*v);
2506: y[18*(*idx)+10] += alpha11*(*v);
2507: y[18*(*idx)+11] += alpha12*(*v);
2508: y[18*(*idx)+12] += alpha13*(*v);
2509: y[18*(*idx)+13] += alpha14*(*v);
2510: y[18*(*idx)+14] += alpha15*(*v);
2511: y[18*(*idx)+15] += alpha16*(*v);
2512: y[18*(*idx)+16] += alpha17*(*v);
2513: y[18*(*idx)+17] += alpha18*(*v);
2514: idx++; v++;
2515: }
2516: }
2517: PetscLogFlops(36.0*a->nz);
2518: VecRestoreArrayRead(xx,&x);
2519: VecRestoreArray(yy,&y);
2520: return(0);
2521: }
2523: PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2524: {
2525: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2526: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2527: const PetscScalar *x,*v;
2528: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2529: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2530: PetscErrorCode ierr;
2531: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2532: PetscInt n,i,jrow,j;
2535: if (yy != zz) {VecCopy(yy,zz);}
2536: VecGetArrayRead(xx,&x);
2537: VecGetArray(zz,&y);
2538: idx = a->j;
2539: v = a->a;
2540: ii = a->i;
2542: for (i=0; i<m; i++) {
2543: jrow = ii[i];
2544: n = ii[i+1] - jrow;
2545: sum1 = 0.0;
2546: sum2 = 0.0;
2547: sum3 = 0.0;
2548: sum4 = 0.0;
2549: sum5 = 0.0;
2550: sum6 = 0.0;
2551: sum7 = 0.0;
2552: sum8 = 0.0;
2553: sum9 = 0.0;
2554: sum10 = 0.0;
2555: sum11 = 0.0;
2556: sum12 = 0.0;
2557: sum13 = 0.0;
2558: sum14 = 0.0;
2559: sum15 = 0.0;
2560: sum16 = 0.0;
2561: sum17 = 0.0;
2562: sum18 = 0.0;
2563: for (j=0; j<n; j++) {
2564: sum1 += v[jrow]*x[18*idx[jrow]];
2565: sum2 += v[jrow]*x[18*idx[jrow]+1];
2566: sum3 += v[jrow]*x[18*idx[jrow]+2];
2567: sum4 += v[jrow]*x[18*idx[jrow]+3];
2568: sum5 += v[jrow]*x[18*idx[jrow]+4];
2569: sum6 += v[jrow]*x[18*idx[jrow]+5];
2570: sum7 += v[jrow]*x[18*idx[jrow]+6];
2571: sum8 += v[jrow]*x[18*idx[jrow]+7];
2572: sum9 += v[jrow]*x[18*idx[jrow]+8];
2573: sum10 += v[jrow]*x[18*idx[jrow]+9];
2574: sum11 += v[jrow]*x[18*idx[jrow]+10];
2575: sum12 += v[jrow]*x[18*idx[jrow]+11];
2576: sum13 += v[jrow]*x[18*idx[jrow]+12];
2577: sum14 += v[jrow]*x[18*idx[jrow]+13];
2578: sum15 += v[jrow]*x[18*idx[jrow]+14];
2579: sum16 += v[jrow]*x[18*idx[jrow]+15];
2580: sum17 += v[jrow]*x[18*idx[jrow]+16];
2581: sum18 += v[jrow]*x[18*idx[jrow]+17];
2582: jrow++;
2583: }
2584: y[18*i] += sum1;
2585: y[18*i+1] += sum2;
2586: y[18*i+2] += sum3;
2587: y[18*i+3] += sum4;
2588: y[18*i+4] += sum5;
2589: y[18*i+5] += sum6;
2590: y[18*i+6] += sum7;
2591: y[18*i+7] += sum8;
2592: y[18*i+8] += sum9;
2593: y[18*i+9] += sum10;
2594: y[18*i+10] += sum11;
2595: y[18*i+11] += sum12;
2596: y[18*i+12] += sum13;
2597: y[18*i+13] += sum14;
2598: y[18*i+14] += sum15;
2599: y[18*i+15] += sum16;
2600: y[18*i+16] += sum17;
2601: y[18*i+17] += sum18;
2602: }
2604: PetscLogFlops(36.0*a->nz);
2605: VecRestoreArrayRead(xx,&x);
2606: VecRestoreArray(zz,&y);
2607: return(0);
2608: }
2610: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2611: {
2612: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2613: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2614: const PetscScalar *x,*v;
2615: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2616: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2617: PetscErrorCode ierr;
2618: const PetscInt m = b->AIJ->rmap->n,*idx;
2619: PetscInt n,i;
2622: if (yy != zz) {VecCopy(yy,zz);}
2623: VecGetArrayRead(xx,&x);
2624: VecGetArray(zz,&y);
2625: for (i=0; i<m; i++) {
2626: idx = a->j + a->i[i];
2627: v = a->a + a->i[i];
2628: n = a->i[i+1] - a->i[i];
2629: alpha1 = x[18*i];
2630: alpha2 = x[18*i+1];
2631: alpha3 = x[18*i+2];
2632: alpha4 = x[18*i+3];
2633: alpha5 = x[18*i+4];
2634: alpha6 = x[18*i+5];
2635: alpha7 = x[18*i+6];
2636: alpha8 = x[18*i+7];
2637: alpha9 = x[18*i+8];
2638: alpha10 = x[18*i+9];
2639: alpha11 = x[18*i+10];
2640: alpha12 = x[18*i+11];
2641: alpha13 = x[18*i+12];
2642: alpha14 = x[18*i+13];
2643: alpha15 = x[18*i+14];
2644: alpha16 = x[18*i+15];
2645: alpha17 = x[18*i+16];
2646: alpha18 = x[18*i+17];
2647: while (n-->0) {
2648: y[18*(*idx)] += alpha1*(*v);
2649: y[18*(*idx)+1] += alpha2*(*v);
2650: y[18*(*idx)+2] += alpha3*(*v);
2651: y[18*(*idx)+3] += alpha4*(*v);
2652: y[18*(*idx)+4] += alpha5*(*v);
2653: y[18*(*idx)+5] += alpha6*(*v);
2654: y[18*(*idx)+6] += alpha7*(*v);
2655: y[18*(*idx)+7] += alpha8*(*v);
2656: y[18*(*idx)+8] += alpha9*(*v);
2657: y[18*(*idx)+9] += alpha10*(*v);
2658: y[18*(*idx)+10] += alpha11*(*v);
2659: y[18*(*idx)+11] += alpha12*(*v);
2660: y[18*(*idx)+12] += alpha13*(*v);
2661: y[18*(*idx)+13] += alpha14*(*v);
2662: y[18*(*idx)+14] += alpha15*(*v);
2663: y[18*(*idx)+15] += alpha16*(*v);
2664: y[18*(*idx)+16] += alpha17*(*v);
2665: y[18*(*idx)+17] += alpha18*(*v);
2666: idx++; v++;
2667: }
2668: }
2669: PetscLogFlops(36.0*a->nz);
2670: VecRestoreArrayRead(xx,&x);
2671: VecRestoreArray(zz,&y);
2672: return(0);
2673: }
2675: PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2676: {
2677: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2678: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2679: const PetscScalar *x,*v;
2680: PetscScalar *y,*sums;
2681: PetscErrorCode ierr;
2682: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2683: PetscInt n,i,jrow,j,dof = b->dof,k;
2686: VecGetArrayRead(xx,&x);
2687: VecSet(yy,0.0);
2688: VecGetArray(yy,&y);
2689: idx = a->j;
2690: v = a->a;
2691: ii = a->i;
2693: for (i=0; i<m; i++) {
2694: jrow = ii[i];
2695: n = ii[i+1] - jrow;
2696: sums = y + dof*i;
2697: for (j=0; j<n; j++) {
2698: for (k=0; k<dof; k++) {
2699: sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2700: }
2701: jrow++;
2702: }
2703: }
2705: PetscLogFlops(2.0*dof*a->nz);
2706: VecRestoreArrayRead(xx,&x);
2707: VecRestoreArray(yy,&y);
2708: return(0);
2709: }
2711: PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2712: {
2713: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2714: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2715: const PetscScalar *x,*v;
2716: PetscScalar *y,*sums;
2717: PetscErrorCode ierr;
2718: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2719: PetscInt n,i,jrow,j,dof = b->dof,k;
2722: if (yy != zz) {VecCopy(yy,zz);}
2723: VecGetArrayRead(xx,&x);
2724: VecGetArray(zz,&y);
2725: idx = a->j;
2726: v = a->a;
2727: ii = a->i;
2729: for (i=0; i<m; i++) {
2730: jrow = ii[i];
2731: n = ii[i+1] - jrow;
2732: sums = y + dof*i;
2733: for (j=0; j<n; j++) {
2734: for (k=0; k<dof; k++) {
2735: sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2736: }
2737: jrow++;
2738: }
2739: }
2741: PetscLogFlops(2.0*dof*a->nz);
2742: VecRestoreArrayRead(xx,&x);
2743: VecRestoreArray(zz,&y);
2744: return(0);
2745: }
2747: PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2748: {
2749: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2750: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2751: const PetscScalar *x,*v,*alpha;
2752: PetscScalar *y;
2753: PetscErrorCode ierr;
2754: const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof;
2755: PetscInt n,i,k;
2758: VecGetArrayRead(xx,&x);
2759: VecSet(yy,0.0);
2760: VecGetArray(yy,&y);
2761: for (i=0; i<m; i++) {
2762: idx = a->j + a->i[i];
2763: v = a->a + a->i[i];
2764: n = a->i[i+1] - a->i[i];
2765: alpha = x + dof*i;
2766: while (n-->0) {
2767: for (k=0; k<dof; k++) {
2768: y[dof*(*idx)+k] += alpha[k]*(*v);
2769: }
2770: idx++; v++;
2771: }
2772: }
2773: PetscLogFlops(2.0*dof*a->nz);
2774: VecRestoreArrayRead(xx,&x);
2775: VecRestoreArray(yy,&y);
2776: return(0);
2777: }
2779: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2780: {
2781: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2782: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2783: const PetscScalar *x,*v,*alpha;
2784: PetscScalar *y;
2785: PetscErrorCode ierr;
2786: const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof;
2787: PetscInt n,i,k;
2790: if (yy != zz) {VecCopy(yy,zz);}
2791: VecGetArrayRead(xx,&x);
2792: VecGetArray(zz,&y);
2793: for (i=0; i<m; i++) {
2794: idx = a->j + a->i[i];
2795: v = a->a + a->i[i];
2796: n = a->i[i+1] - a->i[i];
2797: alpha = x + dof*i;
2798: while (n-->0) {
2799: for (k=0; k<dof; k++) {
2800: y[dof*(*idx)+k] += alpha[k]*(*v);
2801: }
2802: idx++; v++;
2803: }
2804: }
2805: PetscLogFlops(2.0*dof*a->nz);
2806: VecRestoreArrayRead(xx,&x);
2807: VecRestoreArray(zz,&y);
2808: return(0);
2809: }
2811: /*===================================================================================*/
2812: PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2813: {
2814: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2818: /* start the scatter */
2819: VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2820: (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
2821: VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2822: (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
2823: return(0);
2824: }
2826: PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2827: {
2828: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2832: (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2833: (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
2834: VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2835: VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2836: return(0);
2837: }
2839: PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2840: {
2841: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2845: /* start the scatter */
2846: VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2847: (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
2848: VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2849: (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);
2850: return(0);
2851: }
2853: PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2854: {
2855: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2859: (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2860: (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
2861: VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2862: VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2863: return(0);
2864: }
2866: /* ----------------------------------------------------------------*/
2867: PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2868: {
2869: PetscErrorCode ierr;
2870: PetscFreeSpaceList free_space=NULL,current_space=NULL;
2871: Mat_SeqMAIJ *pp =(Mat_SeqMAIJ*)PP->data;
2872: Mat P =pp->AIJ;
2873: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2874: PetscInt *pti,*ptj,*ptJ;
2875: PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2876: const PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
2877: PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
2878: MatScalar *ca;
2879: const PetscInt *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;
2882: /* Get ij structure of P^T */
2883: MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
2885: cn = pn*ppdof;
2886: /* Allocate ci array, arrays for fill computation and */
2887: /* free space for accumulating nonzero column info */
2888: PetscMalloc1(cn+1,&ci);
2889: ci[0] = 0;
2891: /* Work arrays for rows of P^T*A */
2892: PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow);
2893: PetscArrayzero(ptadenserow,an);
2894: PetscArrayzero(denserow,cn);
2896: /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2897: /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2898: /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2899: PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space);
2900: current_space = free_space;
2902: /* Determine symbolic info for each row of C: */
2903: for (i=0; i<pn; i++) {
2904: ptnzi = pti[i+1] - pti[i];
2905: ptJ = ptj + pti[i];
2906: for (dof=0; dof<ppdof; dof++) {
2907: ptanzi = 0;
2908: /* Determine symbolic row of PtA: */
2909: for (j=0; j<ptnzi; j++) {
2910: /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2911: arow = ptJ[j]*ppdof + dof;
2912: /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2913: anzj = ai[arow+1] - ai[arow];
2914: ajj = aj + ai[arow];
2915: for (k=0; k<anzj; k++) {
2916: if (!ptadenserow[ajj[k]]) {
2917: ptadenserow[ajj[k]] = -1;
2918: ptasparserow[ptanzi++] = ajj[k];
2919: }
2920: }
2921: }
2922: /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2923: ptaj = ptasparserow;
2924: cnzi = 0;
2925: for (j=0; j<ptanzi; j++) {
2926: /* Get offset within block of P */
2927: pshift = *ptaj%ppdof;
2928: /* Get block row of P */
2929: prow = (*ptaj++)/ppdof; /* integer division */
2930: /* P has same number of nonzeros per row as the compressed form */
2931: pnzj = pi[prow+1] - pi[prow];
2932: pjj = pj + pi[prow];
2933: for (k=0;k<pnzj;k++) {
2934: /* Locations in C are shifted by the offset within the block */
2935: /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
2936: if (!denserow[pjj[k]*ppdof+pshift]) {
2937: denserow[pjj[k]*ppdof+pshift] = -1;
2938: sparserow[cnzi++] = pjj[k]*ppdof+pshift;
2939: }
2940: }
2941: }
2943: /* sort sparserow */
2944: PetscSortInt(cnzi,sparserow);
2946: /* If free space is not available, make more free space */
2947: /* Double the amount of total space in the list */
2948: if (current_space->local_remaining<cnzi) {
2949: PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space);
2950: }
2952: /* Copy data into free space, and zero out denserows */
2953: PetscArraycpy(current_space->array,sparserow,cnzi);
2955: current_space->array += cnzi;
2956: current_space->local_used += cnzi;
2957: current_space->local_remaining -= cnzi;
2959: for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
2960: for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0;
2962: /* Aside: Perhaps we should save the pta info for the numerical factorization. */
2963: /* For now, we will recompute what is needed. */
2964: ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
2965: }
2966: }
2967: /* nnz is now stored in ci[ptm], column indices are in the list of free space */
2968: /* Allocate space for cj, initialize cj, and */
2969: /* destroy list of free space and other temporary array(s) */
2970: PetscMalloc1(ci[cn]+1,&cj);
2971: PetscFreeSpaceContiguous(&free_space,cj);
2972: PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);
2974: /* Allocate space for ca */
2975: PetscCalloc1(ci[cn]+1,&ca);
2977: /* put together the new matrix */
2978: MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,C);
2979: MatSetBlockSize(*C,pp->dof);
2981: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
2982: /* Since these are PETSc arrays, change flags to free them as necessary. */
2983: c = (Mat_SeqAIJ*)((*C)->data);
2984: c->free_a = PETSC_TRUE;
2985: c->free_ij = PETSC_TRUE;
2986: c->nonew = 0;
2988: (*C)->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
2990: /* Clean up. */
2991: MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
2992: return(0);
2993: }
2995: PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
2996: {
2997: /* This routine requires testing -- first draft only */
2998: PetscErrorCode ierr;
2999: Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data;
3000: Mat P =pp->AIJ;
3001: Mat_SeqAIJ *a = (Mat_SeqAIJ*) A->data;
3002: Mat_SeqAIJ *p = (Mat_SeqAIJ*) P->data;
3003: Mat_SeqAIJ *c = (Mat_SeqAIJ*) C->data;
3004: const PetscInt *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj;
3005: const PetscInt *ci=c->i,*cj=c->j,*cjj;
3006: const PetscInt am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3007: PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3008: const MatScalar *aa=a->a,*pa=p->a,*pA,*paj;
3009: MatScalar *ca=c->a,*caj,*apa;
3012: /* Allocate temporary array for storage of one row of A*P */
3013: PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense);
3015: /* Clear old values in C */
3016: PetscArrayzero(ca,ci[cm]);
3018: for (i=0; i<am; i++) {
3019: /* Form sparse row of A*P */
3020: anzi = ai[i+1] - ai[i];
3021: apnzj = 0;
3022: for (j=0; j<anzi; j++) {
3023: /* Get offset within block of P */
3024: pshift = *aj%ppdof;
3025: /* Get block row of P */
3026: prow = *aj++/ppdof; /* integer division */
3027: pnzj = pi[prow+1] - pi[prow];
3028: pjj = pj + pi[prow];
3029: paj = pa + pi[prow];
3030: for (k=0; k<pnzj; k++) {
3031: poffset = pjj[k]*ppdof+pshift;
3032: if (!apjdense[poffset]) {
3033: apjdense[poffset] = -1;
3034: apj[apnzj++] = poffset;
3035: }
3036: apa[poffset] += (*aa)*paj[k];
3037: }
3038: PetscLogFlops(2.0*pnzj);
3039: aa++;
3040: }
3042: /* Sort the j index array for quick sparse axpy. */
3043: /* Note: a array does not need sorting as it is in dense storage locations. */
3044: PetscSortInt(apnzj,apj);
3046: /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
3047: prow = i/ppdof; /* integer division */
3048: pshift = i%ppdof;
3049: poffset = pi[prow];
3050: pnzi = pi[prow+1] - poffset;
3051: /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
3052: pJ = pj+poffset;
3053: pA = pa+poffset;
3054: for (j=0; j<pnzi; j++) {
3055: crow = (*pJ)*ppdof+pshift;
3056: cjj = cj + ci[crow];
3057: caj = ca + ci[crow];
3058: pJ++;
3059: /* Perform sparse axpy operation. Note cjj includes apj. */
3060: for (k=0,nextap=0; nextap<apnzj; k++) {
3061: if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]];
3062: }
3063: PetscLogFlops(2.0*apnzj);
3064: pA++;
3065: }
3067: /* Zero the current row info for A*P */
3068: for (j=0; j<apnzj; j++) {
3069: apa[apj[j]] = 0.;
3070: apjdense[apj[j]] = 0;
3071: }
3072: }
3074: /* Assemble the final matrix and clean up */
3075: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
3076: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
3077: PetscFree3(apa,apj,apjdense);
3078: return(0);
3079: }
3081: PETSC_INTERN PetscErrorCode MatPtAP_SeqAIJ_SeqMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
3082: {
3086: if (scall == MAT_INITIAL_MATRIX) {
3087: PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);
3088: MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,fill,C);
3089: PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);
3090: }
3091: PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);
3092: MatPtAPNumeric_SeqAIJ_SeqMAIJ(A,P,*C);
3093: PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);
3094: return(0);
3095: }
3097: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
3098: {
3100: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPSymbolic is not implemented for MPIMAIJ matrix yet");
3101: return(0);
3102: }
3104: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
3105: {
3107: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
3108: return(0);
3109: }
3111: PETSC_INTERN PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
3113: PETSC_INTERN PetscErrorCode MatPtAP_MPIAIJ_MPIMAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
3114: {
3118: if (scall == MAT_INITIAL_MATRIX) {
3119: PetscInfo((PetscObject)A,"Converting from MAIJ to AIJ matrix since implementation not available for MAIJ");
3120: MatConvert(P,MATMPIAIJ,MAT_INPLACE_MATRIX,&P);
3121: }
3122: MatPtAP_MPIAIJ_MPIAIJ(A,P,scall,fill,C);
3123: return(0);
3124: }
3126: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,Mat);
3128: PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,Mat C)
3129: {
3130: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data;
3131: PetscErrorCode ierr;
3135: MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,C);
3136: return(0);
3137: }
3139: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,PetscReal,Mat*);
3141: PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,PetscReal fill,Mat *C)
3142: {
3143: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data;
3144: PetscErrorCode ierr;
3148: MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,fill,C);
3149: (*C)->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce;
3150: return(0);
3151: }
3153: PETSC_INTERN PetscErrorCode MatPtAP_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
3154: {
3155: PetscErrorCode ierr;
3156: Mat_MPIAIJ *c;
3157: Mat_APMPI *ap;
3159: if (scall == MAT_INITIAL_MATRIX) {
3160: PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);
3161: MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A,P,fill,C);
3162: PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);
3163: }
3165: c = (Mat_MPIAIJ*)(*C)->data;
3166: ap = c->ap;
3167: PetscOptionsBegin(PetscObjectComm((PetscObject)(*C)),((PetscObject)(*C))->prefix,"MatFreeIntermediateDataStructures","Mat");
3168: ap->freestruct = PETSC_FALSE;
3169: PetscOptionsBool("-mat_freeintermediatedatastructures","Free intermediate data structures", "MatFreeIntermediateDataStructures",ap->freestruct,&ap->freestruct, NULL);
3170: PetscOptionsEnd();
3172: PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);
3173: (*(*C)->ops->ptapnumeric)(A,P,*C);
3174: PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);
3175: return(0);
3176: }
3178: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,Mat);
3180: PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,Mat C)
3181: {
3182: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data;
3183: PetscErrorCode ierr;
3187: MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,C);
3188: return(0);
3189: }
3191: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,PetscReal,Mat*);
3193: PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,PetscReal fill,Mat *C)
3194: {
3195: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data;
3196: PetscErrorCode ierr;
3200: MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,fill,C);
3201: (*C)->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged;
3202: return(0);
3203: }
3205: PETSC_INTERN PetscErrorCode MatPtAP_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
3206: {
3208: Mat_MPIAIJ *c;
3209: Mat_APMPI *ap;
3212: if (scall == MAT_INITIAL_MATRIX) {
3213: PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);
3214: MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A,P,fill,C);
3215: PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);
3216: }
3218: c = (Mat_MPIAIJ*)(*C)->data;
3219: ap = c->ap;
3220: PetscOptionsBegin(PetscObjectComm((PetscObject)(*C)),((PetscObject)(*C))->prefix,"MatFreeIntermediateDataStructures","Mat");
3221: ap->freestruct = PETSC_FALSE;
3222: PetscOptionsBool("-mat_freeintermediatedatastructures","Free intermediate data structures", "MatFreeIntermediateDataStructures",ap->freestruct,&ap->freestruct, NULL);
3223: PetscOptionsEnd();
3225: PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);
3226: (*(*C)->ops->ptapnumeric)(A,P,*C);
3227: PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);
3228: return(0);
3229: }
3231: PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3232: {
3233: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
3234: Mat a = b->AIJ,B;
3235: Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data;
3237: PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3238: PetscInt *cols;
3239: PetscScalar *vals;
3242: MatGetSize(a,&m,&n);
3243: PetscMalloc1(dof*m,&ilen);
3244: for (i=0; i<m; i++) {
3245: nmax = PetscMax(nmax,aij->ilen[i]);
3246: for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i];
3247: }
3248: MatCreate(PETSC_COMM_SELF,&B);
3249: MatSetSizes(B,dof*m,dof*n,dof*m,dof*n);
3250: MatSetType(B,newtype);
3251: MatSeqAIJSetPreallocation(B,0,ilen);
3252: PetscFree(ilen);
3253: PetscMalloc1(nmax,&icols);
3254: ii = 0;
3255: for (i=0; i<m; i++) {
3256: MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3257: for (j=0; j<dof; j++) {
3258: for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j;
3259: MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3260: ii++;
3261: }
3262: MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3263: }
3264: PetscFree(icols);
3265: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3266: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3268: if (reuse == MAT_INPLACE_MATRIX) {
3269: MatHeaderReplace(A,&B);
3270: } else {
3271: *newmat = B;
3272: }
3273: return(0);
3274: }
3276: #include <../src/mat/impls/aij/mpi/mpiaij.h>
3278: PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3279: {
3280: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data;
3281: Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
3282: Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
3283: Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
3284: Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
3285: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data;
3286: PetscInt dof = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0;
3287: PetscInt *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL;
3288: PetscInt rstart,cstart,*garray,ii,k;
3290: PetscScalar *vals,*ovals;
3293: PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz);
3294: for (i=0; i<A->rmap->n/dof; i++) {
3295: nmax = PetscMax(nmax,AIJ->ilen[i]);
3296: onmax = PetscMax(onmax,OAIJ->ilen[i]);
3297: for (j=0; j<dof; j++) {
3298: dnz[dof*i+j] = AIJ->ilen[i];
3299: onz[dof*i+j] = OAIJ->ilen[i];
3300: }
3301: }
3302: MatCreate(PetscObjectComm((PetscObject)A),&B);
3303: MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
3304: MatSetType(B,newtype);
3305: MatMPIAIJSetPreallocation(B,0,dnz,0,onz);
3306: MatSetBlockSize(B,dof);
3307: PetscFree2(dnz,onz);
3309: PetscMalloc2(nmax,&icols,onmax,&oicols);
3310: rstart = dof*maij->A->rmap->rstart;
3311: cstart = dof*maij->A->cmap->rstart;
3312: garray = mpiaij->garray;
3314: ii = rstart;
3315: for (i=0; i<A->rmap->n/dof; i++) {
3316: MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3317: MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3318: for (j=0; j<dof; j++) {
3319: for (k=0; k<ncols; k++) {
3320: icols[k] = cstart + dof*cols[k]+j;
3321: }
3322: for (k=0; k<oncols; k++) {
3323: oicols[k] = dof*garray[ocols[k]]+j;
3324: }
3325: MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3326: MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);
3327: ii++;
3328: }
3329: MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3330: MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3331: }
3332: PetscFree2(icols,oicols);
3334: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3335: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3337: if (reuse == MAT_INPLACE_MATRIX) {
3338: PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
3339: ((PetscObject)A)->refct = 1;
3341: MatHeaderReplace(A,&B);
3343: ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3344: } else {
3345: *newmat = B;
3346: }
3347: return(0);
3348: }
3350: PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
3351: {
3353: Mat A;
3356: MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);
3357: MatCreateSubMatrix(A,isrow,iscol,cll,newmat);
3358: MatDestroy(&A);
3359: return(0);
3360: }
3362: PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *submat[])
3363: {
3365: Mat A;
3368: MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);
3369: MatCreateSubMatrices(A,n,irow,icol,scall,submat);
3370: MatDestroy(&A);
3371: return(0);
3372: }
3374: PetscErrorCode MatSetFromOptions_MAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
3375: {
3376: PetscErrorCode ierr;
3377: /* By default, we convert MAIJ to MPIAIJ and then do PtAP.
3378: * If we convert it to MPIAIJ, it is better to use MPIAIJ at the first place.
3379: * However, not sure other PETSc developers want to change it or not. So I
3380: * still leave it as is.
3381: */
3382: const char *algTypes[3] = {"mpiaij","allatonce","allatonce_merged"};
3383: PetscInt nalg=3,alg=0;
3384: PetscMPIInt size;
3387: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
3388: if (size <= 1) return(0);
3389: PetscOptionsHead(PetscOptionsObject,"MPIMAIJ options");
3390: PetscOptionsEList("-matmaijptap_via","Algorithmic approach","MatPtAP",algTypes,nalg,algTypes[alg],&alg,NULL);
3391: switch (alg) {
3392: case 0: /* Do not do anything */
3393: break;
3394: case 1:
3395: PetscObjectComposeFunction((PetscObject)A,"MatPtAP_mpiaij_mpimaij_C",MatPtAP_MPIAIJ_MPIMAIJ_allatonce);
3396: break;
3397: case 2:
3398: PetscObjectComposeFunction((PetscObject)A,"MatPtAP_mpiaij_mpimaij_C",MatPtAP_MPIAIJ_MPIMAIJ_allatonce_merged);
3399: break;
3400: }
3401: PetscOptionsTail();
3402: return(0);
3403: }
3405: PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
3408: /* ---------------------------------------------------------------------------------- */
3409: /*@
3410: MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
3411: operations for multicomponent problems. It interpolates each component the same
3412: way independently. The matrix type is based on MATSEQAIJ for sequential matrices,
3413: and MATMPIAIJ for distributed matrices.
3415: Collective
3417: Input Parameters:
3418: + A - the AIJ matrix describing the action on blocks
3419: - dof - the block size (number of components per node)
3421: Output Parameter:
3422: . maij - the new MAIJ matrix
3424: Operations provided:
3425: + MatMult
3426: . MatMultTranspose
3427: . MatMultAdd
3428: . MatMultTransposeAdd
3429: - MatView
3431: Level: advanced
3433: .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ
3434: @*/
3435: PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
3436: {
3438: PetscMPIInt size;
3439: PetscInt n;
3440: Mat B;
3441: #if defined(PETSC_HAVE_CUDA)
3442: /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */
3443: PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE;
3444: #endif
3447: dof = PetscAbs(dof);
3448: PetscObjectReference((PetscObject)A);
3450: if (dof == 1) *maij = A;
3451: else {
3452: MatCreate(PetscObjectComm((PetscObject)A),&B);
3453: MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);
3454: PetscLayoutSetBlockSize(B->rmap,dof);
3455: PetscLayoutSetBlockSize(B->cmap,dof);
3456: PetscLayoutSetUp(B->rmap);
3457: PetscLayoutSetUp(B->cmap);
3459: B->assembled = PETSC_TRUE;
3461: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
3462: if (size == 1) {
3463: Mat_SeqMAIJ *b;
3465: MatSetType(B,MATSEQMAIJ);
3467: B->ops->setup = NULL;
3468: B->ops->destroy = MatDestroy_SeqMAIJ;
3469: B->ops->view = MatView_SeqMAIJ;
3470: b = (Mat_SeqMAIJ*)B->data;
3471: b->dof = dof;
3472: b->AIJ = A;
3474: if (dof == 2) {
3475: B->ops->mult = MatMult_SeqMAIJ_2;
3476: B->ops->multadd = MatMultAdd_SeqMAIJ_2;
3477: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2;
3478: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3479: } else if (dof == 3) {
3480: B->ops->mult = MatMult_SeqMAIJ_3;
3481: B->ops->multadd = MatMultAdd_SeqMAIJ_3;
3482: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3;
3483: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3484: } else if (dof == 4) {
3485: B->ops->mult = MatMult_SeqMAIJ_4;
3486: B->ops->multadd = MatMultAdd_SeqMAIJ_4;
3487: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4;
3488: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3489: } else if (dof == 5) {
3490: B->ops->mult = MatMult_SeqMAIJ_5;
3491: B->ops->multadd = MatMultAdd_SeqMAIJ_5;
3492: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5;
3493: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
3494: } else if (dof == 6) {
3495: B->ops->mult = MatMult_SeqMAIJ_6;
3496: B->ops->multadd = MatMultAdd_SeqMAIJ_6;
3497: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6;
3498: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3499: } else if (dof == 7) {
3500: B->ops->mult = MatMult_SeqMAIJ_7;
3501: B->ops->multadd = MatMultAdd_SeqMAIJ_7;
3502: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7;
3503: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
3504: } else if (dof == 8) {
3505: B->ops->mult = MatMult_SeqMAIJ_8;
3506: B->ops->multadd = MatMultAdd_SeqMAIJ_8;
3507: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8;
3508: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
3509: } else if (dof == 9) {
3510: B->ops->mult = MatMult_SeqMAIJ_9;
3511: B->ops->multadd = MatMultAdd_SeqMAIJ_9;
3512: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9;
3513: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3514: } else if (dof == 10) {
3515: B->ops->mult = MatMult_SeqMAIJ_10;
3516: B->ops->multadd = MatMultAdd_SeqMAIJ_10;
3517: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10;
3518: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3519: } else if (dof == 11) {
3520: B->ops->mult = MatMult_SeqMAIJ_11;
3521: B->ops->multadd = MatMultAdd_SeqMAIJ_11;
3522: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11;
3523: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
3524: } else if (dof == 16) {
3525: B->ops->mult = MatMult_SeqMAIJ_16;
3526: B->ops->multadd = MatMultAdd_SeqMAIJ_16;
3527: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16;
3528: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3529: } else if (dof == 18) {
3530: B->ops->mult = MatMult_SeqMAIJ_18;
3531: B->ops->multadd = MatMultAdd_SeqMAIJ_18;
3532: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18;
3533: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
3534: } else {
3535: B->ops->mult = MatMult_SeqMAIJ_N;
3536: B->ops->multadd = MatMultAdd_SeqMAIJ_N;
3537: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N;
3538: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
3539: }
3540: #if defined(PETSC_HAVE_CUDA)
3541: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaijcusparse_C",MatConvert_SeqMAIJ_SeqAIJ);
3542: #endif
3543: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ);
3544: PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaij_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);
3545: PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaijperm_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);
3546: PetscObjectComposeFunction((PetscObject)B,"MatPtAP_seqaijmkl_seqmaij_C",MatPtAP_SeqAIJ_SeqMAIJ);
3547: PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqmaij_C",MatPtAP_IS_XAIJ);
3548: } else {
3549: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)A->data;
3550: Mat_MPIMAIJ *b;
3551: IS from,to;
3552: Vec gvec;
3554: MatSetType(B,MATMPIMAIJ);
3556: B->ops->setup = NULL;
3557: B->ops->destroy = MatDestroy_MPIMAIJ;
3558: B->ops->view = MatView_MPIMAIJ;
3560: b = (Mat_MPIMAIJ*)B->data;
3561: b->dof = dof;
3562: b->A = A;
3564: MatCreateMAIJ(mpiaij->A,-dof,&b->AIJ);
3565: MatCreateMAIJ(mpiaij->B,-dof,&b->OAIJ);
3567: VecGetSize(mpiaij->lvec,&n);
3568: VecCreate(PETSC_COMM_SELF,&b->w);
3569: VecSetSizes(b->w,n*dof,n*dof);
3570: VecSetBlockSize(b->w,dof);
3571: VecSetType(b->w,VECSEQ);
3573: /* create two temporary Index sets for build scatter gather */
3574: ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);
3575: ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);
3577: /* create temporary global vector to generate scatter context */
3578: VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);
3580: /* generate the scatter context */
3581: VecScatterCreate(gvec,from,b->w,to,&b->ctx);
3583: ISDestroy(&from);
3584: ISDestroy(&to);
3585: VecDestroy(&gvec);
3587: B->ops->mult = MatMult_MPIMAIJ_dof;
3588: B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof;
3589: B->ops->multadd = MatMultAdd_MPIMAIJ_dof;
3590: B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
3592: #if defined(PETSC_HAVE_CUDA)
3593: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaijcusparse_C",MatConvert_MPIMAIJ_MPIAIJ);
3594: #endif
3595: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ);
3596: PetscObjectComposeFunction((PetscObject)B,"MatPtAP_mpiaij_mpimaij_C",MatPtAP_MPIAIJ_MPIMAIJ);
3597: PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpimaij_C",MatPtAP_IS_XAIJ);
3598: }
3599: B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ;
3600: B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ;
3601: B->ops->setfromoptions = MatSetFromOptions_MAIJ;
3602: MatSetFromOptions(B);
3603: MatSetUp(B);
3604: #if defined(PETSC_HAVE_CUDA)
3605: /* temporary until we have CUDA implementation of MAIJ */
3606: {
3607: PetscBool flg;
3608: if (convert) {
3609: PetscObjectTypeCompareAny((PetscObject)A,&flg,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,MATAIJCUSPARSE,"");
3610: if (flg) {
3611: MatConvert(B,((PetscObject)A)->type_name,MAT_INPLACE_MATRIX,&B);
3612: }
3613: }
3614: }
3615: #endif
3616: *maij = B;
3617: MatViewFromOptions(B,NULL,"-mat_view");
3618: }
3620: return(0);
3621: }