Actual source code: maij.c
petsc-3.14.6 2021-03-30
2: /*
3: Defines the basic matrix operations for the MAIJ matrix storage format.
4: This format is used for restriction and interpolation operations for
5: multicomponent problems. It interpolates each component the same way
6: independently.
8: We provide:
9: MatMult()
10: MatMultTranspose()
11: MatMultTransposeAdd()
12: MatMultAdd()
13: and
14: MatCreateMAIJ(Mat,dof,Mat*)
16: This single directory handles both the sequential and parallel codes
17: */
19: #include <../src/mat/impls/maij/maij.h>
20: #include <../src/mat/utils/freespace.h>
22: /*@
23: MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix
25: Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel
27: Input Parameter:
28: . A - the MAIJ matrix
30: Output Parameter:
31: . B - the AIJ matrix
33: Level: advanced
35: Notes:
36: The reference count on the AIJ matrix is not increased so you should not destroy it.
38: .seealso: MatCreateMAIJ()
39: @*/
40: PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B)
41: {
43: PetscBool ismpimaij,isseqmaij;
46: PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
47: PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
48: if (ismpimaij) {
49: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
51: *B = b->A;
52: } else if (isseqmaij) {
53: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
55: *B = b->AIJ;
56: } else {
57: *B = A;
58: }
59: return(0);
60: }
62: /*@
63: MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size
65: Logically Collective
67: Input Parameter:
68: + A - the MAIJ matrix
69: - dof - the block size for the new matrix
71: Output Parameter:
72: . B - the new MAIJ matrix
74: Level: advanced
76: .seealso: MatCreateMAIJ()
77: @*/
78: PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
79: {
81: Mat Aij = NULL;
85: MatMAIJGetAIJ(A,&Aij);
86: MatCreateMAIJ(Aij,dof,B);
87: return(0);
88: }
90: PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
91: {
93: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
96: MatDestroy(&b->AIJ);
97: PetscFree(A->data);
98: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL);
99: PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaij_seqmaij_C",NULL);
100: return(0);
101: }
103: PetscErrorCode MatSetUp_MAIJ(Mat A)
104: {
106: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices");
107: }
109: PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
110: {
112: Mat B;
115: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
116: MatView(B,viewer);
117: MatDestroy(&B);
118: return(0);
119: }
121: PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
122: {
124: Mat B;
127: MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
128: MatView(B,viewer);
129: MatDestroy(&B);
130: return(0);
131: }
133: PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
134: {
136: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
139: MatDestroy(&b->AIJ);
140: MatDestroy(&b->OAIJ);
141: MatDestroy(&b->A);
142: VecScatterDestroy(&b->ctx);
143: VecDestroy(&b->w);
144: PetscFree(A->data);
145: PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL);
146: PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_mpiaij_mpimaij_C",NULL);
147: PetscObjectChangeTypeName((PetscObject)A,NULL);
148: return(0);
149: }
151: /*MC
152: MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
153: multicomponent problems, interpolating or restricting each component the same way independently.
154: The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
156: Operations provided:
157: + MatMult
158: . MatMultTranspose
159: . MatMultAdd
160: - MatMultTransposeAdd
162: Level: advanced
164: .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ()
165: M*/
167: PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
168: {
170: Mat_MPIMAIJ *b;
171: PetscMPIInt size;
174: PetscNewLog(A,&b);
175: A->data = (void*)b;
177: PetscMemzero(A->ops,sizeof(struct _MatOps));
179: A->ops->setup = MatSetUp_MAIJ;
181: b->AIJ = NULL;
182: b->dof = 0;
183: b->OAIJ = NULL;
184: b->ctx = NULL;
185: b->w = NULL;
186: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
187: if (size == 1) {
188: PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);
189: } else {
190: PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);
191: }
192: A->preallocated = PETSC_TRUE;
193: A->assembled = PETSC_TRUE;
194: return(0);
195: }
197: /* --------------------------------------------------------------------------------------*/
198: PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
199: {
200: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
201: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
202: const PetscScalar *x,*v;
203: PetscScalar *y, sum1, sum2;
204: PetscErrorCode ierr;
205: PetscInt nonzerorow=0,n,i,jrow,j;
206: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
209: VecGetArrayRead(xx,&x);
210: VecGetArray(yy,&y);
211: idx = a->j;
212: v = a->a;
213: ii = a->i;
215: for (i=0; i<m; i++) {
216: jrow = ii[i];
217: n = ii[i+1] - jrow;
218: sum1 = 0.0;
219: sum2 = 0.0;
221: nonzerorow += (n>0);
222: for (j=0; j<n; j++) {
223: sum1 += v[jrow]*x[2*idx[jrow]];
224: sum2 += v[jrow]*x[2*idx[jrow]+1];
225: jrow++;
226: }
227: y[2*i] = sum1;
228: y[2*i+1] = sum2;
229: }
231: PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);
232: VecRestoreArrayRead(xx,&x);
233: VecRestoreArray(yy,&y);
234: return(0);
235: }
237: PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
238: {
239: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
240: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
241: const PetscScalar *x,*v;
242: PetscScalar *y,alpha1,alpha2;
243: PetscErrorCode ierr;
244: PetscInt n,i;
245: const PetscInt m = b->AIJ->rmap->n,*idx;
248: VecSet(yy,0.0);
249: VecGetArrayRead(xx,&x);
250: VecGetArray(yy,&y);
252: for (i=0; i<m; i++) {
253: idx = a->j + a->i[i];
254: v = a->a + a->i[i];
255: n = a->i[i+1] - a->i[i];
256: alpha1 = x[2*i];
257: alpha2 = x[2*i+1];
258: while (n-->0) {
259: y[2*(*idx)] += alpha1*(*v);
260: y[2*(*idx)+1] += alpha2*(*v);
261: idx++; v++;
262: }
263: }
264: PetscLogFlops(4.0*a->nz);
265: VecRestoreArrayRead(xx,&x);
266: VecRestoreArray(yy,&y);
267: return(0);
268: }
270: PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
271: {
272: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
273: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
274: const PetscScalar *x,*v;
275: PetscScalar *y,sum1, sum2;
276: PetscErrorCode ierr;
277: PetscInt n,i,jrow,j;
278: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
281: if (yy != zz) {VecCopy(yy,zz);}
282: VecGetArrayRead(xx,&x);
283: VecGetArray(zz,&y);
284: idx = a->j;
285: v = a->a;
286: ii = a->i;
288: for (i=0; i<m; i++) {
289: jrow = ii[i];
290: n = ii[i+1] - jrow;
291: sum1 = 0.0;
292: sum2 = 0.0;
293: for (j=0; j<n; j++) {
294: sum1 += v[jrow]*x[2*idx[jrow]];
295: sum2 += v[jrow]*x[2*idx[jrow]+1];
296: jrow++;
297: }
298: y[2*i] += sum1;
299: y[2*i+1] += sum2;
300: }
302: PetscLogFlops(4.0*a->nz);
303: VecRestoreArrayRead(xx,&x);
304: VecRestoreArray(zz,&y);
305: return(0);
306: }
307: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
308: {
309: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
310: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
311: const PetscScalar *x,*v;
312: PetscScalar *y,alpha1,alpha2;
313: PetscErrorCode ierr;
314: PetscInt n,i;
315: const PetscInt m = b->AIJ->rmap->n,*idx;
318: if (yy != zz) {VecCopy(yy,zz);}
319: VecGetArrayRead(xx,&x);
320: VecGetArray(zz,&y);
322: for (i=0; i<m; i++) {
323: idx = a->j + a->i[i];
324: v = a->a + a->i[i];
325: n = a->i[i+1] - a->i[i];
326: alpha1 = x[2*i];
327: alpha2 = x[2*i+1];
328: while (n-->0) {
329: y[2*(*idx)] += alpha1*(*v);
330: y[2*(*idx)+1] += alpha2*(*v);
331: idx++; v++;
332: }
333: }
334: PetscLogFlops(4.0*a->nz);
335: VecRestoreArrayRead(xx,&x);
336: VecRestoreArray(zz,&y);
337: return(0);
338: }
339: /* --------------------------------------------------------------------------------------*/
340: PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
341: {
342: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
343: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
344: const PetscScalar *x,*v;
345: PetscScalar *y,sum1, sum2, sum3;
346: PetscErrorCode ierr;
347: PetscInt nonzerorow=0,n,i,jrow,j;
348: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
351: VecGetArrayRead(xx,&x);
352: VecGetArray(yy,&y);
353: idx = a->j;
354: v = a->a;
355: ii = a->i;
357: for (i=0; i<m; i++) {
358: jrow = ii[i];
359: n = ii[i+1] - jrow;
360: sum1 = 0.0;
361: sum2 = 0.0;
362: sum3 = 0.0;
364: nonzerorow += (n>0);
365: for (j=0; j<n; j++) {
366: sum1 += v[jrow]*x[3*idx[jrow]];
367: sum2 += v[jrow]*x[3*idx[jrow]+1];
368: sum3 += v[jrow]*x[3*idx[jrow]+2];
369: jrow++;
370: }
371: y[3*i] = sum1;
372: y[3*i+1] = sum2;
373: y[3*i+2] = sum3;
374: }
376: PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);
377: VecRestoreArrayRead(xx,&x);
378: VecRestoreArray(yy,&y);
379: return(0);
380: }
382: PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
383: {
384: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
385: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
386: const PetscScalar *x,*v;
387: PetscScalar *y,alpha1,alpha2,alpha3;
388: PetscErrorCode ierr;
389: PetscInt n,i;
390: const PetscInt m = b->AIJ->rmap->n,*idx;
393: VecSet(yy,0.0);
394: VecGetArrayRead(xx,&x);
395: VecGetArray(yy,&y);
397: for (i=0; i<m; i++) {
398: idx = a->j + a->i[i];
399: v = a->a + a->i[i];
400: n = a->i[i+1] - a->i[i];
401: alpha1 = x[3*i];
402: alpha2 = x[3*i+1];
403: alpha3 = x[3*i+2];
404: while (n-->0) {
405: y[3*(*idx)] += alpha1*(*v);
406: y[3*(*idx)+1] += alpha2*(*v);
407: y[3*(*idx)+2] += alpha3*(*v);
408: idx++; v++;
409: }
410: }
411: PetscLogFlops(6.0*a->nz);
412: VecRestoreArrayRead(xx,&x);
413: VecRestoreArray(yy,&y);
414: return(0);
415: }
417: PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
418: {
419: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
420: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
421: const PetscScalar *x,*v;
422: PetscScalar *y,sum1, sum2, sum3;
423: PetscErrorCode ierr;
424: PetscInt n,i,jrow,j;
425: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
428: if (yy != zz) {VecCopy(yy,zz);}
429: VecGetArrayRead(xx,&x);
430: VecGetArray(zz,&y);
431: idx = a->j;
432: v = a->a;
433: ii = a->i;
435: for (i=0; i<m; i++) {
436: jrow = ii[i];
437: n = ii[i+1] - jrow;
438: sum1 = 0.0;
439: sum2 = 0.0;
440: sum3 = 0.0;
441: for (j=0; j<n; j++) {
442: sum1 += v[jrow]*x[3*idx[jrow]];
443: sum2 += v[jrow]*x[3*idx[jrow]+1];
444: sum3 += v[jrow]*x[3*idx[jrow]+2];
445: jrow++;
446: }
447: y[3*i] += sum1;
448: y[3*i+1] += sum2;
449: y[3*i+2] += sum3;
450: }
452: PetscLogFlops(6.0*a->nz);
453: VecRestoreArrayRead(xx,&x);
454: VecRestoreArray(zz,&y);
455: return(0);
456: }
457: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
458: {
459: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
460: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
461: const PetscScalar *x,*v;
462: PetscScalar *y,alpha1,alpha2,alpha3;
463: PetscErrorCode ierr;
464: PetscInt n,i;
465: const PetscInt m = b->AIJ->rmap->n,*idx;
468: if (yy != zz) {VecCopy(yy,zz);}
469: VecGetArrayRead(xx,&x);
470: VecGetArray(zz,&y);
471: for (i=0; i<m; i++) {
472: idx = a->j + a->i[i];
473: v = a->a + a->i[i];
474: n = a->i[i+1] - a->i[i];
475: alpha1 = x[3*i];
476: alpha2 = x[3*i+1];
477: alpha3 = x[3*i+2];
478: while (n-->0) {
479: y[3*(*idx)] += alpha1*(*v);
480: y[3*(*idx)+1] += alpha2*(*v);
481: y[3*(*idx)+2] += alpha3*(*v);
482: idx++; v++;
483: }
484: }
485: PetscLogFlops(6.0*a->nz);
486: VecRestoreArrayRead(xx,&x);
487: VecRestoreArray(zz,&y);
488: return(0);
489: }
491: /* ------------------------------------------------------------------------------*/
492: PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
493: {
494: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
495: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
496: const PetscScalar *x,*v;
497: PetscScalar *y,sum1, sum2, sum3, sum4;
498: PetscErrorCode ierr;
499: PetscInt nonzerorow=0,n,i,jrow,j;
500: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
503: VecGetArrayRead(xx,&x);
504: VecGetArray(yy,&y);
505: idx = a->j;
506: v = a->a;
507: ii = a->i;
509: for (i=0; i<m; i++) {
510: jrow = ii[i];
511: n = ii[i+1] - jrow;
512: sum1 = 0.0;
513: sum2 = 0.0;
514: sum3 = 0.0;
515: sum4 = 0.0;
516: nonzerorow += (n>0);
517: for (j=0; j<n; j++) {
518: sum1 += v[jrow]*x[4*idx[jrow]];
519: sum2 += v[jrow]*x[4*idx[jrow]+1];
520: sum3 += v[jrow]*x[4*idx[jrow]+2];
521: sum4 += v[jrow]*x[4*idx[jrow]+3];
522: jrow++;
523: }
524: y[4*i] = sum1;
525: y[4*i+1] = sum2;
526: y[4*i+2] = sum3;
527: y[4*i+3] = sum4;
528: }
530: PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);
531: VecRestoreArrayRead(xx,&x);
532: VecRestoreArray(yy,&y);
533: return(0);
534: }
536: PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
537: {
538: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
539: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
540: const PetscScalar *x,*v;
541: PetscScalar *y,alpha1,alpha2,alpha3,alpha4;
542: PetscErrorCode ierr;
543: PetscInt n,i;
544: const PetscInt m = b->AIJ->rmap->n,*idx;
547: VecSet(yy,0.0);
548: VecGetArrayRead(xx,&x);
549: VecGetArray(yy,&y);
550: for (i=0; i<m; i++) {
551: idx = a->j + a->i[i];
552: v = a->a + a->i[i];
553: n = a->i[i+1] - a->i[i];
554: alpha1 = x[4*i];
555: alpha2 = x[4*i+1];
556: alpha3 = x[4*i+2];
557: alpha4 = x[4*i+3];
558: while (n-->0) {
559: y[4*(*idx)] += alpha1*(*v);
560: y[4*(*idx)+1] += alpha2*(*v);
561: y[4*(*idx)+2] += alpha3*(*v);
562: y[4*(*idx)+3] += alpha4*(*v);
563: idx++; v++;
564: }
565: }
566: PetscLogFlops(8.0*a->nz);
567: VecRestoreArrayRead(xx,&x);
568: VecRestoreArray(yy,&y);
569: return(0);
570: }
572: PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
573: {
574: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
575: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
576: const PetscScalar *x,*v;
577: PetscScalar *y,sum1, sum2, sum3, sum4;
578: PetscErrorCode ierr;
579: PetscInt n,i,jrow,j;
580: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
583: if (yy != zz) {VecCopy(yy,zz);}
584: VecGetArrayRead(xx,&x);
585: VecGetArray(zz,&y);
586: idx = a->j;
587: v = a->a;
588: ii = a->i;
590: for (i=0; i<m; i++) {
591: jrow = ii[i];
592: n = ii[i+1] - jrow;
593: sum1 = 0.0;
594: sum2 = 0.0;
595: sum3 = 0.0;
596: sum4 = 0.0;
597: for (j=0; j<n; j++) {
598: sum1 += v[jrow]*x[4*idx[jrow]];
599: sum2 += v[jrow]*x[4*idx[jrow]+1];
600: sum3 += v[jrow]*x[4*idx[jrow]+2];
601: sum4 += v[jrow]*x[4*idx[jrow]+3];
602: jrow++;
603: }
604: y[4*i] += sum1;
605: y[4*i+1] += sum2;
606: y[4*i+2] += sum3;
607: y[4*i+3] += sum4;
608: }
610: PetscLogFlops(8.0*a->nz);
611: VecRestoreArrayRead(xx,&x);
612: VecRestoreArray(zz,&y);
613: return(0);
614: }
615: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
616: {
617: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
618: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
619: const PetscScalar *x,*v;
620: PetscScalar *y,alpha1,alpha2,alpha3,alpha4;
621: PetscErrorCode ierr;
622: PetscInt n,i;
623: const PetscInt m = b->AIJ->rmap->n,*idx;
626: if (yy != zz) {VecCopy(yy,zz);}
627: VecGetArrayRead(xx,&x);
628: VecGetArray(zz,&y);
630: for (i=0; i<m; i++) {
631: idx = a->j + a->i[i];
632: v = a->a + a->i[i];
633: n = a->i[i+1] - a->i[i];
634: alpha1 = x[4*i];
635: alpha2 = x[4*i+1];
636: alpha3 = x[4*i+2];
637: alpha4 = x[4*i+3];
638: while (n-->0) {
639: y[4*(*idx)] += alpha1*(*v);
640: y[4*(*idx)+1] += alpha2*(*v);
641: y[4*(*idx)+2] += alpha3*(*v);
642: y[4*(*idx)+3] += alpha4*(*v);
643: idx++; v++;
644: }
645: }
646: PetscLogFlops(8.0*a->nz);
647: VecRestoreArrayRead(xx,&x);
648: VecRestoreArray(zz,&y);
649: return(0);
650: }
651: /* ------------------------------------------------------------------------------*/
653: PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
654: {
655: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
656: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
657: const PetscScalar *x,*v;
658: PetscScalar *y,sum1, sum2, sum3, sum4, sum5;
659: PetscErrorCode ierr;
660: PetscInt nonzerorow=0,n,i,jrow,j;
661: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
664: VecGetArrayRead(xx,&x);
665: VecGetArray(yy,&y);
666: idx = a->j;
667: v = a->a;
668: ii = a->i;
670: for (i=0; i<m; i++) {
671: jrow = ii[i];
672: n = ii[i+1] - jrow;
673: sum1 = 0.0;
674: sum2 = 0.0;
675: sum3 = 0.0;
676: sum4 = 0.0;
677: sum5 = 0.0;
679: nonzerorow += (n>0);
680: for (j=0; j<n; j++) {
681: sum1 += v[jrow]*x[5*idx[jrow]];
682: sum2 += v[jrow]*x[5*idx[jrow]+1];
683: sum3 += v[jrow]*x[5*idx[jrow]+2];
684: sum4 += v[jrow]*x[5*idx[jrow]+3];
685: sum5 += v[jrow]*x[5*idx[jrow]+4];
686: jrow++;
687: }
688: y[5*i] = sum1;
689: y[5*i+1] = sum2;
690: y[5*i+2] = sum3;
691: y[5*i+3] = sum4;
692: y[5*i+4] = sum5;
693: }
695: PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);
696: VecRestoreArrayRead(xx,&x);
697: VecRestoreArray(yy,&y);
698: return(0);
699: }
701: PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
702: {
703: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
704: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
705: const PetscScalar *x,*v;
706: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5;
707: PetscErrorCode ierr;
708: PetscInt n,i;
709: const PetscInt m = b->AIJ->rmap->n,*idx;
712: VecSet(yy,0.0);
713: VecGetArrayRead(xx,&x);
714: VecGetArray(yy,&y);
716: for (i=0; i<m; i++) {
717: idx = a->j + a->i[i];
718: v = a->a + a->i[i];
719: n = a->i[i+1] - a->i[i];
720: alpha1 = x[5*i];
721: alpha2 = x[5*i+1];
722: alpha3 = x[5*i+2];
723: alpha4 = x[5*i+3];
724: alpha5 = x[5*i+4];
725: while (n-->0) {
726: y[5*(*idx)] += alpha1*(*v);
727: y[5*(*idx)+1] += alpha2*(*v);
728: y[5*(*idx)+2] += alpha3*(*v);
729: y[5*(*idx)+3] += alpha4*(*v);
730: y[5*(*idx)+4] += alpha5*(*v);
731: idx++; v++;
732: }
733: }
734: PetscLogFlops(10.0*a->nz);
735: VecRestoreArrayRead(xx,&x);
736: VecRestoreArray(yy,&y);
737: return(0);
738: }
740: PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
741: {
742: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
743: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
744: const PetscScalar *x,*v;
745: PetscScalar *y,sum1, sum2, sum3, sum4, sum5;
746: PetscErrorCode ierr;
747: PetscInt n,i,jrow,j;
748: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
751: if (yy != zz) {VecCopy(yy,zz);}
752: VecGetArrayRead(xx,&x);
753: VecGetArray(zz,&y);
754: idx = a->j;
755: v = a->a;
756: ii = a->i;
758: for (i=0; i<m; i++) {
759: jrow = ii[i];
760: n = ii[i+1] - jrow;
761: sum1 = 0.0;
762: sum2 = 0.0;
763: sum3 = 0.0;
764: sum4 = 0.0;
765: sum5 = 0.0;
766: for (j=0; j<n; j++) {
767: sum1 += v[jrow]*x[5*idx[jrow]];
768: sum2 += v[jrow]*x[5*idx[jrow]+1];
769: sum3 += v[jrow]*x[5*idx[jrow]+2];
770: sum4 += v[jrow]*x[5*idx[jrow]+3];
771: sum5 += v[jrow]*x[5*idx[jrow]+4];
772: jrow++;
773: }
774: y[5*i] += sum1;
775: y[5*i+1] += sum2;
776: y[5*i+2] += sum3;
777: y[5*i+3] += sum4;
778: y[5*i+4] += sum5;
779: }
781: PetscLogFlops(10.0*a->nz);
782: VecRestoreArrayRead(xx,&x);
783: VecRestoreArray(zz,&y);
784: return(0);
785: }
787: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
788: {
789: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
790: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
791: const PetscScalar *x,*v;
792: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5;
793: PetscErrorCode ierr;
794: PetscInt n,i;
795: const PetscInt m = b->AIJ->rmap->n,*idx;
798: if (yy != zz) {VecCopy(yy,zz);}
799: VecGetArrayRead(xx,&x);
800: VecGetArray(zz,&y);
802: for (i=0; i<m; i++) {
803: idx = a->j + a->i[i];
804: v = a->a + a->i[i];
805: n = a->i[i+1] - a->i[i];
806: alpha1 = x[5*i];
807: alpha2 = x[5*i+1];
808: alpha3 = x[5*i+2];
809: alpha4 = x[5*i+3];
810: alpha5 = x[5*i+4];
811: while (n-->0) {
812: y[5*(*idx)] += alpha1*(*v);
813: y[5*(*idx)+1] += alpha2*(*v);
814: y[5*(*idx)+2] += alpha3*(*v);
815: y[5*(*idx)+3] += alpha4*(*v);
816: y[5*(*idx)+4] += alpha5*(*v);
817: idx++; v++;
818: }
819: }
820: PetscLogFlops(10.0*a->nz);
821: VecRestoreArrayRead(xx,&x);
822: VecRestoreArray(zz,&y);
823: return(0);
824: }
826: /* ------------------------------------------------------------------------------*/
827: PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
828: {
829: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
830: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
831: const PetscScalar *x,*v;
832: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6;
833: PetscErrorCode ierr;
834: PetscInt nonzerorow=0,n,i,jrow,j;
835: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
838: VecGetArrayRead(xx,&x);
839: VecGetArray(yy,&y);
840: idx = a->j;
841: v = a->a;
842: ii = a->i;
844: for (i=0; i<m; i++) {
845: jrow = ii[i];
846: n = ii[i+1] - jrow;
847: sum1 = 0.0;
848: sum2 = 0.0;
849: sum3 = 0.0;
850: sum4 = 0.0;
851: sum5 = 0.0;
852: sum6 = 0.0;
854: nonzerorow += (n>0);
855: for (j=0; j<n; j++) {
856: sum1 += v[jrow]*x[6*idx[jrow]];
857: sum2 += v[jrow]*x[6*idx[jrow]+1];
858: sum3 += v[jrow]*x[6*idx[jrow]+2];
859: sum4 += v[jrow]*x[6*idx[jrow]+3];
860: sum5 += v[jrow]*x[6*idx[jrow]+4];
861: sum6 += v[jrow]*x[6*idx[jrow]+5];
862: jrow++;
863: }
864: y[6*i] = sum1;
865: y[6*i+1] = sum2;
866: y[6*i+2] = sum3;
867: y[6*i+3] = sum4;
868: y[6*i+4] = sum5;
869: y[6*i+5] = sum6;
870: }
872: PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);
873: VecRestoreArrayRead(xx,&x);
874: VecRestoreArray(yy,&y);
875: return(0);
876: }
878: PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
879: {
880: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
881: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
882: const PetscScalar *x,*v;
883: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
884: PetscErrorCode ierr;
885: PetscInt n,i;
886: const PetscInt m = b->AIJ->rmap->n,*idx;
889: VecSet(yy,0.0);
890: VecGetArrayRead(xx,&x);
891: VecGetArray(yy,&y);
893: for (i=0; i<m; i++) {
894: idx = a->j + a->i[i];
895: v = a->a + a->i[i];
896: n = a->i[i+1] - a->i[i];
897: alpha1 = x[6*i];
898: alpha2 = x[6*i+1];
899: alpha3 = x[6*i+2];
900: alpha4 = x[6*i+3];
901: alpha5 = x[6*i+4];
902: alpha6 = x[6*i+5];
903: while (n-->0) {
904: y[6*(*idx)] += alpha1*(*v);
905: y[6*(*idx)+1] += alpha2*(*v);
906: y[6*(*idx)+2] += alpha3*(*v);
907: y[6*(*idx)+3] += alpha4*(*v);
908: y[6*(*idx)+4] += alpha5*(*v);
909: y[6*(*idx)+5] += alpha6*(*v);
910: idx++; v++;
911: }
912: }
913: PetscLogFlops(12.0*a->nz);
914: VecRestoreArrayRead(xx,&x);
915: VecRestoreArray(yy,&y);
916: return(0);
917: }
919: PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
920: {
921: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
922: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
923: const PetscScalar *x,*v;
924: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6;
925: PetscErrorCode ierr;
926: PetscInt n,i,jrow,j;
927: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
930: if (yy != zz) {VecCopy(yy,zz);}
931: VecGetArrayRead(xx,&x);
932: VecGetArray(zz,&y);
933: idx = a->j;
934: v = a->a;
935: ii = a->i;
937: for (i=0; i<m; i++) {
938: jrow = ii[i];
939: n = ii[i+1] - jrow;
940: sum1 = 0.0;
941: sum2 = 0.0;
942: sum3 = 0.0;
943: sum4 = 0.0;
944: sum5 = 0.0;
945: sum6 = 0.0;
946: for (j=0; j<n; j++) {
947: sum1 += v[jrow]*x[6*idx[jrow]];
948: sum2 += v[jrow]*x[6*idx[jrow]+1];
949: sum3 += v[jrow]*x[6*idx[jrow]+2];
950: sum4 += v[jrow]*x[6*idx[jrow]+3];
951: sum5 += v[jrow]*x[6*idx[jrow]+4];
952: sum6 += v[jrow]*x[6*idx[jrow]+5];
953: jrow++;
954: }
955: y[6*i] += sum1;
956: y[6*i+1] += sum2;
957: y[6*i+2] += sum3;
958: y[6*i+3] += sum4;
959: y[6*i+4] += sum5;
960: y[6*i+5] += sum6;
961: }
963: PetscLogFlops(12.0*a->nz);
964: VecRestoreArrayRead(xx,&x);
965: VecRestoreArray(zz,&y);
966: return(0);
967: }
969: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
970: {
971: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
972: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
973: const PetscScalar *x,*v;
974: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
975: PetscErrorCode ierr;
976: PetscInt n,i;
977: const PetscInt m = b->AIJ->rmap->n,*idx;
980: if (yy != zz) {VecCopy(yy,zz);}
981: VecGetArrayRead(xx,&x);
982: VecGetArray(zz,&y);
984: for (i=0; i<m; i++) {
985: idx = a->j + a->i[i];
986: v = a->a + a->i[i];
987: n = a->i[i+1] - a->i[i];
988: alpha1 = x[6*i];
989: alpha2 = x[6*i+1];
990: alpha3 = x[6*i+2];
991: alpha4 = x[6*i+3];
992: alpha5 = x[6*i+4];
993: alpha6 = x[6*i+5];
994: while (n-->0) {
995: y[6*(*idx)] += alpha1*(*v);
996: y[6*(*idx)+1] += alpha2*(*v);
997: y[6*(*idx)+2] += alpha3*(*v);
998: y[6*(*idx)+3] += alpha4*(*v);
999: y[6*(*idx)+4] += alpha5*(*v);
1000: y[6*(*idx)+5] += alpha6*(*v);
1001: idx++; v++;
1002: }
1003: }
1004: PetscLogFlops(12.0*a->nz);
1005: VecRestoreArrayRead(xx,&x);
1006: VecRestoreArray(zz,&y);
1007: return(0);
1008: }
1010: /* ------------------------------------------------------------------------------*/
1011: PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1012: {
1013: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1014: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1015: const PetscScalar *x,*v;
1016: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1017: PetscErrorCode ierr;
1018: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1019: PetscInt nonzerorow=0,n,i,jrow,j;
1022: VecGetArrayRead(xx,&x);
1023: VecGetArray(yy,&y);
1024: idx = a->j;
1025: v = a->a;
1026: ii = a->i;
1028: for (i=0; i<m; i++) {
1029: jrow = ii[i];
1030: n = ii[i+1] - jrow;
1031: sum1 = 0.0;
1032: sum2 = 0.0;
1033: sum3 = 0.0;
1034: sum4 = 0.0;
1035: sum5 = 0.0;
1036: sum6 = 0.0;
1037: sum7 = 0.0;
1039: nonzerorow += (n>0);
1040: for (j=0; j<n; j++) {
1041: sum1 += v[jrow]*x[7*idx[jrow]];
1042: sum2 += v[jrow]*x[7*idx[jrow]+1];
1043: sum3 += v[jrow]*x[7*idx[jrow]+2];
1044: sum4 += v[jrow]*x[7*idx[jrow]+3];
1045: sum5 += v[jrow]*x[7*idx[jrow]+4];
1046: sum6 += v[jrow]*x[7*idx[jrow]+5];
1047: sum7 += v[jrow]*x[7*idx[jrow]+6];
1048: jrow++;
1049: }
1050: y[7*i] = sum1;
1051: y[7*i+1] = sum2;
1052: y[7*i+2] = sum3;
1053: y[7*i+3] = sum4;
1054: y[7*i+4] = sum5;
1055: y[7*i+5] = sum6;
1056: y[7*i+6] = sum7;
1057: }
1059: PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);
1060: VecRestoreArrayRead(xx,&x);
1061: VecRestoreArray(yy,&y);
1062: return(0);
1063: }
1065: PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1066: {
1067: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1068: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1069: const PetscScalar *x,*v;
1070: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1071: PetscErrorCode ierr;
1072: const PetscInt m = b->AIJ->rmap->n,*idx;
1073: PetscInt n,i;
1076: VecSet(yy,0.0);
1077: VecGetArrayRead(xx,&x);
1078: VecGetArray(yy,&y);
1080: for (i=0; i<m; i++) {
1081: idx = a->j + a->i[i];
1082: v = a->a + a->i[i];
1083: n = a->i[i+1] - a->i[i];
1084: alpha1 = x[7*i];
1085: alpha2 = x[7*i+1];
1086: alpha3 = x[7*i+2];
1087: alpha4 = x[7*i+3];
1088: alpha5 = x[7*i+4];
1089: alpha6 = x[7*i+5];
1090: alpha7 = x[7*i+6];
1091: while (n-->0) {
1092: y[7*(*idx)] += alpha1*(*v);
1093: y[7*(*idx)+1] += alpha2*(*v);
1094: y[7*(*idx)+2] += alpha3*(*v);
1095: y[7*(*idx)+3] += alpha4*(*v);
1096: y[7*(*idx)+4] += alpha5*(*v);
1097: y[7*(*idx)+5] += alpha6*(*v);
1098: y[7*(*idx)+6] += alpha7*(*v);
1099: idx++; v++;
1100: }
1101: }
1102: PetscLogFlops(14.0*a->nz);
1103: VecRestoreArrayRead(xx,&x);
1104: VecRestoreArray(yy,&y);
1105: return(0);
1106: }
1108: PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1109: {
1110: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1111: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1112: const PetscScalar *x,*v;
1113: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1114: PetscErrorCode ierr;
1115: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1116: PetscInt n,i,jrow,j;
1119: if (yy != zz) {VecCopy(yy,zz);}
1120: VecGetArrayRead(xx,&x);
1121: VecGetArray(zz,&y);
1122: idx = a->j;
1123: v = a->a;
1124: ii = a->i;
1126: for (i=0; i<m; i++) {
1127: jrow = ii[i];
1128: n = ii[i+1] - jrow;
1129: sum1 = 0.0;
1130: sum2 = 0.0;
1131: sum3 = 0.0;
1132: sum4 = 0.0;
1133: sum5 = 0.0;
1134: sum6 = 0.0;
1135: sum7 = 0.0;
1136: for (j=0; j<n; j++) {
1137: sum1 += v[jrow]*x[7*idx[jrow]];
1138: sum2 += v[jrow]*x[7*idx[jrow]+1];
1139: sum3 += v[jrow]*x[7*idx[jrow]+2];
1140: sum4 += v[jrow]*x[7*idx[jrow]+3];
1141: sum5 += v[jrow]*x[7*idx[jrow]+4];
1142: sum6 += v[jrow]*x[7*idx[jrow]+5];
1143: sum7 += v[jrow]*x[7*idx[jrow]+6];
1144: jrow++;
1145: }
1146: y[7*i] += sum1;
1147: y[7*i+1] += sum2;
1148: y[7*i+2] += sum3;
1149: y[7*i+3] += sum4;
1150: y[7*i+4] += sum5;
1151: y[7*i+5] += sum6;
1152: y[7*i+6] += sum7;
1153: }
1155: PetscLogFlops(14.0*a->nz);
1156: VecRestoreArrayRead(xx,&x);
1157: VecRestoreArray(zz,&y);
1158: return(0);
1159: }
1161: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1162: {
1163: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1164: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1165: const PetscScalar *x,*v;
1166: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1167: PetscErrorCode ierr;
1168: const PetscInt m = b->AIJ->rmap->n,*idx;
1169: PetscInt n,i;
1172: if (yy != zz) {VecCopy(yy,zz);}
1173: VecGetArrayRead(xx,&x);
1174: VecGetArray(zz,&y);
1175: for (i=0; i<m; i++) {
1176: idx = a->j + a->i[i];
1177: v = a->a + a->i[i];
1178: n = a->i[i+1] - a->i[i];
1179: alpha1 = x[7*i];
1180: alpha2 = x[7*i+1];
1181: alpha3 = x[7*i+2];
1182: alpha4 = x[7*i+3];
1183: alpha5 = x[7*i+4];
1184: alpha6 = x[7*i+5];
1185: alpha7 = x[7*i+6];
1186: while (n-->0) {
1187: y[7*(*idx)] += alpha1*(*v);
1188: y[7*(*idx)+1] += alpha2*(*v);
1189: y[7*(*idx)+2] += alpha3*(*v);
1190: y[7*(*idx)+3] += alpha4*(*v);
1191: y[7*(*idx)+4] += alpha5*(*v);
1192: y[7*(*idx)+5] += alpha6*(*v);
1193: y[7*(*idx)+6] += alpha7*(*v);
1194: idx++; v++;
1195: }
1196: }
1197: PetscLogFlops(14.0*a->nz);
1198: VecRestoreArrayRead(xx,&x);
1199: VecRestoreArray(zz,&y);
1200: return(0);
1201: }
1203: PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1204: {
1205: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1206: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1207: const PetscScalar *x,*v;
1208: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1209: PetscErrorCode ierr;
1210: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1211: PetscInt nonzerorow=0,n,i,jrow,j;
1214: VecGetArrayRead(xx,&x);
1215: VecGetArray(yy,&y);
1216: idx = a->j;
1217: v = a->a;
1218: ii = a->i;
1220: for (i=0; i<m; i++) {
1221: jrow = ii[i];
1222: n = ii[i+1] - jrow;
1223: sum1 = 0.0;
1224: sum2 = 0.0;
1225: sum3 = 0.0;
1226: sum4 = 0.0;
1227: sum5 = 0.0;
1228: sum6 = 0.0;
1229: sum7 = 0.0;
1230: sum8 = 0.0;
1232: nonzerorow += (n>0);
1233: for (j=0; j<n; j++) {
1234: sum1 += v[jrow]*x[8*idx[jrow]];
1235: sum2 += v[jrow]*x[8*idx[jrow]+1];
1236: sum3 += v[jrow]*x[8*idx[jrow]+2];
1237: sum4 += v[jrow]*x[8*idx[jrow]+3];
1238: sum5 += v[jrow]*x[8*idx[jrow]+4];
1239: sum6 += v[jrow]*x[8*idx[jrow]+5];
1240: sum7 += v[jrow]*x[8*idx[jrow]+6];
1241: sum8 += v[jrow]*x[8*idx[jrow]+7];
1242: jrow++;
1243: }
1244: y[8*i] = sum1;
1245: y[8*i+1] = sum2;
1246: y[8*i+2] = sum3;
1247: y[8*i+3] = sum4;
1248: y[8*i+4] = sum5;
1249: y[8*i+5] = sum6;
1250: y[8*i+6] = sum7;
1251: y[8*i+7] = sum8;
1252: }
1254: PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);
1255: VecRestoreArrayRead(xx,&x);
1256: VecRestoreArray(yy,&y);
1257: return(0);
1258: }
1260: PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1261: {
1262: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1263: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1264: const PetscScalar *x,*v;
1265: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1266: PetscErrorCode ierr;
1267: const PetscInt m = b->AIJ->rmap->n,*idx;
1268: PetscInt n,i;
1271: VecSet(yy,0.0);
1272: VecGetArrayRead(xx,&x);
1273: VecGetArray(yy,&y);
1275: for (i=0; i<m; i++) {
1276: idx = a->j + a->i[i];
1277: v = a->a + a->i[i];
1278: n = a->i[i+1] - a->i[i];
1279: alpha1 = x[8*i];
1280: alpha2 = x[8*i+1];
1281: alpha3 = x[8*i+2];
1282: alpha4 = x[8*i+3];
1283: alpha5 = x[8*i+4];
1284: alpha6 = x[8*i+5];
1285: alpha7 = x[8*i+6];
1286: alpha8 = x[8*i+7];
1287: while (n-->0) {
1288: y[8*(*idx)] += alpha1*(*v);
1289: y[8*(*idx)+1] += alpha2*(*v);
1290: y[8*(*idx)+2] += alpha3*(*v);
1291: y[8*(*idx)+3] += alpha4*(*v);
1292: y[8*(*idx)+4] += alpha5*(*v);
1293: y[8*(*idx)+5] += alpha6*(*v);
1294: y[8*(*idx)+6] += alpha7*(*v);
1295: y[8*(*idx)+7] += alpha8*(*v);
1296: idx++; v++;
1297: }
1298: }
1299: PetscLogFlops(16.0*a->nz);
1300: VecRestoreArrayRead(xx,&x);
1301: VecRestoreArray(yy,&y);
1302: return(0);
1303: }
1305: PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1306: {
1307: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1308: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1309: const PetscScalar *x,*v;
1310: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1311: PetscErrorCode ierr;
1312: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1313: PetscInt n,i,jrow,j;
1316: if (yy != zz) {VecCopy(yy,zz);}
1317: VecGetArrayRead(xx,&x);
1318: VecGetArray(zz,&y);
1319: idx = a->j;
1320: v = a->a;
1321: ii = a->i;
1323: for (i=0; i<m; i++) {
1324: jrow = ii[i];
1325: n = ii[i+1] - jrow;
1326: sum1 = 0.0;
1327: sum2 = 0.0;
1328: sum3 = 0.0;
1329: sum4 = 0.0;
1330: sum5 = 0.0;
1331: sum6 = 0.0;
1332: sum7 = 0.0;
1333: sum8 = 0.0;
1334: for (j=0; j<n; j++) {
1335: sum1 += v[jrow]*x[8*idx[jrow]];
1336: sum2 += v[jrow]*x[8*idx[jrow]+1];
1337: sum3 += v[jrow]*x[8*idx[jrow]+2];
1338: sum4 += v[jrow]*x[8*idx[jrow]+3];
1339: sum5 += v[jrow]*x[8*idx[jrow]+4];
1340: sum6 += v[jrow]*x[8*idx[jrow]+5];
1341: sum7 += v[jrow]*x[8*idx[jrow]+6];
1342: sum8 += v[jrow]*x[8*idx[jrow]+7];
1343: jrow++;
1344: }
1345: y[8*i] += sum1;
1346: y[8*i+1] += sum2;
1347: y[8*i+2] += sum3;
1348: y[8*i+3] += sum4;
1349: y[8*i+4] += sum5;
1350: y[8*i+5] += sum6;
1351: y[8*i+6] += sum7;
1352: y[8*i+7] += sum8;
1353: }
1355: PetscLogFlops(16.0*a->nz);
1356: VecRestoreArrayRead(xx,&x);
1357: VecRestoreArray(zz,&y);
1358: return(0);
1359: }
1361: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1362: {
1363: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1364: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1365: const PetscScalar *x,*v;
1366: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1367: PetscErrorCode ierr;
1368: const PetscInt m = b->AIJ->rmap->n,*idx;
1369: PetscInt n,i;
1372: if (yy != zz) {VecCopy(yy,zz);}
1373: VecGetArrayRead(xx,&x);
1374: VecGetArray(zz,&y);
1375: for (i=0; i<m; i++) {
1376: idx = a->j + a->i[i];
1377: v = a->a + a->i[i];
1378: n = a->i[i+1] - a->i[i];
1379: alpha1 = x[8*i];
1380: alpha2 = x[8*i+1];
1381: alpha3 = x[8*i+2];
1382: alpha4 = x[8*i+3];
1383: alpha5 = x[8*i+4];
1384: alpha6 = x[8*i+5];
1385: alpha7 = x[8*i+6];
1386: alpha8 = x[8*i+7];
1387: while (n-->0) {
1388: y[8*(*idx)] += alpha1*(*v);
1389: y[8*(*idx)+1] += alpha2*(*v);
1390: y[8*(*idx)+2] += alpha3*(*v);
1391: y[8*(*idx)+3] += alpha4*(*v);
1392: y[8*(*idx)+4] += alpha5*(*v);
1393: y[8*(*idx)+5] += alpha6*(*v);
1394: y[8*(*idx)+6] += alpha7*(*v);
1395: y[8*(*idx)+7] += alpha8*(*v);
1396: idx++; v++;
1397: }
1398: }
1399: PetscLogFlops(16.0*a->nz);
1400: VecRestoreArrayRead(xx,&x);
1401: VecRestoreArray(zz,&y);
1402: return(0);
1403: }
1405: /* ------------------------------------------------------------------------------*/
1406: PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1407: {
1408: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1409: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1410: const PetscScalar *x,*v;
1411: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1412: PetscErrorCode ierr;
1413: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1414: PetscInt nonzerorow=0,n,i,jrow,j;
1417: VecGetArrayRead(xx,&x);
1418: VecGetArray(yy,&y);
1419: idx = a->j;
1420: v = a->a;
1421: ii = a->i;
1423: for (i=0; i<m; i++) {
1424: jrow = ii[i];
1425: n = ii[i+1] - jrow;
1426: sum1 = 0.0;
1427: sum2 = 0.0;
1428: sum3 = 0.0;
1429: sum4 = 0.0;
1430: sum5 = 0.0;
1431: sum6 = 0.0;
1432: sum7 = 0.0;
1433: sum8 = 0.0;
1434: sum9 = 0.0;
1436: nonzerorow += (n>0);
1437: for (j=0; j<n; j++) {
1438: sum1 += v[jrow]*x[9*idx[jrow]];
1439: sum2 += v[jrow]*x[9*idx[jrow]+1];
1440: sum3 += v[jrow]*x[9*idx[jrow]+2];
1441: sum4 += v[jrow]*x[9*idx[jrow]+3];
1442: sum5 += v[jrow]*x[9*idx[jrow]+4];
1443: sum6 += v[jrow]*x[9*idx[jrow]+5];
1444: sum7 += v[jrow]*x[9*idx[jrow]+6];
1445: sum8 += v[jrow]*x[9*idx[jrow]+7];
1446: sum9 += v[jrow]*x[9*idx[jrow]+8];
1447: jrow++;
1448: }
1449: y[9*i] = sum1;
1450: y[9*i+1] = sum2;
1451: y[9*i+2] = sum3;
1452: y[9*i+3] = sum4;
1453: y[9*i+4] = sum5;
1454: y[9*i+5] = sum6;
1455: y[9*i+6] = sum7;
1456: y[9*i+7] = sum8;
1457: y[9*i+8] = sum9;
1458: }
1460: PetscLogFlops(18.0*a->nz - 9*nonzerorow);
1461: VecRestoreArrayRead(xx,&x);
1462: VecRestoreArray(yy,&y);
1463: return(0);
1464: }
1466: /* ------------------------------------------------------------------------------*/
1468: PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1469: {
1470: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1471: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1472: const PetscScalar *x,*v;
1473: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1474: PetscErrorCode ierr;
1475: const PetscInt m = b->AIJ->rmap->n,*idx;
1476: PetscInt n,i;
1479: VecSet(yy,0.0);
1480: VecGetArrayRead(xx,&x);
1481: VecGetArray(yy,&y);
1483: for (i=0; i<m; i++) {
1484: idx = a->j + a->i[i];
1485: v = a->a + a->i[i];
1486: n = a->i[i+1] - a->i[i];
1487: alpha1 = x[9*i];
1488: alpha2 = x[9*i+1];
1489: alpha3 = x[9*i+2];
1490: alpha4 = x[9*i+3];
1491: alpha5 = x[9*i+4];
1492: alpha6 = x[9*i+5];
1493: alpha7 = x[9*i+6];
1494: alpha8 = x[9*i+7];
1495: alpha9 = x[9*i+8];
1496: while (n-->0) {
1497: y[9*(*idx)] += alpha1*(*v);
1498: y[9*(*idx)+1] += alpha2*(*v);
1499: y[9*(*idx)+2] += alpha3*(*v);
1500: y[9*(*idx)+3] += alpha4*(*v);
1501: y[9*(*idx)+4] += alpha5*(*v);
1502: y[9*(*idx)+5] += alpha6*(*v);
1503: y[9*(*idx)+6] += alpha7*(*v);
1504: y[9*(*idx)+7] += alpha8*(*v);
1505: y[9*(*idx)+8] += alpha9*(*v);
1506: idx++; v++;
1507: }
1508: }
1509: PetscLogFlops(18.0*a->nz);
1510: VecRestoreArrayRead(xx,&x);
1511: VecRestoreArray(yy,&y);
1512: return(0);
1513: }
1515: PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1516: {
1517: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1518: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1519: const PetscScalar *x,*v;
1520: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1521: PetscErrorCode ierr;
1522: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1523: PetscInt n,i,jrow,j;
1526: if (yy != zz) {VecCopy(yy,zz);}
1527: VecGetArrayRead(xx,&x);
1528: VecGetArray(zz,&y);
1529: idx = a->j;
1530: v = a->a;
1531: ii = a->i;
1533: for (i=0; i<m; i++) {
1534: jrow = ii[i];
1535: n = ii[i+1] - jrow;
1536: sum1 = 0.0;
1537: sum2 = 0.0;
1538: sum3 = 0.0;
1539: sum4 = 0.0;
1540: sum5 = 0.0;
1541: sum6 = 0.0;
1542: sum7 = 0.0;
1543: sum8 = 0.0;
1544: sum9 = 0.0;
1545: for (j=0; j<n; j++) {
1546: sum1 += v[jrow]*x[9*idx[jrow]];
1547: sum2 += v[jrow]*x[9*idx[jrow]+1];
1548: sum3 += v[jrow]*x[9*idx[jrow]+2];
1549: sum4 += v[jrow]*x[9*idx[jrow]+3];
1550: sum5 += v[jrow]*x[9*idx[jrow]+4];
1551: sum6 += v[jrow]*x[9*idx[jrow]+5];
1552: sum7 += v[jrow]*x[9*idx[jrow]+6];
1553: sum8 += v[jrow]*x[9*idx[jrow]+7];
1554: sum9 += v[jrow]*x[9*idx[jrow]+8];
1555: jrow++;
1556: }
1557: y[9*i] += sum1;
1558: y[9*i+1] += sum2;
1559: y[9*i+2] += sum3;
1560: y[9*i+3] += sum4;
1561: y[9*i+4] += sum5;
1562: y[9*i+5] += sum6;
1563: y[9*i+6] += sum7;
1564: y[9*i+7] += sum8;
1565: y[9*i+8] += sum9;
1566: }
1568: PetscLogFlops(18.0*a->nz);
1569: VecRestoreArrayRead(xx,&x);
1570: VecRestoreArray(zz,&y);
1571: return(0);
1572: }
1574: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1575: {
1576: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1577: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1578: const PetscScalar *x,*v;
1579: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1580: PetscErrorCode ierr;
1581: const PetscInt m = b->AIJ->rmap->n,*idx;
1582: PetscInt n,i;
1585: if (yy != zz) {VecCopy(yy,zz);}
1586: VecGetArrayRead(xx,&x);
1587: VecGetArray(zz,&y);
1588: for (i=0; i<m; i++) {
1589: idx = a->j + a->i[i];
1590: v = a->a + a->i[i];
1591: n = a->i[i+1] - a->i[i];
1592: alpha1 = x[9*i];
1593: alpha2 = x[9*i+1];
1594: alpha3 = x[9*i+2];
1595: alpha4 = x[9*i+3];
1596: alpha5 = x[9*i+4];
1597: alpha6 = x[9*i+5];
1598: alpha7 = x[9*i+6];
1599: alpha8 = x[9*i+7];
1600: alpha9 = x[9*i+8];
1601: while (n-->0) {
1602: y[9*(*idx)] += alpha1*(*v);
1603: y[9*(*idx)+1] += alpha2*(*v);
1604: y[9*(*idx)+2] += alpha3*(*v);
1605: y[9*(*idx)+3] += alpha4*(*v);
1606: y[9*(*idx)+4] += alpha5*(*v);
1607: y[9*(*idx)+5] += alpha6*(*v);
1608: y[9*(*idx)+6] += alpha7*(*v);
1609: y[9*(*idx)+7] += alpha8*(*v);
1610: y[9*(*idx)+8] += alpha9*(*v);
1611: idx++; v++;
1612: }
1613: }
1614: PetscLogFlops(18.0*a->nz);
1615: VecRestoreArrayRead(xx,&x);
1616: VecRestoreArray(zz,&y);
1617: return(0);
1618: }
1619: PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1620: {
1621: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1622: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1623: const PetscScalar *x,*v;
1624: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1625: PetscErrorCode ierr;
1626: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1627: PetscInt nonzerorow=0,n,i,jrow,j;
1630: VecGetArrayRead(xx,&x);
1631: VecGetArray(yy,&y);
1632: idx = a->j;
1633: v = a->a;
1634: ii = a->i;
1636: for (i=0; i<m; i++) {
1637: jrow = ii[i];
1638: n = ii[i+1] - jrow;
1639: sum1 = 0.0;
1640: sum2 = 0.0;
1641: sum3 = 0.0;
1642: sum4 = 0.0;
1643: sum5 = 0.0;
1644: sum6 = 0.0;
1645: sum7 = 0.0;
1646: sum8 = 0.0;
1647: sum9 = 0.0;
1648: sum10 = 0.0;
1650: nonzerorow += (n>0);
1651: for (j=0; j<n; j++) {
1652: sum1 += v[jrow]*x[10*idx[jrow]];
1653: sum2 += v[jrow]*x[10*idx[jrow]+1];
1654: sum3 += v[jrow]*x[10*idx[jrow]+2];
1655: sum4 += v[jrow]*x[10*idx[jrow]+3];
1656: sum5 += v[jrow]*x[10*idx[jrow]+4];
1657: sum6 += v[jrow]*x[10*idx[jrow]+5];
1658: sum7 += v[jrow]*x[10*idx[jrow]+6];
1659: sum8 += v[jrow]*x[10*idx[jrow]+7];
1660: sum9 += v[jrow]*x[10*idx[jrow]+8];
1661: sum10 += v[jrow]*x[10*idx[jrow]+9];
1662: jrow++;
1663: }
1664: y[10*i] = sum1;
1665: y[10*i+1] = sum2;
1666: y[10*i+2] = sum3;
1667: y[10*i+3] = sum4;
1668: y[10*i+4] = sum5;
1669: y[10*i+5] = sum6;
1670: y[10*i+6] = sum7;
1671: y[10*i+7] = sum8;
1672: y[10*i+8] = sum9;
1673: y[10*i+9] = sum10;
1674: }
1676: PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);
1677: VecRestoreArrayRead(xx,&x);
1678: VecRestoreArray(yy,&y);
1679: return(0);
1680: }
1682: PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1683: {
1684: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1685: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1686: const PetscScalar *x,*v;
1687: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1688: PetscErrorCode ierr;
1689: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1690: PetscInt n,i,jrow,j;
1693: if (yy != zz) {VecCopy(yy,zz);}
1694: VecGetArrayRead(xx,&x);
1695: VecGetArray(zz,&y);
1696: idx = a->j;
1697: v = a->a;
1698: ii = a->i;
1700: for (i=0; i<m; i++) {
1701: jrow = ii[i];
1702: n = ii[i+1] - jrow;
1703: sum1 = 0.0;
1704: sum2 = 0.0;
1705: sum3 = 0.0;
1706: sum4 = 0.0;
1707: sum5 = 0.0;
1708: sum6 = 0.0;
1709: sum7 = 0.0;
1710: sum8 = 0.0;
1711: sum9 = 0.0;
1712: sum10 = 0.0;
1713: for (j=0; j<n; j++) {
1714: sum1 += v[jrow]*x[10*idx[jrow]];
1715: sum2 += v[jrow]*x[10*idx[jrow]+1];
1716: sum3 += v[jrow]*x[10*idx[jrow]+2];
1717: sum4 += v[jrow]*x[10*idx[jrow]+3];
1718: sum5 += v[jrow]*x[10*idx[jrow]+4];
1719: sum6 += v[jrow]*x[10*idx[jrow]+5];
1720: sum7 += v[jrow]*x[10*idx[jrow]+6];
1721: sum8 += v[jrow]*x[10*idx[jrow]+7];
1722: sum9 += v[jrow]*x[10*idx[jrow]+8];
1723: sum10 += v[jrow]*x[10*idx[jrow]+9];
1724: jrow++;
1725: }
1726: y[10*i] += sum1;
1727: y[10*i+1] += sum2;
1728: y[10*i+2] += sum3;
1729: y[10*i+3] += sum4;
1730: y[10*i+4] += sum5;
1731: y[10*i+5] += sum6;
1732: y[10*i+6] += sum7;
1733: y[10*i+7] += sum8;
1734: y[10*i+8] += sum9;
1735: y[10*i+9] += sum10;
1736: }
1738: PetscLogFlops(20.0*a->nz);
1739: VecRestoreArrayRead(xx,&x);
1740: VecRestoreArray(yy,&y);
1741: return(0);
1742: }
1744: PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1745: {
1746: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1747: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1748: const PetscScalar *x,*v;
1749: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1750: PetscErrorCode ierr;
1751: const PetscInt m = b->AIJ->rmap->n,*idx;
1752: PetscInt n,i;
1755: VecSet(yy,0.0);
1756: VecGetArrayRead(xx,&x);
1757: VecGetArray(yy,&y);
1759: for (i=0; i<m; i++) {
1760: idx = a->j + a->i[i];
1761: v = a->a + a->i[i];
1762: n = a->i[i+1] - a->i[i];
1763: alpha1 = x[10*i];
1764: alpha2 = x[10*i+1];
1765: alpha3 = x[10*i+2];
1766: alpha4 = x[10*i+3];
1767: alpha5 = x[10*i+4];
1768: alpha6 = x[10*i+5];
1769: alpha7 = x[10*i+6];
1770: alpha8 = x[10*i+7];
1771: alpha9 = x[10*i+8];
1772: alpha10 = x[10*i+9];
1773: while (n-->0) {
1774: y[10*(*idx)] += alpha1*(*v);
1775: y[10*(*idx)+1] += alpha2*(*v);
1776: y[10*(*idx)+2] += alpha3*(*v);
1777: y[10*(*idx)+3] += alpha4*(*v);
1778: y[10*(*idx)+4] += alpha5*(*v);
1779: y[10*(*idx)+5] += alpha6*(*v);
1780: y[10*(*idx)+6] += alpha7*(*v);
1781: y[10*(*idx)+7] += alpha8*(*v);
1782: y[10*(*idx)+8] += alpha9*(*v);
1783: y[10*(*idx)+9] += alpha10*(*v);
1784: idx++; v++;
1785: }
1786: }
1787: PetscLogFlops(20.0*a->nz);
1788: VecRestoreArrayRead(xx,&x);
1789: VecRestoreArray(yy,&y);
1790: return(0);
1791: }
1793: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1794: {
1795: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1796: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1797: const PetscScalar *x,*v;
1798: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1799: PetscErrorCode ierr;
1800: const PetscInt m = b->AIJ->rmap->n,*idx;
1801: PetscInt n,i;
1804: if (yy != zz) {VecCopy(yy,zz);}
1805: VecGetArrayRead(xx,&x);
1806: VecGetArray(zz,&y);
1807: for (i=0; i<m; i++) {
1808: idx = a->j + a->i[i];
1809: v = a->a + a->i[i];
1810: n = a->i[i+1] - a->i[i];
1811: alpha1 = x[10*i];
1812: alpha2 = x[10*i+1];
1813: alpha3 = x[10*i+2];
1814: alpha4 = x[10*i+3];
1815: alpha5 = x[10*i+4];
1816: alpha6 = x[10*i+5];
1817: alpha7 = x[10*i+6];
1818: alpha8 = x[10*i+7];
1819: alpha9 = x[10*i+8];
1820: alpha10 = x[10*i+9];
1821: while (n-->0) {
1822: y[10*(*idx)] += alpha1*(*v);
1823: y[10*(*idx)+1] += alpha2*(*v);
1824: y[10*(*idx)+2] += alpha3*(*v);
1825: y[10*(*idx)+3] += alpha4*(*v);
1826: y[10*(*idx)+4] += alpha5*(*v);
1827: y[10*(*idx)+5] += alpha6*(*v);
1828: y[10*(*idx)+6] += alpha7*(*v);
1829: y[10*(*idx)+7] += alpha8*(*v);
1830: y[10*(*idx)+8] += alpha9*(*v);
1831: y[10*(*idx)+9] += alpha10*(*v);
1832: idx++; v++;
1833: }
1834: }
1835: PetscLogFlops(20.0*a->nz);
1836: VecRestoreArrayRead(xx,&x);
1837: VecRestoreArray(zz,&y);
1838: return(0);
1839: }
1842: /*--------------------------------------------------------------------------------------------*/
1843: PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1844: {
1845: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1846: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1847: const PetscScalar *x,*v;
1848: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1849: PetscErrorCode ierr;
1850: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1851: PetscInt nonzerorow=0,n,i,jrow,j;
1854: VecGetArrayRead(xx,&x);
1855: VecGetArray(yy,&y);
1856: idx = a->j;
1857: v = a->a;
1858: ii = a->i;
1860: for (i=0; i<m; i++) {
1861: jrow = ii[i];
1862: n = ii[i+1] - jrow;
1863: sum1 = 0.0;
1864: sum2 = 0.0;
1865: sum3 = 0.0;
1866: sum4 = 0.0;
1867: sum5 = 0.0;
1868: sum6 = 0.0;
1869: sum7 = 0.0;
1870: sum8 = 0.0;
1871: sum9 = 0.0;
1872: sum10 = 0.0;
1873: sum11 = 0.0;
1875: nonzerorow += (n>0);
1876: for (j=0; j<n; j++) {
1877: sum1 += v[jrow]*x[11*idx[jrow]];
1878: sum2 += v[jrow]*x[11*idx[jrow]+1];
1879: sum3 += v[jrow]*x[11*idx[jrow]+2];
1880: sum4 += v[jrow]*x[11*idx[jrow]+3];
1881: sum5 += v[jrow]*x[11*idx[jrow]+4];
1882: sum6 += v[jrow]*x[11*idx[jrow]+5];
1883: sum7 += v[jrow]*x[11*idx[jrow]+6];
1884: sum8 += v[jrow]*x[11*idx[jrow]+7];
1885: sum9 += v[jrow]*x[11*idx[jrow]+8];
1886: sum10 += v[jrow]*x[11*idx[jrow]+9];
1887: sum11 += v[jrow]*x[11*idx[jrow]+10];
1888: jrow++;
1889: }
1890: y[11*i] = sum1;
1891: y[11*i+1] = sum2;
1892: y[11*i+2] = sum3;
1893: y[11*i+3] = sum4;
1894: y[11*i+4] = sum5;
1895: y[11*i+5] = sum6;
1896: y[11*i+6] = sum7;
1897: y[11*i+7] = sum8;
1898: y[11*i+8] = sum9;
1899: y[11*i+9] = sum10;
1900: y[11*i+10] = sum11;
1901: }
1903: PetscLogFlops(22.0*a->nz - 11*nonzerorow);
1904: VecRestoreArrayRead(xx,&x);
1905: VecRestoreArray(yy,&y);
1906: return(0);
1907: }
1909: PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1910: {
1911: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1912: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1913: const PetscScalar *x,*v;
1914: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1915: PetscErrorCode ierr;
1916: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1917: PetscInt n,i,jrow,j;
1920: if (yy != zz) {VecCopy(yy,zz);}
1921: VecGetArrayRead(xx,&x);
1922: VecGetArray(zz,&y);
1923: idx = a->j;
1924: v = a->a;
1925: ii = a->i;
1927: for (i=0; i<m; i++) {
1928: jrow = ii[i];
1929: n = ii[i+1] - jrow;
1930: sum1 = 0.0;
1931: sum2 = 0.0;
1932: sum3 = 0.0;
1933: sum4 = 0.0;
1934: sum5 = 0.0;
1935: sum6 = 0.0;
1936: sum7 = 0.0;
1937: sum8 = 0.0;
1938: sum9 = 0.0;
1939: sum10 = 0.0;
1940: sum11 = 0.0;
1941: for (j=0; j<n; j++) {
1942: sum1 += v[jrow]*x[11*idx[jrow]];
1943: sum2 += v[jrow]*x[11*idx[jrow]+1];
1944: sum3 += v[jrow]*x[11*idx[jrow]+2];
1945: sum4 += v[jrow]*x[11*idx[jrow]+3];
1946: sum5 += v[jrow]*x[11*idx[jrow]+4];
1947: sum6 += v[jrow]*x[11*idx[jrow]+5];
1948: sum7 += v[jrow]*x[11*idx[jrow]+6];
1949: sum8 += v[jrow]*x[11*idx[jrow]+7];
1950: sum9 += v[jrow]*x[11*idx[jrow]+8];
1951: sum10 += v[jrow]*x[11*idx[jrow]+9];
1952: sum11 += v[jrow]*x[11*idx[jrow]+10];
1953: jrow++;
1954: }
1955: y[11*i] += sum1;
1956: y[11*i+1] += sum2;
1957: y[11*i+2] += sum3;
1958: y[11*i+3] += sum4;
1959: y[11*i+4] += sum5;
1960: y[11*i+5] += sum6;
1961: y[11*i+6] += sum7;
1962: y[11*i+7] += sum8;
1963: y[11*i+8] += sum9;
1964: y[11*i+9] += sum10;
1965: y[11*i+10] += sum11;
1966: }
1968: PetscLogFlops(22.0*a->nz);
1969: VecRestoreArrayRead(xx,&x);
1970: VecRestoreArray(yy,&y);
1971: return(0);
1972: }
1974: PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1975: {
1976: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1977: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1978: const PetscScalar *x,*v;
1979: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
1980: PetscErrorCode ierr;
1981: const PetscInt m = b->AIJ->rmap->n,*idx;
1982: PetscInt n,i;
1985: VecSet(yy,0.0);
1986: VecGetArrayRead(xx,&x);
1987: VecGetArray(yy,&y);
1989: for (i=0; i<m; i++) {
1990: idx = a->j + a->i[i];
1991: v = a->a + a->i[i];
1992: n = a->i[i+1] - a->i[i];
1993: alpha1 = x[11*i];
1994: alpha2 = x[11*i+1];
1995: alpha3 = x[11*i+2];
1996: alpha4 = x[11*i+3];
1997: alpha5 = x[11*i+4];
1998: alpha6 = x[11*i+5];
1999: alpha7 = x[11*i+6];
2000: alpha8 = x[11*i+7];
2001: alpha9 = x[11*i+8];
2002: alpha10 = x[11*i+9];
2003: alpha11 = x[11*i+10];
2004: while (n-->0) {
2005: y[11*(*idx)] += alpha1*(*v);
2006: y[11*(*idx)+1] += alpha2*(*v);
2007: y[11*(*idx)+2] += alpha3*(*v);
2008: y[11*(*idx)+3] += alpha4*(*v);
2009: y[11*(*idx)+4] += alpha5*(*v);
2010: y[11*(*idx)+5] += alpha6*(*v);
2011: y[11*(*idx)+6] += alpha7*(*v);
2012: y[11*(*idx)+7] += alpha8*(*v);
2013: y[11*(*idx)+8] += alpha9*(*v);
2014: y[11*(*idx)+9] += alpha10*(*v);
2015: y[11*(*idx)+10] += alpha11*(*v);
2016: idx++; v++;
2017: }
2018: }
2019: PetscLogFlops(22.0*a->nz);
2020: VecRestoreArrayRead(xx,&x);
2021: VecRestoreArray(yy,&y);
2022: return(0);
2023: }
2025: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2026: {
2027: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2028: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2029: const PetscScalar *x,*v;
2030: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2031: PetscErrorCode ierr;
2032: const PetscInt m = b->AIJ->rmap->n,*idx;
2033: PetscInt n,i;
2036: if (yy != zz) {VecCopy(yy,zz);}
2037: VecGetArrayRead(xx,&x);
2038: VecGetArray(zz,&y);
2039: for (i=0; i<m; i++) {
2040: idx = a->j + a->i[i];
2041: v = a->a + a->i[i];
2042: n = a->i[i+1] - a->i[i];
2043: alpha1 = x[11*i];
2044: alpha2 = x[11*i+1];
2045: alpha3 = x[11*i+2];
2046: alpha4 = x[11*i+3];
2047: alpha5 = x[11*i+4];
2048: alpha6 = x[11*i+5];
2049: alpha7 = x[11*i+6];
2050: alpha8 = x[11*i+7];
2051: alpha9 = x[11*i+8];
2052: alpha10 = x[11*i+9];
2053: alpha11 = x[11*i+10];
2054: while (n-->0) {
2055: y[11*(*idx)] += alpha1*(*v);
2056: y[11*(*idx)+1] += alpha2*(*v);
2057: y[11*(*idx)+2] += alpha3*(*v);
2058: y[11*(*idx)+3] += alpha4*(*v);
2059: y[11*(*idx)+4] += alpha5*(*v);
2060: y[11*(*idx)+5] += alpha6*(*v);
2061: y[11*(*idx)+6] += alpha7*(*v);
2062: y[11*(*idx)+7] += alpha8*(*v);
2063: y[11*(*idx)+8] += alpha9*(*v);
2064: y[11*(*idx)+9] += alpha10*(*v);
2065: y[11*(*idx)+10] += alpha11*(*v);
2066: idx++; v++;
2067: }
2068: }
2069: PetscLogFlops(22.0*a->nz);
2070: VecRestoreArrayRead(xx,&x);
2071: VecRestoreArray(zz,&y);
2072: return(0);
2073: }
2076: /*--------------------------------------------------------------------------------------------*/
2077: PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2078: {
2079: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2080: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2081: const PetscScalar *x,*v;
2082: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2083: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2084: PetscErrorCode ierr;
2085: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2086: PetscInt nonzerorow=0,n,i,jrow,j;
2089: VecGetArrayRead(xx,&x);
2090: VecGetArray(yy,&y);
2091: idx = a->j;
2092: v = a->a;
2093: ii = a->i;
2095: for (i=0; i<m; i++) {
2096: jrow = ii[i];
2097: n = ii[i+1] - jrow;
2098: sum1 = 0.0;
2099: sum2 = 0.0;
2100: sum3 = 0.0;
2101: sum4 = 0.0;
2102: sum5 = 0.0;
2103: sum6 = 0.0;
2104: sum7 = 0.0;
2105: sum8 = 0.0;
2106: sum9 = 0.0;
2107: sum10 = 0.0;
2108: sum11 = 0.0;
2109: sum12 = 0.0;
2110: sum13 = 0.0;
2111: sum14 = 0.0;
2112: sum15 = 0.0;
2113: sum16 = 0.0;
2115: nonzerorow += (n>0);
2116: for (j=0; j<n; j++) {
2117: sum1 += v[jrow]*x[16*idx[jrow]];
2118: sum2 += v[jrow]*x[16*idx[jrow]+1];
2119: sum3 += v[jrow]*x[16*idx[jrow]+2];
2120: sum4 += v[jrow]*x[16*idx[jrow]+3];
2121: sum5 += v[jrow]*x[16*idx[jrow]+4];
2122: sum6 += v[jrow]*x[16*idx[jrow]+5];
2123: sum7 += v[jrow]*x[16*idx[jrow]+6];
2124: sum8 += v[jrow]*x[16*idx[jrow]+7];
2125: sum9 += v[jrow]*x[16*idx[jrow]+8];
2126: sum10 += v[jrow]*x[16*idx[jrow]+9];
2127: sum11 += v[jrow]*x[16*idx[jrow]+10];
2128: sum12 += v[jrow]*x[16*idx[jrow]+11];
2129: sum13 += v[jrow]*x[16*idx[jrow]+12];
2130: sum14 += v[jrow]*x[16*idx[jrow]+13];
2131: sum15 += v[jrow]*x[16*idx[jrow]+14];
2132: sum16 += v[jrow]*x[16*idx[jrow]+15];
2133: jrow++;
2134: }
2135: y[16*i] = sum1;
2136: y[16*i+1] = sum2;
2137: y[16*i+2] = sum3;
2138: y[16*i+3] = sum4;
2139: y[16*i+4] = sum5;
2140: y[16*i+5] = sum6;
2141: y[16*i+6] = sum7;
2142: y[16*i+7] = sum8;
2143: y[16*i+8] = sum9;
2144: y[16*i+9] = sum10;
2145: y[16*i+10] = sum11;
2146: y[16*i+11] = sum12;
2147: y[16*i+12] = sum13;
2148: y[16*i+13] = sum14;
2149: y[16*i+14] = sum15;
2150: y[16*i+15] = sum16;
2151: }
2153: PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);
2154: VecRestoreArrayRead(xx,&x);
2155: VecRestoreArray(yy,&y);
2156: return(0);
2157: }
2159: PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2160: {
2161: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2162: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2163: const PetscScalar *x,*v;
2164: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2165: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2166: PetscErrorCode ierr;
2167: const PetscInt m = b->AIJ->rmap->n,*idx;
2168: PetscInt n,i;
2171: VecSet(yy,0.0);
2172: VecGetArrayRead(xx,&x);
2173: VecGetArray(yy,&y);
2175: for (i=0; i<m; i++) {
2176: idx = a->j + a->i[i];
2177: v = a->a + a->i[i];
2178: n = a->i[i+1] - a->i[i];
2179: alpha1 = x[16*i];
2180: alpha2 = x[16*i+1];
2181: alpha3 = x[16*i+2];
2182: alpha4 = x[16*i+3];
2183: alpha5 = x[16*i+4];
2184: alpha6 = x[16*i+5];
2185: alpha7 = x[16*i+6];
2186: alpha8 = x[16*i+7];
2187: alpha9 = x[16*i+8];
2188: alpha10 = x[16*i+9];
2189: alpha11 = x[16*i+10];
2190: alpha12 = x[16*i+11];
2191: alpha13 = x[16*i+12];
2192: alpha14 = x[16*i+13];
2193: alpha15 = x[16*i+14];
2194: alpha16 = x[16*i+15];
2195: while (n-->0) {
2196: y[16*(*idx)] += alpha1*(*v);
2197: y[16*(*idx)+1] += alpha2*(*v);
2198: y[16*(*idx)+2] += alpha3*(*v);
2199: y[16*(*idx)+3] += alpha4*(*v);
2200: y[16*(*idx)+4] += alpha5*(*v);
2201: y[16*(*idx)+5] += alpha6*(*v);
2202: y[16*(*idx)+6] += alpha7*(*v);
2203: y[16*(*idx)+7] += alpha8*(*v);
2204: y[16*(*idx)+8] += alpha9*(*v);
2205: y[16*(*idx)+9] += alpha10*(*v);
2206: y[16*(*idx)+10] += alpha11*(*v);
2207: y[16*(*idx)+11] += alpha12*(*v);
2208: y[16*(*idx)+12] += alpha13*(*v);
2209: y[16*(*idx)+13] += alpha14*(*v);
2210: y[16*(*idx)+14] += alpha15*(*v);
2211: y[16*(*idx)+15] += alpha16*(*v);
2212: idx++; v++;
2213: }
2214: }
2215: PetscLogFlops(32.0*a->nz);
2216: VecRestoreArrayRead(xx,&x);
2217: VecRestoreArray(yy,&y);
2218: return(0);
2219: }
2221: PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2222: {
2223: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2224: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2225: const PetscScalar *x,*v;
2226: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2227: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2228: PetscErrorCode ierr;
2229: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2230: PetscInt n,i,jrow,j;
2233: if (yy != zz) {VecCopy(yy,zz);}
2234: VecGetArrayRead(xx,&x);
2235: VecGetArray(zz,&y);
2236: idx = a->j;
2237: v = a->a;
2238: ii = a->i;
2240: for (i=0; i<m; i++) {
2241: jrow = ii[i];
2242: n = ii[i+1] - jrow;
2243: sum1 = 0.0;
2244: sum2 = 0.0;
2245: sum3 = 0.0;
2246: sum4 = 0.0;
2247: sum5 = 0.0;
2248: sum6 = 0.0;
2249: sum7 = 0.0;
2250: sum8 = 0.0;
2251: sum9 = 0.0;
2252: sum10 = 0.0;
2253: sum11 = 0.0;
2254: sum12 = 0.0;
2255: sum13 = 0.0;
2256: sum14 = 0.0;
2257: sum15 = 0.0;
2258: sum16 = 0.0;
2259: for (j=0; j<n; j++) {
2260: sum1 += v[jrow]*x[16*idx[jrow]];
2261: sum2 += v[jrow]*x[16*idx[jrow]+1];
2262: sum3 += v[jrow]*x[16*idx[jrow]+2];
2263: sum4 += v[jrow]*x[16*idx[jrow]+3];
2264: sum5 += v[jrow]*x[16*idx[jrow]+4];
2265: sum6 += v[jrow]*x[16*idx[jrow]+5];
2266: sum7 += v[jrow]*x[16*idx[jrow]+6];
2267: sum8 += v[jrow]*x[16*idx[jrow]+7];
2268: sum9 += v[jrow]*x[16*idx[jrow]+8];
2269: sum10 += v[jrow]*x[16*idx[jrow]+9];
2270: sum11 += v[jrow]*x[16*idx[jrow]+10];
2271: sum12 += v[jrow]*x[16*idx[jrow]+11];
2272: sum13 += v[jrow]*x[16*idx[jrow]+12];
2273: sum14 += v[jrow]*x[16*idx[jrow]+13];
2274: sum15 += v[jrow]*x[16*idx[jrow]+14];
2275: sum16 += v[jrow]*x[16*idx[jrow]+15];
2276: jrow++;
2277: }
2278: y[16*i] += sum1;
2279: y[16*i+1] += sum2;
2280: y[16*i+2] += sum3;
2281: y[16*i+3] += sum4;
2282: y[16*i+4] += sum5;
2283: y[16*i+5] += sum6;
2284: y[16*i+6] += sum7;
2285: y[16*i+7] += sum8;
2286: y[16*i+8] += sum9;
2287: y[16*i+9] += sum10;
2288: y[16*i+10] += sum11;
2289: y[16*i+11] += sum12;
2290: y[16*i+12] += sum13;
2291: y[16*i+13] += sum14;
2292: y[16*i+14] += sum15;
2293: y[16*i+15] += sum16;
2294: }
2296: PetscLogFlops(32.0*a->nz);
2297: VecRestoreArrayRead(xx,&x);
2298: VecRestoreArray(zz,&y);
2299: return(0);
2300: }
2302: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2303: {
2304: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2305: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2306: const PetscScalar *x,*v;
2307: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2308: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2309: PetscErrorCode ierr;
2310: const PetscInt m = b->AIJ->rmap->n,*idx;
2311: PetscInt n,i;
2314: if (yy != zz) {VecCopy(yy,zz);}
2315: VecGetArrayRead(xx,&x);
2316: VecGetArray(zz,&y);
2317: for (i=0; i<m; i++) {
2318: idx = a->j + a->i[i];
2319: v = a->a + a->i[i];
2320: n = a->i[i+1] - a->i[i];
2321: alpha1 = x[16*i];
2322: alpha2 = x[16*i+1];
2323: alpha3 = x[16*i+2];
2324: alpha4 = x[16*i+3];
2325: alpha5 = x[16*i+4];
2326: alpha6 = x[16*i+5];
2327: alpha7 = x[16*i+6];
2328: alpha8 = x[16*i+7];
2329: alpha9 = x[16*i+8];
2330: alpha10 = x[16*i+9];
2331: alpha11 = x[16*i+10];
2332: alpha12 = x[16*i+11];
2333: alpha13 = x[16*i+12];
2334: alpha14 = x[16*i+13];
2335: alpha15 = x[16*i+14];
2336: alpha16 = x[16*i+15];
2337: while (n-->0) {
2338: y[16*(*idx)] += alpha1*(*v);
2339: y[16*(*idx)+1] += alpha2*(*v);
2340: y[16*(*idx)+2] += alpha3*(*v);
2341: y[16*(*idx)+3] += alpha4*(*v);
2342: y[16*(*idx)+4] += alpha5*(*v);
2343: y[16*(*idx)+5] += alpha6*(*v);
2344: y[16*(*idx)+6] += alpha7*(*v);
2345: y[16*(*idx)+7] += alpha8*(*v);
2346: y[16*(*idx)+8] += alpha9*(*v);
2347: y[16*(*idx)+9] += alpha10*(*v);
2348: y[16*(*idx)+10] += alpha11*(*v);
2349: y[16*(*idx)+11] += alpha12*(*v);
2350: y[16*(*idx)+12] += alpha13*(*v);
2351: y[16*(*idx)+13] += alpha14*(*v);
2352: y[16*(*idx)+14] += alpha15*(*v);
2353: y[16*(*idx)+15] += alpha16*(*v);
2354: idx++; v++;
2355: }
2356: }
2357: PetscLogFlops(32.0*a->nz);
2358: VecRestoreArrayRead(xx,&x);
2359: VecRestoreArray(zz,&y);
2360: return(0);
2361: }
2363: /*--------------------------------------------------------------------------------------------*/
2364: PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2365: {
2366: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2367: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2368: const PetscScalar *x,*v;
2369: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2370: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2371: PetscErrorCode ierr;
2372: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2373: PetscInt nonzerorow=0,n,i,jrow,j;
2376: VecGetArrayRead(xx,&x);
2377: VecGetArray(yy,&y);
2378: idx = a->j;
2379: v = a->a;
2380: ii = a->i;
2382: for (i=0; i<m; i++) {
2383: jrow = ii[i];
2384: n = ii[i+1] - jrow;
2385: sum1 = 0.0;
2386: sum2 = 0.0;
2387: sum3 = 0.0;
2388: sum4 = 0.0;
2389: sum5 = 0.0;
2390: sum6 = 0.0;
2391: sum7 = 0.0;
2392: sum8 = 0.0;
2393: sum9 = 0.0;
2394: sum10 = 0.0;
2395: sum11 = 0.0;
2396: sum12 = 0.0;
2397: sum13 = 0.0;
2398: sum14 = 0.0;
2399: sum15 = 0.0;
2400: sum16 = 0.0;
2401: sum17 = 0.0;
2402: sum18 = 0.0;
2404: nonzerorow += (n>0);
2405: for (j=0; j<n; j++) {
2406: sum1 += v[jrow]*x[18*idx[jrow]];
2407: sum2 += v[jrow]*x[18*idx[jrow]+1];
2408: sum3 += v[jrow]*x[18*idx[jrow]+2];
2409: sum4 += v[jrow]*x[18*idx[jrow]+3];
2410: sum5 += v[jrow]*x[18*idx[jrow]+4];
2411: sum6 += v[jrow]*x[18*idx[jrow]+5];
2412: sum7 += v[jrow]*x[18*idx[jrow]+6];
2413: sum8 += v[jrow]*x[18*idx[jrow]+7];
2414: sum9 += v[jrow]*x[18*idx[jrow]+8];
2415: sum10 += v[jrow]*x[18*idx[jrow]+9];
2416: sum11 += v[jrow]*x[18*idx[jrow]+10];
2417: sum12 += v[jrow]*x[18*idx[jrow]+11];
2418: sum13 += v[jrow]*x[18*idx[jrow]+12];
2419: sum14 += v[jrow]*x[18*idx[jrow]+13];
2420: sum15 += v[jrow]*x[18*idx[jrow]+14];
2421: sum16 += v[jrow]*x[18*idx[jrow]+15];
2422: sum17 += v[jrow]*x[18*idx[jrow]+16];
2423: sum18 += v[jrow]*x[18*idx[jrow]+17];
2424: jrow++;
2425: }
2426: y[18*i] = sum1;
2427: y[18*i+1] = sum2;
2428: y[18*i+2] = sum3;
2429: y[18*i+3] = sum4;
2430: y[18*i+4] = sum5;
2431: y[18*i+5] = sum6;
2432: y[18*i+6] = sum7;
2433: y[18*i+7] = sum8;
2434: y[18*i+8] = sum9;
2435: y[18*i+9] = sum10;
2436: y[18*i+10] = sum11;
2437: y[18*i+11] = sum12;
2438: y[18*i+12] = sum13;
2439: y[18*i+13] = sum14;
2440: y[18*i+14] = sum15;
2441: y[18*i+15] = sum16;
2442: y[18*i+16] = sum17;
2443: y[18*i+17] = sum18;
2444: }
2446: PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);
2447: VecRestoreArrayRead(xx,&x);
2448: VecRestoreArray(yy,&y);
2449: return(0);
2450: }
2452: PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2453: {
2454: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2455: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2456: const PetscScalar *x,*v;
2457: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2458: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2459: PetscErrorCode ierr;
2460: const PetscInt m = b->AIJ->rmap->n,*idx;
2461: PetscInt n,i;
2464: VecSet(yy,0.0);
2465: VecGetArrayRead(xx,&x);
2466: VecGetArray(yy,&y);
2468: for (i=0; i<m; i++) {
2469: idx = a->j + a->i[i];
2470: v = a->a + a->i[i];
2471: n = a->i[i+1] - a->i[i];
2472: alpha1 = x[18*i];
2473: alpha2 = x[18*i+1];
2474: alpha3 = x[18*i+2];
2475: alpha4 = x[18*i+3];
2476: alpha5 = x[18*i+4];
2477: alpha6 = x[18*i+5];
2478: alpha7 = x[18*i+6];
2479: alpha8 = x[18*i+7];
2480: alpha9 = x[18*i+8];
2481: alpha10 = x[18*i+9];
2482: alpha11 = x[18*i+10];
2483: alpha12 = x[18*i+11];
2484: alpha13 = x[18*i+12];
2485: alpha14 = x[18*i+13];
2486: alpha15 = x[18*i+14];
2487: alpha16 = x[18*i+15];
2488: alpha17 = x[18*i+16];
2489: alpha18 = x[18*i+17];
2490: while (n-->0) {
2491: y[18*(*idx)] += alpha1*(*v);
2492: y[18*(*idx)+1] += alpha2*(*v);
2493: y[18*(*idx)+2] += alpha3*(*v);
2494: y[18*(*idx)+3] += alpha4*(*v);
2495: y[18*(*idx)+4] += alpha5*(*v);
2496: y[18*(*idx)+5] += alpha6*(*v);
2497: y[18*(*idx)+6] += alpha7*(*v);
2498: y[18*(*idx)+7] += alpha8*(*v);
2499: y[18*(*idx)+8] += alpha9*(*v);
2500: y[18*(*idx)+9] += alpha10*(*v);
2501: y[18*(*idx)+10] += alpha11*(*v);
2502: y[18*(*idx)+11] += alpha12*(*v);
2503: y[18*(*idx)+12] += alpha13*(*v);
2504: y[18*(*idx)+13] += alpha14*(*v);
2505: y[18*(*idx)+14] += alpha15*(*v);
2506: y[18*(*idx)+15] += alpha16*(*v);
2507: y[18*(*idx)+16] += alpha17*(*v);
2508: y[18*(*idx)+17] += alpha18*(*v);
2509: idx++; v++;
2510: }
2511: }
2512: PetscLogFlops(36.0*a->nz);
2513: VecRestoreArrayRead(xx,&x);
2514: VecRestoreArray(yy,&y);
2515: return(0);
2516: }
2518: PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2519: {
2520: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2521: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2522: const PetscScalar *x,*v;
2523: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2524: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2525: PetscErrorCode ierr;
2526: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2527: PetscInt n,i,jrow,j;
2530: if (yy != zz) {VecCopy(yy,zz);}
2531: VecGetArrayRead(xx,&x);
2532: VecGetArray(zz,&y);
2533: idx = a->j;
2534: v = a->a;
2535: ii = a->i;
2537: for (i=0; i<m; i++) {
2538: jrow = ii[i];
2539: n = ii[i+1] - jrow;
2540: sum1 = 0.0;
2541: sum2 = 0.0;
2542: sum3 = 0.0;
2543: sum4 = 0.0;
2544: sum5 = 0.0;
2545: sum6 = 0.0;
2546: sum7 = 0.0;
2547: sum8 = 0.0;
2548: sum9 = 0.0;
2549: sum10 = 0.0;
2550: sum11 = 0.0;
2551: sum12 = 0.0;
2552: sum13 = 0.0;
2553: sum14 = 0.0;
2554: sum15 = 0.0;
2555: sum16 = 0.0;
2556: sum17 = 0.0;
2557: sum18 = 0.0;
2558: for (j=0; j<n; j++) {
2559: sum1 += v[jrow]*x[18*idx[jrow]];
2560: sum2 += v[jrow]*x[18*idx[jrow]+1];
2561: sum3 += v[jrow]*x[18*idx[jrow]+2];
2562: sum4 += v[jrow]*x[18*idx[jrow]+3];
2563: sum5 += v[jrow]*x[18*idx[jrow]+4];
2564: sum6 += v[jrow]*x[18*idx[jrow]+5];
2565: sum7 += v[jrow]*x[18*idx[jrow]+6];
2566: sum8 += v[jrow]*x[18*idx[jrow]+7];
2567: sum9 += v[jrow]*x[18*idx[jrow]+8];
2568: sum10 += v[jrow]*x[18*idx[jrow]+9];
2569: sum11 += v[jrow]*x[18*idx[jrow]+10];
2570: sum12 += v[jrow]*x[18*idx[jrow]+11];
2571: sum13 += v[jrow]*x[18*idx[jrow]+12];
2572: sum14 += v[jrow]*x[18*idx[jrow]+13];
2573: sum15 += v[jrow]*x[18*idx[jrow]+14];
2574: sum16 += v[jrow]*x[18*idx[jrow]+15];
2575: sum17 += v[jrow]*x[18*idx[jrow]+16];
2576: sum18 += v[jrow]*x[18*idx[jrow]+17];
2577: jrow++;
2578: }
2579: y[18*i] += sum1;
2580: y[18*i+1] += sum2;
2581: y[18*i+2] += sum3;
2582: y[18*i+3] += sum4;
2583: y[18*i+4] += sum5;
2584: y[18*i+5] += sum6;
2585: y[18*i+6] += sum7;
2586: y[18*i+7] += sum8;
2587: y[18*i+8] += sum9;
2588: y[18*i+9] += sum10;
2589: y[18*i+10] += sum11;
2590: y[18*i+11] += sum12;
2591: y[18*i+12] += sum13;
2592: y[18*i+13] += sum14;
2593: y[18*i+14] += sum15;
2594: y[18*i+15] += sum16;
2595: y[18*i+16] += sum17;
2596: y[18*i+17] += sum18;
2597: }
2599: PetscLogFlops(36.0*a->nz);
2600: VecRestoreArrayRead(xx,&x);
2601: VecRestoreArray(zz,&y);
2602: return(0);
2603: }
2605: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2606: {
2607: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2608: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2609: const PetscScalar *x,*v;
2610: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2611: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2612: PetscErrorCode ierr;
2613: const PetscInt m = b->AIJ->rmap->n,*idx;
2614: PetscInt n,i;
2617: if (yy != zz) {VecCopy(yy,zz);}
2618: VecGetArrayRead(xx,&x);
2619: VecGetArray(zz,&y);
2620: for (i=0; i<m; i++) {
2621: idx = a->j + a->i[i];
2622: v = a->a + a->i[i];
2623: n = a->i[i+1] - a->i[i];
2624: alpha1 = x[18*i];
2625: alpha2 = x[18*i+1];
2626: alpha3 = x[18*i+2];
2627: alpha4 = x[18*i+3];
2628: alpha5 = x[18*i+4];
2629: alpha6 = x[18*i+5];
2630: alpha7 = x[18*i+6];
2631: alpha8 = x[18*i+7];
2632: alpha9 = x[18*i+8];
2633: alpha10 = x[18*i+9];
2634: alpha11 = x[18*i+10];
2635: alpha12 = x[18*i+11];
2636: alpha13 = x[18*i+12];
2637: alpha14 = x[18*i+13];
2638: alpha15 = x[18*i+14];
2639: alpha16 = x[18*i+15];
2640: alpha17 = x[18*i+16];
2641: alpha18 = x[18*i+17];
2642: while (n-->0) {
2643: y[18*(*idx)] += alpha1*(*v);
2644: y[18*(*idx)+1] += alpha2*(*v);
2645: y[18*(*idx)+2] += alpha3*(*v);
2646: y[18*(*idx)+3] += alpha4*(*v);
2647: y[18*(*idx)+4] += alpha5*(*v);
2648: y[18*(*idx)+5] += alpha6*(*v);
2649: y[18*(*idx)+6] += alpha7*(*v);
2650: y[18*(*idx)+7] += alpha8*(*v);
2651: y[18*(*idx)+8] += alpha9*(*v);
2652: y[18*(*idx)+9] += alpha10*(*v);
2653: y[18*(*idx)+10] += alpha11*(*v);
2654: y[18*(*idx)+11] += alpha12*(*v);
2655: y[18*(*idx)+12] += alpha13*(*v);
2656: y[18*(*idx)+13] += alpha14*(*v);
2657: y[18*(*idx)+14] += alpha15*(*v);
2658: y[18*(*idx)+15] += alpha16*(*v);
2659: y[18*(*idx)+16] += alpha17*(*v);
2660: y[18*(*idx)+17] += alpha18*(*v);
2661: idx++; v++;
2662: }
2663: }
2664: PetscLogFlops(36.0*a->nz);
2665: VecRestoreArrayRead(xx,&x);
2666: VecRestoreArray(zz,&y);
2667: return(0);
2668: }
2670: PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2671: {
2672: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2673: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2674: const PetscScalar *x,*v;
2675: PetscScalar *y,*sums;
2676: PetscErrorCode ierr;
2677: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2678: PetscInt n,i,jrow,j,dof = b->dof,k;
2681: VecGetArrayRead(xx,&x);
2682: VecSet(yy,0.0);
2683: VecGetArray(yy,&y);
2684: idx = a->j;
2685: v = a->a;
2686: ii = a->i;
2688: for (i=0; i<m; i++) {
2689: jrow = ii[i];
2690: n = ii[i+1] - jrow;
2691: sums = y + dof*i;
2692: for (j=0; j<n; j++) {
2693: for (k=0; k<dof; k++) {
2694: sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2695: }
2696: jrow++;
2697: }
2698: }
2700: PetscLogFlops(2.0*dof*a->nz);
2701: VecRestoreArrayRead(xx,&x);
2702: VecRestoreArray(yy,&y);
2703: return(0);
2704: }
2706: PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2707: {
2708: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2709: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2710: const PetscScalar *x,*v;
2711: PetscScalar *y,*sums;
2712: PetscErrorCode ierr;
2713: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2714: PetscInt n,i,jrow,j,dof = b->dof,k;
2717: if (yy != zz) {VecCopy(yy,zz);}
2718: VecGetArrayRead(xx,&x);
2719: VecGetArray(zz,&y);
2720: idx = a->j;
2721: v = a->a;
2722: ii = a->i;
2724: for (i=0; i<m; i++) {
2725: jrow = ii[i];
2726: n = ii[i+1] - jrow;
2727: sums = y + dof*i;
2728: for (j=0; j<n; j++) {
2729: for (k=0; k<dof; k++) {
2730: sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2731: }
2732: jrow++;
2733: }
2734: }
2736: PetscLogFlops(2.0*dof*a->nz);
2737: VecRestoreArrayRead(xx,&x);
2738: VecRestoreArray(zz,&y);
2739: return(0);
2740: }
2742: PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2743: {
2744: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2745: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2746: const PetscScalar *x,*v,*alpha;
2747: PetscScalar *y;
2748: PetscErrorCode ierr;
2749: const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof;
2750: PetscInt n,i,k;
2753: VecGetArrayRead(xx,&x);
2754: VecSet(yy,0.0);
2755: VecGetArray(yy,&y);
2756: for (i=0; i<m; i++) {
2757: idx = a->j + a->i[i];
2758: v = a->a + a->i[i];
2759: n = a->i[i+1] - a->i[i];
2760: alpha = x + dof*i;
2761: while (n-->0) {
2762: for (k=0; k<dof; k++) {
2763: y[dof*(*idx)+k] += alpha[k]*(*v);
2764: }
2765: idx++; v++;
2766: }
2767: }
2768: PetscLogFlops(2.0*dof*a->nz);
2769: VecRestoreArrayRead(xx,&x);
2770: VecRestoreArray(yy,&y);
2771: return(0);
2772: }
2774: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2775: {
2776: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2777: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2778: const PetscScalar *x,*v,*alpha;
2779: PetscScalar *y;
2780: PetscErrorCode ierr;
2781: const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof;
2782: PetscInt n,i,k;
2785: if (yy != zz) {VecCopy(yy,zz);}
2786: VecGetArrayRead(xx,&x);
2787: VecGetArray(zz,&y);
2788: for (i=0; i<m; i++) {
2789: idx = a->j + a->i[i];
2790: v = a->a + a->i[i];
2791: n = a->i[i+1] - a->i[i];
2792: alpha = x + dof*i;
2793: while (n-->0) {
2794: for (k=0; k<dof; k++) {
2795: y[dof*(*idx)+k] += alpha[k]*(*v);
2796: }
2797: idx++; v++;
2798: }
2799: }
2800: PetscLogFlops(2.0*dof*a->nz);
2801: VecRestoreArrayRead(xx,&x);
2802: VecRestoreArray(zz,&y);
2803: return(0);
2804: }
2806: /*===================================================================================*/
2807: PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2808: {
2809: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2813: /* start the scatter */
2814: VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2815: (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
2816: VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2817: (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
2818: return(0);
2819: }
2821: PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2822: {
2823: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2827: (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2828: (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
2829: VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2830: VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2831: return(0);
2832: }
2834: PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2835: {
2836: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2840: /* start the scatter */
2841: VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2842: (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
2843: VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2844: (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);
2845: return(0);
2846: }
2848: PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2849: {
2850: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2854: (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2855: (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
2856: VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2857: VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2858: return(0);
2859: }
2861: /* ----------------------------------------------------------------*/
2862: PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C)
2863: {
2864: Mat_Product *product = C->product;
2867: if (product->type == MATPRODUCT_PtAP) {
2868: C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ;
2869: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices",MatProductTypes[product->type]);
2870: return(0);
2871: }
2873: PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C)
2874: {
2876: Mat_Product *product = C->product;
2877: PetscBool flg = PETSC_FALSE;
2878: Mat A=product->A,P=product->B;
2879: PetscInt alg=1; /* set default algorithm */
2880: #if !defined(PETSC_HAVE_HYPRE)
2881: const char *algTypes[4] = {"scalable","nonscalable","allatonce","allatonce_merged"};
2882: PetscInt nalg=4;
2883: #else
2884: const char *algTypes[5] = {"scalable","nonscalable","allatonce","allatonce_merged","hypre"};
2885: PetscInt nalg=5;
2886: #endif
2889: if (product->type != MATPRODUCT_PtAP) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices",MatProductTypes[product->type]);
2891: /* PtAP */
2892: /* Check matrix local sizes */
2893: if (A->rmap->rstart != P->rmap->rstart || A->rmap->rend != P->rmap->rend) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Arow (%D, %D) != Prow (%D,%D)",A->rmap->rstart,A->rmap->rend,P->rmap->rstart,P->rmap->rend);
2894: if (A->cmap->rstart != P->rmap->rstart || A->cmap->rend != P->rmap->rend) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Acol (%D, %D) != Prow (%D,%D)",A->cmap->rstart,A->cmap->rend,P->rmap->rstart,P->rmap->rend);
2896: /* Set the default algorithm */
2897: PetscStrcmp(C->product->alg,"default",&flg);
2898: if (flg) {
2899: MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
2900: }
2902: /* Get runtime option */
2903: PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");
2904: PetscOptionsEList("-matproduct_ptap_via","Algorithmic approach","MatPtAP",algTypes,nalg,algTypes[alg],&alg,&flg);
2905: if (flg) {
2906: MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
2907: }
2908: PetscOptionsEnd();
2910: PetscStrcmp(C->product->alg,"allatonce",&flg);
2911: if (flg) {
2912: C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
2913: return(0);
2914: }
2916: PetscStrcmp(C->product->alg,"allatonce_merged",&flg);
2917: if (flg) {
2918: C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
2919: return(0);
2920: }
2922: /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */
2923: PetscInfo((PetscObject)A,"Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n");
2924: MatConvert(P,MATMPIAIJ,MAT_INPLACE_MATRIX,&P);
2925: MatProductSetFromOptions(C);
2926: return(0);
2927: }
2929: /* ----------------------------------------------------------------*/
2930: PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat C)
2931: {
2932: PetscErrorCode ierr;
2933: PetscFreeSpaceList free_space=NULL,current_space=NULL;
2934: Mat_SeqMAIJ *pp =(Mat_SeqMAIJ*)PP->data;
2935: Mat P =pp->AIJ;
2936: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2937: PetscInt *pti,*ptj,*ptJ;
2938: PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2939: const PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
2940: PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
2941: MatScalar *ca;
2942: const PetscInt *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;
2945: /* Get ij structure of P^T */
2946: MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
2948: cn = pn*ppdof;
2949: /* Allocate ci array, arrays for fill computation and */
2950: /* free space for accumulating nonzero column info */
2951: PetscMalloc1(cn+1,&ci);
2952: ci[0] = 0;
2954: /* Work arrays for rows of P^T*A */
2955: PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow);
2956: PetscArrayzero(ptadenserow,an);
2957: PetscArrayzero(denserow,cn);
2959: /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2960: /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2961: /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2962: PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space);
2963: current_space = free_space;
2965: /* Determine symbolic info for each row of C: */
2966: for (i=0; i<pn; i++) {
2967: ptnzi = pti[i+1] - pti[i];
2968: ptJ = ptj + pti[i];
2969: for (dof=0; dof<ppdof; dof++) {
2970: ptanzi = 0;
2971: /* Determine symbolic row of PtA: */
2972: for (j=0; j<ptnzi; j++) {
2973: /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2974: arow = ptJ[j]*ppdof + dof;
2975: /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2976: anzj = ai[arow+1] - ai[arow];
2977: ajj = aj + ai[arow];
2978: for (k=0; k<anzj; k++) {
2979: if (!ptadenserow[ajj[k]]) {
2980: ptadenserow[ajj[k]] = -1;
2981: ptasparserow[ptanzi++] = ajj[k];
2982: }
2983: }
2984: }
2985: /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2986: ptaj = ptasparserow;
2987: cnzi = 0;
2988: for (j=0; j<ptanzi; j++) {
2989: /* Get offset within block of P */
2990: pshift = *ptaj%ppdof;
2991: /* Get block row of P */
2992: prow = (*ptaj++)/ppdof; /* integer division */
2993: /* P has same number of nonzeros per row as the compressed form */
2994: pnzj = pi[prow+1] - pi[prow];
2995: pjj = pj + pi[prow];
2996: for (k=0;k<pnzj;k++) {
2997: /* Locations in C are shifted by the offset within the block */
2998: /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
2999: if (!denserow[pjj[k]*ppdof+pshift]) {
3000: denserow[pjj[k]*ppdof+pshift] = -1;
3001: sparserow[cnzi++] = pjj[k]*ppdof+pshift;
3002: }
3003: }
3004: }
3006: /* sort sparserow */
3007: PetscSortInt(cnzi,sparserow);
3009: /* If free space is not available, make more free space */
3010: /* Double the amount of total space in the list */
3011: if (current_space->local_remaining<cnzi) {
3012: PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space);
3013: }
3015: /* Copy data into free space, and zero out denserows */
3016: PetscArraycpy(current_space->array,sparserow,cnzi);
3018: current_space->array += cnzi;
3019: current_space->local_used += cnzi;
3020: current_space->local_remaining -= cnzi;
3022: for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
3023: for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0;
3025: /* Aside: Perhaps we should save the pta info for the numerical factorization. */
3026: /* For now, we will recompute what is needed. */
3027: ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
3028: }
3029: }
3030: /* nnz is now stored in ci[ptm], column indices are in the list of free space */
3031: /* Allocate space for cj, initialize cj, and */
3032: /* destroy list of free space and other temporary array(s) */
3033: PetscMalloc1(ci[cn]+1,&cj);
3034: PetscFreeSpaceContiguous(&free_space,cj);
3035: PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);
3037: /* Allocate space for ca */
3038: PetscCalloc1(ci[cn]+1,&ca);
3040: /* put together the new matrix */
3041: MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,NULL,C);
3042: MatSetBlockSize(C,pp->dof);
3044: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3045: /* Since these are PETSc arrays, change flags to free them as necessary. */
3046: c = (Mat_SeqAIJ*)(C->data);
3047: c->free_a = PETSC_TRUE;
3048: c->free_ij = PETSC_TRUE;
3049: c->nonew = 0;
3051: C->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
3052: C->ops->productnumeric = MatProductNumeric_PtAP;
3054: /* Clean up. */
3055: MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
3056: return(0);
3057: }
3059: PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
3060: {
3061: /* This routine requires testing -- first draft only */
3062: PetscErrorCode ierr;
3063: Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data;
3064: Mat P =pp->AIJ;
3065: Mat_SeqAIJ *a = (Mat_SeqAIJ*) A->data;
3066: Mat_SeqAIJ *p = (Mat_SeqAIJ*) P->data;
3067: Mat_SeqAIJ *c = (Mat_SeqAIJ*) C->data;
3068: const PetscInt *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj;
3069: const PetscInt *ci=c->i,*cj=c->j,*cjj;
3070: const PetscInt am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3071: PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3072: const MatScalar *aa=a->a,*pa=p->a,*pA,*paj;
3073: MatScalar *ca=c->a,*caj,*apa;
3076: /* Allocate temporary array for storage of one row of A*P */
3077: PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense);
3079: /* Clear old values in C */
3080: PetscArrayzero(ca,ci[cm]);
3082: for (i=0; i<am; i++) {
3083: /* Form sparse row of A*P */
3084: anzi = ai[i+1] - ai[i];
3085: apnzj = 0;
3086: for (j=0; j<anzi; j++) {
3087: /* Get offset within block of P */
3088: pshift = *aj%ppdof;
3089: /* Get block row of P */
3090: prow = *aj++/ppdof; /* integer division */
3091: pnzj = pi[prow+1] - pi[prow];
3092: pjj = pj + pi[prow];
3093: paj = pa + pi[prow];
3094: for (k=0; k<pnzj; k++) {
3095: poffset = pjj[k]*ppdof+pshift;
3096: if (!apjdense[poffset]) {
3097: apjdense[poffset] = -1;
3098: apj[apnzj++] = poffset;
3099: }
3100: apa[poffset] += (*aa)*paj[k];
3101: }
3102: PetscLogFlops(2.0*pnzj);
3103: aa++;
3104: }
3106: /* Sort the j index array for quick sparse axpy. */
3107: /* Note: a array does not need sorting as it is in dense storage locations. */
3108: PetscSortInt(apnzj,apj);
3110: /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
3111: prow = i/ppdof; /* integer division */
3112: pshift = i%ppdof;
3113: poffset = pi[prow];
3114: pnzi = pi[prow+1] - poffset;
3115: /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
3116: pJ = pj+poffset;
3117: pA = pa+poffset;
3118: for (j=0; j<pnzi; j++) {
3119: crow = (*pJ)*ppdof+pshift;
3120: cjj = cj + ci[crow];
3121: caj = ca + ci[crow];
3122: pJ++;
3123: /* Perform sparse axpy operation. Note cjj includes apj. */
3124: for (k=0,nextap=0; nextap<apnzj; k++) {
3125: if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]];
3126: }
3127: PetscLogFlops(2.0*apnzj);
3128: pA++;
3129: }
3131: /* Zero the current row info for A*P */
3132: for (j=0; j<apnzj; j++) {
3133: apa[apj[j]] = 0.;
3134: apjdense[apj[j]] = 0;
3135: }
3136: }
3138: /* Assemble the final matrix and clean up */
3139: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
3140: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
3141: PetscFree3(apa,apj,apjdense);
3142: return(0);
3143: }
3145: PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C)
3146: {
3147: PetscErrorCode ierr;
3148: Mat_Product *product = C->product;
3149: Mat A=product->A,P=product->B;
3152: MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,product->fill,C);
3153: return(0);
3154: }
3156: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
3157: {
3159: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPSymbolic is not implemented for MPIMAIJ matrix yet");
3160: }
3162: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
3163: {
3165: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
3166: }
3168: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,Mat);
3170: PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,Mat C)
3171: {
3172: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data;
3173: PetscErrorCode ierr;
3177: MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,C);
3178: return(0);
3179: }
3181: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,PetscReal,Mat);
3183: PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,PetscReal fill,Mat C)
3184: {
3185: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data;
3186: PetscErrorCode ierr;
3189: MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,fill,C);
3190: C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce;
3191: return(0);
3192: }
3194: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,Mat);
3196: PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,Mat C)
3197: {
3198: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data;
3199: PetscErrorCode ierr;
3203: MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,C);
3204: return(0);
3205: }
3207: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,PetscReal,Mat);
3209: PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,PetscReal fill,Mat C)
3210: {
3211: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data;
3212: PetscErrorCode ierr;
3216: MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,fill,C);
3217: C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged;
3218: return(0);
3219: }
3221: PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C)
3222: {
3223: PetscErrorCode ierr;
3224: Mat_Product *product = C->product;
3225: Mat A=product->A,P=product->B;
3226: PetscBool flg;
3229: PetscStrcmp(product->alg,"allatonce",&flg);
3230: if (flg) {
3231: MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A,P,product->fill,C);
3232: C->ops->productnumeric = MatProductNumeric_PtAP;
3233: return(0);
3234: }
3236: PetscStrcmp(product->alg,"allatonce_merged",&flg);
3237: if (flg) {
3238: MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A,P,product->fill,C);
3239: C->ops->productnumeric = MatProductNumeric_PtAP;
3240: return(0);
3241: }
3243: SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"Mat Product Algorithm is not supported");
3244: }
3246: PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3247: {
3248: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
3249: Mat a = b->AIJ,B;
3250: Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data;
3252: PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3253: PetscInt *cols;
3254: PetscScalar *vals;
3257: MatGetSize(a,&m,&n);
3258: PetscMalloc1(dof*m,&ilen);
3259: for (i=0; i<m; i++) {
3260: nmax = PetscMax(nmax,aij->ilen[i]);
3261: for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i];
3262: }
3263: MatCreate(PETSC_COMM_SELF,&B);
3264: MatSetSizes(B,dof*m,dof*n,dof*m,dof*n);
3265: MatSetType(B,newtype);
3266: MatSeqAIJSetPreallocation(B,0,ilen);
3267: PetscFree(ilen);
3268: PetscMalloc1(nmax,&icols);
3269: ii = 0;
3270: for (i=0; i<m; i++) {
3271: MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3272: for (j=0; j<dof; j++) {
3273: for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j;
3274: MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3275: ii++;
3276: }
3277: MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3278: }
3279: PetscFree(icols);
3280: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3281: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3283: if (reuse == MAT_INPLACE_MATRIX) {
3284: MatHeaderReplace(A,&B);
3285: } else {
3286: *newmat = B;
3287: }
3288: return(0);
3289: }
3291: #include <../src/mat/impls/aij/mpi/mpiaij.h>
3293: PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3294: {
3295: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data;
3296: Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
3297: Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
3298: Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
3299: Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
3300: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data;
3301: PetscInt dof = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0;
3302: PetscInt *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL;
3303: PetscInt rstart,cstart,*garray,ii,k;
3305: PetscScalar *vals,*ovals;
3308: PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz);
3309: for (i=0; i<A->rmap->n/dof; i++) {
3310: nmax = PetscMax(nmax,AIJ->ilen[i]);
3311: onmax = PetscMax(onmax,OAIJ->ilen[i]);
3312: for (j=0; j<dof; j++) {
3313: dnz[dof*i+j] = AIJ->ilen[i];
3314: onz[dof*i+j] = OAIJ->ilen[i];
3315: }
3316: }
3317: MatCreate(PetscObjectComm((PetscObject)A),&B);
3318: MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
3319: MatSetType(B,newtype);
3320: MatMPIAIJSetPreallocation(B,0,dnz,0,onz);
3321: MatSetBlockSize(B,dof);
3322: PetscFree2(dnz,onz);
3324: PetscMalloc2(nmax,&icols,onmax,&oicols);
3325: rstart = dof*maij->A->rmap->rstart;
3326: cstart = dof*maij->A->cmap->rstart;
3327: garray = mpiaij->garray;
3329: ii = rstart;
3330: for (i=0; i<A->rmap->n/dof; i++) {
3331: MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3332: MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3333: for (j=0; j<dof; j++) {
3334: for (k=0; k<ncols; k++) {
3335: icols[k] = cstart + dof*cols[k]+j;
3336: }
3337: for (k=0; k<oncols; k++) {
3338: oicols[k] = dof*garray[ocols[k]]+j;
3339: }
3340: MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3341: MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);
3342: ii++;
3343: }
3344: MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3345: MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3346: }
3347: PetscFree2(icols,oicols);
3349: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3350: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3352: if (reuse == MAT_INPLACE_MATRIX) {
3353: PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
3354: ((PetscObject)A)->refct = 1;
3356: MatHeaderReplace(A,&B);
3358: ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3359: } else {
3360: *newmat = B;
3361: }
3362: return(0);
3363: }
3365: PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
3366: {
3368: Mat A;
3371: MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);
3372: MatCreateSubMatrix(A,isrow,iscol,cll,newmat);
3373: MatDestroy(&A);
3374: return(0);
3375: }
3377: PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *submat[])
3378: {
3380: Mat A;
3383: MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);
3384: MatCreateSubMatrices(A,n,irow,icol,scall,submat);
3385: MatDestroy(&A);
3386: return(0);
3387: }
3389: /* ---------------------------------------------------------------------------------- */
3390: /*@
3391: MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
3392: operations for multicomponent problems. It interpolates each component the same
3393: way independently. The matrix type is based on MATSEQAIJ for sequential matrices,
3394: and MATMPIAIJ for distributed matrices.
3396: Collective
3398: Input Parameters:
3399: + A - the AIJ matrix describing the action on blocks
3400: - dof - the block size (number of components per node)
3402: Output Parameter:
3403: . maij - the new MAIJ matrix
3405: Operations provided:
3406: + MatMult
3407: . MatMultTranspose
3408: . MatMultAdd
3409: . MatMultTransposeAdd
3410: - MatView
3412: Level: advanced
3414: .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ
3415: @*/
3416: PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
3417: {
3419: PetscMPIInt size;
3420: PetscInt n;
3421: Mat B;
3422: #if defined(PETSC_HAVE_CUDA)
3423: /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */
3424: PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE;
3425: #endif
3428: dof = PetscAbs(dof);
3429: PetscObjectReference((PetscObject)A);
3431: if (dof == 1) *maij = A;
3432: else {
3433: MatCreate(PetscObjectComm((PetscObject)A),&B);
3434: /* propagate vec type */
3435: MatSetVecType(B,A->defaultvectype);
3436: MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);
3437: PetscLayoutSetBlockSize(B->rmap,dof);
3438: PetscLayoutSetBlockSize(B->cmap,dof);
3439: PetscLayoutSetUp(B->rmap);
3440: PetscLayoutSetUp(B->cmap);
3442: B->assembled = PETSC_TRUE;
3444: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
3445: if (size == 1) {
3446: Mat_SeqMAIJ *b;
3448: MatSetType(B,MATSEQMAIJ);
3450: B->ops->setup = NULL;
3451: B->ops->destroy = MatDestroy_SeqMAIJ;
3452: B->ops->view = MatView_SeqMAIJ;
3454: b = (Mat_SeqMAIJ*)B->data;
3455: b->dof = dof;
3456: b->AIJ = A;
3458: if (dof == 2) {
3459: B->ops->mult = MatMult_SeqMAIJ_2;
3460: B->ops->multadd = MatMultAdd_SeqMAIJ_2;
3461: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2;
3462: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3463: } else if (dof == 3) {
3464: B->ops->mult = MatMult_SeqMAIJ_3;
3465: B->ops->multadd = MatMultAdd_SeqMAIJ_3;
3466: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3;
3467: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3468: } else if (dof == 4) {
3469: B->ops->mult = MatMult_SeqMAIJ_4;
3470: B->ops->multadd = MatMultAdd_SeqMAIJ_4;
3471: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4;
3472: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3473: } else if (dof == 5) {
3474: B->ops->mult = MatMult_SeqMAIJ_5;
3475: B->ops->multadd = MatMultAdd_SeqMAIJ_5;
3476: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5;
3477: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
3478: } else if (dof == 6) {
3479: B->ops->mult = MatMult_SeqMAIJ_6;
3480: B->ops->multadd = MatMultAdd_SeqMAIJ_6;
3481: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6;
3482: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3483: } else if (dof == 7) {
3484: B->ops->mult = MatMult_SeqMAIJ_7;
3485: B->ops->multadd = MatMultAdd_SeqMAIJ_7;
3486: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7;
3487: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
3488: } else if (dof == 8) {
3489: B->ops->mult = MatMult_SeqMAIJ_8;
3490: B->ops->multadd = MatMultAdd_SeqMAIJ_8;
3491: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8;
3492: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
3493: } else if (dof == 9) {
3494: B->ops->mult = MatMult_SeqMAIJ_9;
3495: B->ops->multadd = MatMultAdd_SeqMAIJ_9;
3496: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9;
3497: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3498: } else if (dof == 10) {
3499: B->ops->mult = MatMult_SeqMAIJ_10;
3500: B->ops->multadd = MatMultAdd_SeqMAIJ_10;
3501: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10;
3502: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3503: } else if (dof == 11) {
3504: B->ops->mult = MatMult_SeqMAIJ_11;
3505: B->ops->multadd = MatMultAdd_SeqMAIJ_11;
3506: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11;
3507: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
3508: } else if (dof == 16) {
3509: B->ops->mult = MatMult_SeqMAIJ_16;
3510: B->ops->multadd = MatMultAdd_SeqMAIJ_16;
3511: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16;
3512: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3513: } else if (dof == 18) {
3514: B->ops->mult = MatMult_SeqMAIJ_18;
3515: B->ops->multadd = MatMultAdd_SeqMAIJ_18;
3516: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18;
3517: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
3518: } else {
3519: B->ops->mult = MatMult_SeqMAIJ_N;
3520: B->ops->multadd = MatMultAdd_SeqMAIJ_N;
3521: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N;
3522: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
3523: }
3524: #if defined(PETSC_HAVE_CUDA)
3525: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaijcusparse_C",MatConvert_SeqMAIJ_SeqAIJ);
3526: #endif
3527: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ);
3528: PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaij_seqmaij_C",MatProductSetFromOptions_SeqAIJ_SeqMAIJ);
3529: } else {
3530: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)A->data;
3531: Mat_MPIMAIJ *b;
3532: IS from,to;
3533: Vec gvec;
3535: MatSetType(B,MATMPIMAIJ);
3537: B->ops->setup = NULL;
3538: B->ops->destroy = MatDestroy_MPIMAIJ;
3539: B->ops->view = MatView_MPIMAIJ;
3541: b = (Mat_MPIMAIJ*)B->data;
3542: b->dof = dof;
3543: b->A = A;
3545: MatCreateMAIJ(mpiaij->A,-dof,&b->AIJ);
3546: MatCreateMAIJ(mpiaij->B,-dof,&b->OAIJ);
3548: VecGetSize(mpiaij->lvec,&n);
3549: VecCreate(PETSC_COMM_SELF,&b->w);
3550: VecSetSizes(b->w,n*dof,n*dof);
3551: VecSetBlockSize(b->w,dof);
3552: VecSetType(b->w,VECSEQ);
3554: /* create two temporary Index sets for build scatter gather */
3555: ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);
3556: ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);
3558: /* create temporary global vector to generate scatter context */
3559: VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);
3561: /* generate the scatter context */
3562: VecScatterCreate(gvec,from,b->w,to,&b->ctx);
3564: ISDestroy(&from);
3565: ISDestroy(&to);
3566: VecDestroy(&gvec);
3568: B->ops->mult = MatMult_MPIMAIJ_dof;
3569: B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof;
3570: B->ops->multadd = MatMultAdd_MPIMAIJ_dof;
3571: B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
3573: #if defined(PETSC_HAVE_CUDA)
3574: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaijcusparse_C",MatConvert_MPIMAIJ_MPIAIJ);
3575: #endif
3576: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ);
3577: PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpimaij_C",MatProductSetFromOptions_MPIAIJ_MPIMAIJ);
3578: }
3579: B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ;
3580: B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ;
3581: MatSetFromOptions(B);
3582: MatSetUp(B);
3583: #if defined(PETSC_HAVE_CUDA)
3584: /* temporary until we have CUDA implementation of MAIJ */
3585: {
3586: PetscBool flg;
3587: if (convert) {
3588: PetscObjectTypeCompareAny((PetscObject)A,&flg,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,MATAIJCUSPARSE,"");
3589: if (flg) {
3590: MatConvert(B,((PetscObject)A)->type_name,MAT_INPLACE_MATRIX,&B);
3591: }
3592: }
3593: }
3594: #endif
3595: *maij = B;
3596: MatViewFromOptions(B,NULL,"-mat_view");
3597: }
3598: return(0);
3599: }