Actual source code: maij.c
2: /*
3: Defines the basic matrix operations for the MAIJ matrix storage format.
4: This format is used for restriction and interpolation operations for
5: multicomponent problems. It interpolates each component the same way
6: independently.
8: We provide:
9: MatMult()
10: MatMultTranspose()
11: MatMultTransposeAdd()
12: MatMultAdd()
13: and
14: MatCreateMAIJ(Mat,dof,Mat*)
16: This single directory handles both the sequential and parallel codes
17: */
19: #include <../src/mat/impls/maij/maij.h>
20: #include <../src/mat/utils/freespace.h>
22: /*@
23: MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix
25: Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel
27: Input Parameter:
28: . A - the MAIJ matrix
30: Output Parameter:
31: . B - the AIJ matrix
33: Level: advanced
35: Notes:
36: The reference count on the AIJ matrix is not increased so you should not destroy it.
38: .seealso: MatCreateMAIJ()
39: @*/
40: PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B)
41: {
43: PetscBool ismpimaij,isseqmaij;
46: PetscObjectTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
47: PetscObjectTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
48: if (ismpimaij) {
49: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
51: *B = b->A;
52: } else if (isseqmaij) {
53: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
55: *B = b->AIJ;
56: } else {
57: *B = A;
58: }
59: return(0);
60: }
62: /*@
63: MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size
65: Logically Collective
67: Input Parameters:
68: + A - the MAIJ matrix
69: - dof - the block size for the new matrix
71: Output Parameter:
72: . B - the new MAIJ matrix
74: Level: advanced
76: .seealso: MatCreateMAIJ()
77: @*/
78: PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
79: {
81: Mat Aij = NULL;
85: MatMAIJGetAIJ(A,&Aij);
86: MatCreateMAIJ(Aij,dof,B);
87: return(0);
88: }
90: PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
91: {
93: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
96: MatDestroy(&b->AIJ);
97: PetscFree(A->data);
98: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqmaij_seqaij_C",NULL);
99: PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaij_seqmaij_C",NULL);
100: return(0);
101: }
103: PetscErrorCode MatSetUp_MAIJ(Mat A)
104: {
106: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Must use MatCreateMAIJ() to create MAIJ matrices");
107: }
109: PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
110: {
112: Mat B;
115: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
116: MatView(B,viewer);
117: MatDestroy(&B);
118: return(0);
119: }
121: PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
122: {
124: Mat B;
127: MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
128: MatView(B,viewer);
129: MatDestroy(&B);
130: return(0);
131: }
133: PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
134: {
136: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
139: MatDestroy(&b->AIJ);
140: MatDestroy(&b->OAIJ);
141: MatDestroy(&b->A);
142: VecScatterDestroy(&b->ctx);
143: VecDestroy(&b->w);
144: PetscFree(A->data);
145: PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpimaij_mpiaij_C",NULL);
146: PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_mpiaij_mpimaij_C",NULL);
147: PetscObjectChangeTypeName((PetscObject)A,NULL);
148: return(0);
149: }
151: /*MC
152: MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
153: multicomponent problems, interpolating or restricting each component the same way independently.
154: The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
156: Operations provided:
157: + MatMult
158: . MatMultTranspose
159: . MatMultAdd
160: - MatMultTransposeAdd
162: Level: advanced
164: .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MatCreateMAIJ()
165: M*/
167: PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
168: {
170: Mat_MPIMAIJ *b;
171: PetscMPIInt size;
174: PetscNewLog(A,&b);
175: A->data = (void*)b;
177: PetscMemzero(A->ops,sizeof(struct _MatOps));
179: A->ops->setup = MatSetUp_MAIJ;
181: b->AIJ = NULL;
182: b->dof = 0;
183: b->OAIJ = NULL;
184: b->ctx = NULL;
185: b->w = NULL;
186: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
187: if (size == 1) {
188: PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);
189: } else {
190: PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);
191: }
192: A->preallocated = PETSC_TRUE;
193: A->assembled = PETSC_TRUE;
194: return(0);
195: }
197: /* --------------------------------------------------------------------------------------*/
198: PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
199: {
200: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
201: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
202: const PetscScalar *x,*v;
203: PetscScalar *y, sum1, sum2;
204: PetscErrorCode ierr;
205: PetscInt nonzerorow=0,n,i,jrow,j;
206: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
209: VecGetArrayRead(xx,&x);
210: VecGetArray(yy,&y);
211: idx = a->j;
212: v = a->a;
213: ii = a->i;
215: for (i=0; i<m; i++) {
216: jrow = ii[i];
217: n = ii[i+1] - jrow;
218: sum1 = 0.0;
219: sum2 = 0.0;
221: nonzerorow += (n>0);
222: for (j=0; j<n; j++) {
223: sum1 += v[jrow]*x[2*idx[jrow]];
224: sum2 += v[jrow]*x[2*idx[jrow]+1];
225: jrow++;
226: }
227: y[2*i] = sum1;
228: y[2*i+1] = sum2;
229: }
231: PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);
232: VecRestoreArrayRead(xx,&x);
233: VecRestoreArray(yy,&y);
234: return(0);
235: }
237: PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
238: {
239: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
240: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
241: const PetscScalar *x,*v;
242: PetscScalar *y,alpha1,alpha2;
243: PetscErrorCode ierr;
244: PetscInt n,i;
245: const PetscInt m = b->AIJ->rmap->n,*idx;
248: VecSet(yy,0.0);
249: VecGetArrayRead(xx,&x);
250: VecGetArray(yy,&y);
252: for (i=0; i<m; i++) {
253: idx = a->j + a->i[i];
254: v = a->a + a->i[i];
255: n = a->i[i+1] - a->i[i];
256: alpha1 = x[2*i];
257: alpha2 = x[2*i+1];
258: while (n-->0) {
259: y[2*(*idx)] += alpha1*(*v);
260: y[2*(*idx)+1] += alpha2*(*v);
261: idx++; v++;
262: }
263: }
264: PetscLogFlops(4.0*a->nz);
265: VecRestoreArrayRead(xx,&x);
266: VecRestoreArray(yy,&y);
267: return(0);
268: }
270: PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
271: {
272: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
273: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
274: const PetscScalar *x,*v;
275: PetscScalar *y,sum1, sum2;
276: PetscErrorCode ierr;
277: PetscInt n,i,jrow,j;
278: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
281: if (yy != zz) {VecCopy(yy,zz);}
282: VecGetArrayRead(xx,&x);
283: VecGetArray(zz,&y);
284: idx = a->j;
285: v = a->a;
286: ii = a->i;
288: for (i=0; i<m; i++) {
289: jrow = ii[i];
290: n = ii[i+1] - jrow;
291: sum1 = 0.0;
292: sum2 = 0.0;
293: for (j=0; j<n; j++) {
294: sum1 += v[jrow]*x[2*idx[jrow]];
295: sum2 += v[jrow]*x[2*idx[jrow]+1];
296: jrow++;
297: }
298: y[2*i] += sum1;
299: y[2*i+1] += sum2;
300: }
302: PetscLogFlops(4.0*a->nz);
303: VecRestoreArrayRead(xx,&x);
304: VecRestoreArray(zz,&y);
305: return(0);
306: }
307: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
308: {
309: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
310: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
311: const PetscScalar *x,*v;
312: PetscScalar *y,alpha1,alpha2;
313: PetscErrorCode ierr;
314: PetscInt n,i;
315: const PetscInt m = b->AIJ->rmap->n,*idx;
318: if (yy != zz) {VecCopy(yy,zz);}
319: VecGetArrayRead(xx,&x);
320: VecGetArray(zz,&y);
322: for (i=0; i<m; i++) {
323: idx = a->j + a->i[i];
324: v = a->a + a->i[i];
325: n = a->i[i+1] - a->i[i];
326: alpha1 = x[2*i];
327: alpha2 = x[2*i+1];
328: while (n-->0) {
329: y[2*(*idx)] += alpha1*(*v);
330: y[2*(*idx)+1] += alpha2*(*v);
331: idx++; v++;
332: }
333: }
334: PetscLogFlops(4.0*a->nz);
335: VecRestoreArrayRead(xx,&x);
336: VecRestoreArray(zz,&y);
337: return(0);
338: }
339: /* --------------------------------------------------------------------------------------*/
340: PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
341: {
342: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
343: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
344: const PetscScalar *x,*v;
345: PetscScalar *y,sum1, sum2, sum3;
346: PetscErrorCode ierr;
347: PetscInt nonzerorow=0,n,i,jrow,j;
348: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
351: VecGetArrayRead(xx,&x);
352: VecGetArray(yy,&y);
353: idx = a->j;
354: v = a->a;
355: ii = a->i;
357: for (i=0; i<m; i++) {
358: jrow = ii[i];
359: n = ii[i+1] - jrow;
360: sum1 = 0.0;
361: sum2 = 0.0;
362: sum3 = 0.0;
364: nonzerorow += (n>0);
365: for (j=0; j<n; j++) {
366: sum1 += v[jrow]*x[3*idx[jrow]];
367: sum2 += v[jrow]*x[3*idx[jrow]+1];
368: sum3 += v[jrow]*x[3*idx[jrow]+2];
369: jrow++;
370: }
371: y[3*i] = sum1;
372: y[3*i+1] = sum2;
373: y[3*i+2] = sum3;
374: }
376: PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);
377: VecRestoreArrayRead(xx,&x);
378: VecRestoreArray(yy,&y);
379: return(0);
380: }
382: PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
383: {
384: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
385: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
386: const PetscScalar *x,*v;
387: PetscScalar *y,alpha1,alpha2,alpha3;
388: PetscErrorCode ierr;
389: PetscInt n,i;
390: const PetscInt m = b->AIJ->rmap->n,*idx;
393: VecSet(yy,0.0);
394: VecGetArrayRead(xx,&x);
395: VecGetArray(yy,&y);
397: for (i=0; i<m; i++) {
398: idx = a->j + a->i[i];
399: v = a->a + a->i[i];
400: n = a->i[i+1] - a->i[i];
401: alpha1 = x[3*i];
402: alpha2 = x[3*i+1];
403: alpha3 = x[3*i+2];
404: while (n-->0) {
405: y[3*(*idx)] += alpha1*(*v);
406: y[3*(*idx)+1] += alpha2*(*v);
407: y[3*(*idx)+2] += alpha3*(*v);
408: idx++; v++;
409: }
410: }
411: PetscLogFlops(6.0*a->nz);
412: VecRestoreArrayRead(xx,&x);
413: VecRestoreArray(yy,&y);
414: return(0);
415: }
417: PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
418: {
419: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
420: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
421: const PetscScalar *x,*v;
422: PetscScalar *y,sum1, sum2, sum3;
423: PetscErrorCode ierr;
424: PetscInt n,i,jrow,j;
425: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
428: if (yy != zz) {VecCopy(yy,zz);}
429: VecGetArrayRead(xx,&x);
430: VecGetArray(zz,&y);
431: idx = a->j;
432: v = a->a;
433: ii = a->i;
435: for (i=0; i<m; i++) {
436: jrow = ii[i];
437: n = ii[i+1] - jrow;
438: sum1 = 0.0;
439: sum2 = 0.0;
440: sum3 = 0.0;
441: for (j=0; j<n; j++) {
442: sum1 += v[jrow]*x[3*idx[jrow]];
443: sum2 += v[jrow]*x[3*idx[jrow]+1];
444: sum3 += v[jrow]*x[3*idx[jrow]+2];
445: jrow++;
446: }
447: y[3*i] += sum1;
448: y[3*i+1] += sum2;
449: y[3*i+2] += sum3;
450: }
452: PetscLogFlops(6.0*a->nz);
453: VecRestoreArrayRead(xx,&x);
454: VecRestoreArray(zz,&y);
455: return(0);
456: }
457: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
458: {
459: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
460: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
461: const PetscScalar *x,*v;
462: PetscScalar *y,alpha1,alpha2,alpha3;
463: PetscErrorCode ierr;
464: PetscInt n,i;
465: const PetscInt m = b->AIJ->rmap->n,*idx;
468: if (yy != zz) {VecCopy(yy,zz);}
469: VecGetArrayRead(xx,&x);
470: VecGetArray(zz,&y);
471: for (i=0; i<m; i++) {
472: idx = a->j + a->i[i];
473: v = a->a + a->i[i];
474: n = a->i[i+1] - a->i[i];
475: alpha1 = x[3*i];
476: alpha2 = x[3*i+1];
477: alpha3 = x[3*i+2];
478: while (n-->0) {
479: y[3*(*idx)] += alpha1*(*v);
480: y[3*(*idx)+1] += alpha2*(*v);
481: y[3*(*idx)+2] += alpha3*(*v);
482: idx++; v++;
483: }
484: }
485: PetscLogFlops(6.0*a->nz);
486: VecRestoreArrayRead(xx,&x);
487: VecRestoreArray(zz,&y);
488: return(0);
489: }
491: /* ------------------------------------------------------------------------------*/
492: PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
493: {
494: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
495: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
496: const PetscScalar *x,*v;
497: PetscScalar *y,sum1, sum2, sum3, sum4;
498: PetscErrorCode ierr;
499: PetscInt nonzerorow=0,n,i,jrow,j;
500: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
503: VecGetArrayRead(xx,&x);
504: VecGetArray(yy,&y);
505: idx = a->j;
506: v = a->a;
507: ii = a->i;
509: for (i=0; i<m; i++) {
510: jrow = ii[i];
511: n = ii[i+1] - jrow;
512: sum1 = 0.0;
513: sum2 = 0.0;
514: sum3 = 0.0;
515: sum4 = 0.0;
516: nonzerorow += (n>0);
517: for (j=0; j<n; j++) {
518: sum1 += v[jrow]*x[4*idx[jrow]];
519: sum2 += v[jrow]*x[4*idx[jrow]+1];
520: sum3 += v[jrow]*x[4*idx[jrow]+2];
521: sum4 += v[jrow]*x[4*idx[jrow]+3];
522: jrow++;
523: }
524: y[4*i] = sum1;
525: y[4*i+1] = sum2;
526: y[4*i+2] = sum3;
527: y[4*i+3] = sum4;
528: }
530: PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);
531: VecRestoreArrayRead(xx,&x);
532: VecRestoreArray(yy,&y);
533: return(0);
534: }
536: PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
537: {
538: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
539: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
540: const PetscScalar *x,*v;
541: PetscScalar *y,alpha1,alpha2,alpha3,alpha4;
542: PetscErrorCode ierr;
543: PetscInt n,i;
544: const PetscInt m = b->AIJ->rmap->n,*idx;
547: VecSet(yy,0.0);
548: VecGetArrayRead(xx,&x);
549: VecGetArray(yy,&y);
550: for (i=0; i<m; i++) {
551: idx = a->j + a->i[i];
552: v = a->a + a->i[i];
553: n = a->i[i+1] - a->i[i];
554: alpha1 = x[4*i];
555: alpha2 = x[4*i+1];
556: alpha3 = x[4*i+2];
557: alpha4 = x[4*i+3];
558: while (n-->0) {
559: y[4*(*idx)] += alpha1*(*v);
560: y[4*(*idx)+1] += alpha2*(*v);
561: y[4*(*idx)+2] += alpha3*(*v);
562: y[4*(*idx)+3] += alpha4*(*v);
563: idx++; v++;
564: }
565: }
566: PetscLogFlops(8.0*a->nz);
567: VecRestoreArrayRead(xx,&x);
568: VecRestoreArray(yy,&y);
569: return(0);
570: }
572: PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
573: {
574: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
575: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
576: const PetscScalar *x,*v;
577: PetscScalar *y,sum1, sum2, sum3, sum4;
578: PetscErrorCode ierr;
579: PetscInt n,i,jrow,j;
580: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
583: if (yy != zz) {VecCopy(yy,zz);}
584: VecGetArrayRead(xx,&x);
585: VecGetArray(zz,&y);
586: idx = a->j;
587: v = a->a;
588: ii = a->i;
590: for (i=0; i<m; i++) {
591: jrow = ii[i];
592: n = ii[i+1] - jrow;
593: sum1 = 0.0;
594: sum2 = 0.0;
595: sum3 = 0.0;
596: sum4 = 0.0;
597: for (j=0; j<n; j++) {
598: sum1 += v[jrow]*x[4*idx[jrow]];
599: sum2 += v[jrow]*x[4*idx[jrow]+1];
600: sum3 += v[jrow]*x[4*idx[jrow]+2];
601: sum4 += v[jrow]*x[4*idx[jrow]+3];
602: jrow++;
603: }
604: y[4*i] += sum1;
605: y[4*i+1] += sum2;
606: y[4*i+2] += sum3;
607: y[4*i+3] += sum4;
608: }
610: PetscLogFlops(8.0*a->nz);
611: VecRestoreArrayRead(xx,&x);
612: VecRestoreArray(zz,&y);
613: return(0);
614: }
615: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
616: {
617: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
618: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
619: const PetscScalar *x,*v;
620: PetscScalar *y,alpha1,alpha2,alpha3,alpha4;
621: PetscErrorCode ierr;
622: PetscInt n,i;
623: const PetscInt m = b->AIJ->rmap->n,*idx;
626: if (yy != zz) {VecCopy(yy,zz);}
627: VecGetArrayRead(xx,&x);
628: VecGetArray(zz,&y);
630: for (i=0; i<m; i++) {
631: idx = a->j + a->i[i];
632: v = a->a + a->i[i];
633: n = a->i[i+1] - a->i[i];
634: alpha1 = x[4*i];
635: alpha2 = x[4*i+1];
636: alpha3 = x[4*i+2];
637: alpha4 = x[4*i+3];
638: while (n-->0) {
639: y[4*(*idx)] += alpha1*(*v);
640: y[4*(*idx)+1] += alpha2*(*v);
641: y[4*(*idx)+2] += alpha3*(*v);
642: y[4*(*idx)+3] += alpha4*(*v);
643: idx++; v++;
644: }
645: }
646: PetscLogFlops(8.0*a->nz);
647: VecRestoreArrayRead(xx,&x);
648: VecRestoreArray(zz,&y);
649: return(0);
650: }
651: /* ------------------------------------------------------------------------------*/
653: PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
654: {
655: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
656: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
657: const PetscScalar *x,*v;
658: PetscScalar *y,sum1, sum2, sum3, sum4, sum5;
659: PetscErrorCode ierr;
660: PetscInt nonzerorow=0,n,i,jrow,j;
661: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
664: VecGetArrayRead(xx,&x);
665: VecGetArray(yy,&y);
666: idx = a->j;
667: v = a->a;
668: ii = a->i;
670: for (i=0; i<m; i++) {
671: jrow = ii[i];
672: n = ii[i+1] - jrow;
673: sum1 = 0.0;
674: sum2 = 0.0;
675: sum3 = 0.0;
676: sum4 = 0.0;
677: sum5 = 0.0;
679: nonzerorow += (n>0);
680: for (j=0; j<n; j++) {
681: sum1 += v[jrow]*x[5*idx[jrow]];
682: sum2 += v[jrow]*x[5*idx[jrow]+1];
683: sum3 += v[jrow]*x[5*idx[jrow]+2];
684: sum4 += v[jrow]*x[5*idx[jrow]+3];
685: sum5 += v[jrow]*x[5*idx[jrow]+4];
686: jrow++;
687: }
688: y[5*i] = sum1;
689: y[5*i+1] = sum2;
690: y[5*i+2] = sum3;
691: y[5*i+3] = sum4;
692: y[5*i+4] = sum5;
693: }
695: PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);
696: VecRestoreArrayRead(xx,&x);
697: VecRestoreArray(yy,&y);
698: return(0);
699: }
701: PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
702: {
703: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
704: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
705: const PetscScalar *x,*v;
706: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5;
707: PetscErrorCode ierr;
708: PetscInt n,i;
709: const PetscInt m = b->AIJ->rmap->n,*idx;
712: VecSet(yy,0.0);
713: VecGetArrayRead(xx,&x);
714: VecGetArray(yy,&y);
716: for (i=0; i<m; i++) {
717: idx = a->j + a->i[i];
718: v = a->a + a->i[i];
719: n = a->i[i+1] - a->i[i];
720: alpha1 = x[5*i];
721: alpha2 = x[5*i+1];
722: alpha3 = x[5*i+2];
723: alpha4 = x[5*i+3];
724: alpha5 = x[5*i+4];
725: while (n-->0) {
726: y[5*(*idx)] += alpha1*(*v);
727: y[5*(*idx)+1] += alpha2*(*v);
728: y[5*(*idx)+2] += alpha3*(*v);
729: y[5*(*idx)+3] += alpha4*(*v);
730: y[5*(*idx)+4] += alpha5*(*v);
731: idx++; v++;
732: }
733: }
734: PetscLogFlops(10.0*a->nz);
735: VecRestoreArrayRead(xx,&x);
736: VecRestoreArray(yy,&y);
737: return(0);
738: }
740: PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
741: {
742: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
743: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
744: const PetscScalar *x,*v;
745: PetscScalar *y,sum1, sum2, sum3, sum4, sum5;
746: PetscErrorCode ierr;
747: PetscInt n,i,jrow,j;
748: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
751: if (yy != zz) {VecCopy(yy,zz);}
752: VecGetArrayRead(xx,&x);
753: VecGetArray(zz,&y);
754: idx = a->j;
755: v = a->a;
756: ii = a->i;
758: for (i=0; i<m; i++) {
759: jrow = ii[i];
760: n = ii[i+1] - jrow;
761: sum1 = 0.0;
762: sum2 = 0.0;
763: sum3 = 0.0;
764: sum4 = 0.0;
765: sum5 = 0.0;
766: for (j=0; j<n; j++) {
767: sum1 += v[jrow]*x[5*idx[jrow]];
768: sum2 += v[jrow]*x[5*idx[jrow]+1];
769: sum3 += v[jrow]*x[5*idx[jrow]+2];
770: sum4 += v[jrow]*x[5*idx[jrow]+3];
771: sum5 += v[jrow]*x[5*idx[jrow]+4];
772: jrow++;
773: }
774: y[5*i] += sum1;
775: y[5*i+1] += sum2;
776: y[5*i+2] += sum3;
777: y[5*i+3] += sum4;
778: y[5*i+4] += sum5;
779: }
781: PetscLogFlops(10.0*a->nz);
782: VecRestoreArrayRead(xx,&x);
783: VecRestoreArray(zz,&y);
784: return(0);
785: }
787: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
788: {
789: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
790: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
791: const PetscScalar *x,*v;
792: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5;
793: PetscErrorCode ierr;
794: PetscInt n,i;
795: const PetscInt m = b->AIJ->rmap->n,*idx;
798: if (yy != zz) {VecCopy(yy,zz);}
799: VecGetArrayRead(xx,&x);
800: VecGetArray(zz,&y);
802: for (i=0; i<m; i++) {
803: idx = a->j + a->i[i];
804: v = a->a + a->i[i];
805: n = a->i[i+1] - a->i[i];
806: alpha1 = x[5*i];
807: alpha2 = x[5*i+1];
808: alpha3 = x[5*i+2];
809: alpha4 = x[5*i+3];
810: alpha5 = x[5*i+4];
811: while (n-->0) {
812: y[5*(*idx)] += alpha1*(*v);
813: y[5*(*idx)+1] += alpha2*(*v);
814: y[5*(*idx)+2] += alpha3*(*v);
815: y[5*(*idx)+3] += alpha4*(*v);
816: y[5*(*idx)+4] += alpha5*(*v);
817: idx++; v++;
818: }
819: }
820: PetscLogFlops(10.0*a->nz);
821: VecRestoreArrayRead(xx,&x);
822: VecRestoreArray(zz,&y);
823: return(0);
824: }
826: /* ------------------------------------------------------------------------------*/
827: PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
828: {
829: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
830: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
831: const PetscScalar *x,*v;
832: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6;
833: PetscErrorCode ierr;
834: PetscInt nonzerorow=0,n,i,jrow,j;
835: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
838: VecGetArrayRead(xx,&x);
839: VecGetArray(yy,&y);
840: idx = a->j;
841: v = a->a;
842: ii = a->i;
844: for (i=0; i<m; i++) {
845: jrow = ii[i];
846: n = ii[i+1] - jrow;
847: sum1 = 0.0;
848: sum2 = 0.0;
849: sum3 = 0.0;
850: sum4 = 0.0;
851: sum5 = 0.0;
852: sum6 = 0.0;
854: nonzerorow += (n>0);
855: for (j=0; j<n; j++) {
856: sum1 += v[jrow]*x[6*idx[jrow]];
857: sum2 += v[jrow]*x[6*idx[jrow]+1];
858: sum3 += v[jrow]*x[6*idx[jrow]+2];
859: sum4 += v[jrow]*x[6*idx[jrow]+3];
860: sum5 += v[jrow]*x[6*idx[jrow]+4];
861: sum6 += v[jrow]*x[6*idx[jrow]+5];
862: jrow++;
863: }
864: y[6*i] = sum1;
865: y[6*i+1] = sum2;
866: y[6*i+2] = sum3;
867: y[6*i+3] = sum4;
868: y[6*i+4] = sum5;
869: y[6*i+5] = sum6;
870: }
872: PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);
873: VecRestoreArrayRead(xx,&x);
874: VecRestoreArray(yy,&y);
875: return(0);
876: }
878: PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
879: {
880: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
881: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
882: const PetscScalar *x,*v;
883: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
884: PetscErrorCode ierr;
885: PetscInt n,i;
886: const PetscInt m = b->AIJ->rmap->n,*idx;
889: VecSet(yy,0.0);
890: VecGetArrayRead(xx,&x);
891: VecGetArray(yy,&y);
893: for (i=0; i<m; i++) {
894: idx = a->j + a->i[i];
895: v = a->a + a->i[i];
896: n = a->i[i+1] - a->i[i];
897: alpha1 = x[6*i];
898: alpha2 = x[6*i+1];
899: alpha3 = x[6*i+2];
900: alpha4 = x[6*i+3];
901: alpha5 = x[6*i+4];
902: alpha6 = x[6*i+5];
903: while (n-->0) {
904: y[6*(*idx)] += alpha1*(*v);
905: y[6*(*idx)+1] += alpha2*(*v);
906: y[6*(*idx)+2] += alpha3*(*v);
907: y[6*(*idx)+3] += alpha4*(*v);
908: y[6*(*idx)+4] += alpha5*(*v);
909: y[6*(*idx)+5] += alpha6*(*v);
910: idx++; v++;
911: }
912: }
913: PetscLogFlops(12.0*a->nz);
914: VecRestoreArrayRead(xx,&x);
915: VecRestoreArray(yy,&y);
916: return(0);
917: }
919: PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
920: {
921: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
922: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
923: const PetscScalar *x,*v;
924: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6;
925: PetscErrorCode ierr;
926: PetscInt n,i,jrow,j;
927: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
930: if (yy != zz) {VecCopy(yy,zz);}
931: VecGetArrayRead(xx,&x);
932: VecGetArray(zz,&y);
933: idx = a->j;
934: v = a->a;
935: ii = a->i;
937: for (i=0; i<m; i++) {
938: jrow = ii[i];
939: n = ii[i+1] - jrow;
940: sum1 = 0.0;
941: sum2 = 0.0;
942: sum3 = 0.0;
943: sum4 = 0.0;
944: sum5 = 0.0;
945: sum6 = 0.0;
946: for (j=0; j<n; j++) {
947: sum1 += v[jrow]*x[6*idx[jrow]];
948: sum2 += v[jrow]*x[6*idx[jrow]+1];
949: sum3 += v[jrow]*x[6*idx[jrow]+2];
950: sum4 += v[jrow]*x[6*idx[jrow]+3];
951: sum5 += v[jrow]*x[6*idx[jrow]+4];
952: sum6 += v[jrow]*x[6*idx[jrow]+5];
953: jrow++;
954: }
955: y[6*i] += sum1;
956: y[6*i+1] += sum2;
957: y[6*i+2] += sum3;
958: y[6*i+3] += sum4;
959: y[6*i+4] += sum5;
960: y[6*i+5] += sum6;
961: }
963: PetscLogFlops(12.0*a->nz);
964: VecRestoreArrayRead(xx,&x);
965: VecRestoreArray(zz,&y);
966: return(0);
967: }
969: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
970: {
971: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
972: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
973: const PetscScalar *x,*v;
974: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
975: PetscErrorCode ierr;
976: PetscInt n,i;
977: const PetscInt m = b->AIJ->rmap->n,*idx;
980: if (yy != zz) {VecCopy(yy,zz);}
981: VecGetArrayRead(xx,&x);
982: VecGetArray(zz,&y);
984: for (i=0; i<m; i++) {
985: idx = a->j + a->i[i];
986: v = a->a + a->i[i];
987: n = a->i[i+1] - a->i[i];
988: alpha1 = x[6*i];
989: alpha2 = x[6*i+1];
990: alpha3 = x[6*i+2];
991: alpha4 = x[6*i+3];
992: alpha5 = x[6*i+4];
993: alpha6 = x[6*i+5];
994: while (n-->0) {
995: y[6*(*idx)] += alpha1*(*v);
996: y[6*(*idx)+1] += alpha2*(*v);
997: y[6*(*idx)+2] += alpha3*(*v);
998: y[6*(*idx)+3] += alpha4*(*v);
999: y[6*(*idx)+4] += alpha5*(*v);
1000: y[6*(*idx)+5] += alpha6*(*v);
1001: idx++; v++;
1002: }
1003: }
1004: PetscLogFlops(12.0*a->nz);
1005: VecRestoreArrayRead(xx,&x);
1006: VecRestoreArray(zz,&y);
1007: return(0);
1008: }
1010: /* ------------------------------------------------------------------------------*/
1011: PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1012: {
1013: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1014: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1015: const PetscScalar *x,*v;
1016: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1017: PetscErrorCode ierr;
1018: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1019: PetscInt nonzerorow=0,n,i,jrow,j;
1022: VecGetArrayRead(xx,&x);
1023: VecGetArray(yy,&y);
1024: idx = a->j;
1025: v = a->a;
1026: ii = a->i;
1028: for (i=0; i<m; i++) {
1029: jrow = ii[i];
1030: n = ii[i+1] - jrow;
1031: sum1 = 0.0;
1032: sum2 = 0.0;
1033: sum3 = 0.0;
1034: sum4 = 0.0;
1035: sum5 = 0.0;
1036: sum6 = 0.0;
1037: sum7 = 0.0;
1039: nonzerorow += (n>0);
1040: for (j=0; j<n; j++) {
1041: sum1 += v[jrow]*x[7*idx[jrow]];
1042: sum2 += v[jrow]*x[7*idx[jrow]+1];
1043: sum3 += v[jrow]*x[7*idx[jrow]+2];
1044: sum4 += v[jrow]*x[7*idx[jrow]+3];
1045: sum5 += v[jrow]*x[7*idx[jrow]+4];
1046: sum6 += v[jrow]*x[7*idx[jrow]+5];
1047: sum7 += v[jrow]*x[7*idx[jrow]+6];
1048: jrow++;
1049: }
1050: y[7*i] = sum1;
1051: y[7*i+1] = sum2;
1052: y[7*i+2] = sum3;
1053: y[7*i+3] = sum4;
1054: y[7*i+4] = sum5;
1055: y[7*i+5] = sum6;
1056: y[7*i+6] = sum7;
1057: }
1059: PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);
1060: VecRestoreArrayRead(xx,&x);
1061: VecRestoreArray(yy,&y);
1062: return(0);
1063: }
1065: PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1066: {
1067: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1068: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1069: const PetscScalar *x,*v;
1070: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1071: PetscErrorCode ierr;
1072: const PetscInt m = b->AIJ->rmap->n,*idx;
1073: PetscInt n,i;
1076: VecSet(yy,0.0);
1077: VecGetArrayRead(xx,&x);
1078: VecGetArray(yy,&y);
1080: for (i=0; i<m; i++) {
1081: idx = a->j + a->i[i];
1082: v = a->a + a->i[i];
1083: n = a->i[i+1] - a->i[i];
1084: alpha1 = x[7*i];
1085: alpha2 = x[7*i+1];
1086: alpha3 = x[7*i+2];
1087: alpha4 = x[7*i+3];
1088: alpha5 = x[7*i+4];
1089: alpha6 = x[7*i+5];
1090: alpha7 = x[7*i+6];
1091: while (n-->0) {
1092: y[7*(*idx)] += alpha1*(*v);
1093: y[7*(*idx)+1] += alpha2*(*v);
1094: y[7*(*idx)+2] += alpha3*(*v);
1095: y[7*(*idx)+3] += alpha4*(*v);
1096: y[7*(*idx)+4] += alpha5*(*v);
1097: y[7*(*idx)+5] += alpha6*(*v);
1098: y[7*(*idx)+6] += alpha7*(*v);
1099: idx++; v++;
1100: }
1101: }
1102: PetscLogFlops(14.0*a->nz);
1103: VecRestoreArrayRead(xx,&x);
1104: VecRestoreArray(yy,&y);
1105: return(0);
1106: }
1108: PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1109: {
1110: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1111: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1112: const PetscScalar *x,*v;
1113: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1114: PetscErrorCode ierr;
1115: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1116: PetscInt n,i,jrow,j;
1119: if (yy != zz) {VecCopy(yy,zz);}
1120: VecGetArrayRead(xx,&x);
1121: VecGetArray(zz,&y);
1122: idx = a->j;
1123: v = a->a;
1124: ii = a->i;
1126: for (i=0; i<m; i++) {
1127: jrow = ii[i];
1128: n = ii[i+1] - jrow;
1129: sum1 = 0.0;
1130: sum2 = 0.0;
1131: sum3 = 0.0;
1132: sum4 = 0.0;
1133: sum5 = 0.0;
1134: sum6 = 0.0;
1135: sum7 = 0.0;
1136: for (j=0; j<n; j++) {
1137: sum1 += v[jrow]*x[7*idx[jrow]];
1138: sum2 += v[jrow]*x[7*idx[jrow]+1];
1139: sum3 += v[jrow]*x[7*idx[jrow]+2];
1140: sum4 += v[jrow]*x[7*idx[jrow]+3];
1141: sum5 += v[jrow]*x[7*idx[jrow]+4];
1142: sum6 += v[jrow]*x[7*idx[jrow]+5];
1143: sum7 += v[jrow]*x[7*idx[jrow]+6];
1144: jrow++;
1145: }
1146: y[7*i] += sum1;
1147: y[7*i+1] += sum2;
1148: y[7*i+2] += sum3;
1149: y[7*i+3] += sum4;
1150: y[7*i+4] += sum5;
1151: y[7*i+5] += sum6;
1152: y[7*i+6] += sum7;
1153: }
1155: PetscLogFlops(14.0*a->nz);
1156: VecRestoreArrayRead(xx,&x);
1157: VecRestoreArray(zz,&y);
1158: return(0);
1159: }
1161: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1162: {
1163: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1164: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1165: const PetscScalar *x,*v;
1166: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1167: PetscErrorCode ierr;
1168: const PetscInt m = b->AIJ->rmap->n,*idx;
1169: PetscInt n,i;
1172: if (yy != zz) {VecCopy(yy,zz);}
1173: VecGetArrayRead(xx,&x);
1174: VecGetArray(zz,&y);
1175: for (i=0; i<m; i++) {
1176: idx = a->j + a->i[i];
1177: v = a->a + a->i[i];
1178: n = a->i[i+1] - a->i[i];
1179: alpha1 = x[7*i];
1180: alpha2 = x[7*i+1];
1181: alpha3 = x[7*i+2];
1182: alpha4 = x[7*i+3];
1183: alpha5 = x[7*i+4];
1184: alpha6 = x[7*i+5];
1185: alpha7 = x[7*i+6];
1186: while (n-->0) {
1187: y[7*(*idx)] += alpha1*(*v);
1188: y[7*(*idx)+1] += alpha2*(*v);
1189: y[7*(*idx)+2] += alpha3*(*v);
1190: y[7*(*idx)+3] += alpha4*(*v);
1191: y[7*(*idx)+4] += alpha5*(*v);
1192: y[7*(*idx)+5] += alpha6*(*v);
1193: y[7*(*idx)+6] += alpha7*(*v);
1194: idx++; v++;
1195: }
1196: }
1197: PetscLogFlops(14.0*a->nz);
1198: VecRestoreArrayRead(xx,&x);
1199: VecRestoreArray(zz,&y);
1200: return(0);
1201: }
1203: PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1204: {
1205: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1206: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1207: const PetscScalar *x,*v;
1208: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1209: PetscErrorCode ierr;
1210: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1211: PetscInt nonzerorow=0,n,i,jrow,j;
1214: VecGetArrayRead(xx,&x);
1215: VecGetArray(yy,&y);
1216: idx = a->j;
1217: v = a->a;
1218: ii = a->i;
1220: for (i=0; i<m; i++) {
1221: jrow = ii[i];
1222: n = ii[i+1] - jrow;
1223: sum1 = 0.0;
1224: sum2 = 0.0;
1225: sum3 = 0.0;
1226: sum4 = 0.0;
1227: sum5 = 0.0;
1228: sum6 = 0.0;
1229: sum7 = 0.0;
1230: sum8 = 0.0;
1232: nonzerorow += (n>0);
1233: for (j=0; j<n; j++) {
1234: sum1 += v[jrow]*x[8*idx[jrow]];
1235: sum2 += v[jrow]*x[8*idx[jrow]+1];
1236: sum3 += v[jrow]*x[8*idx[jrow]+2];
1237: sum4 += v[jrow]*x[8*idx[jrow]+3];
1238: sum5 += v[jrow]*x[8*idx[jrow]+4];
1239: sum6 += v[jrow]*x[8*idx[jrow]+5];
1240: sum7 += v[jrow]*x[8*idx[jrow]+6];
1241: sum8 += v[jrow]*x[8*idx[jrow]+7];
1242: jrow++;
1243: }
1244: y[8*i] = sum1;
1245: y[8*i+1] = sum2;
1246: y[8*i+2] = sum3;
1247: y[8*i+3] = sum4;
1248: y[8*i+4] = sum5;
1249: y[8*i+5] = sum6;
1250: y[8*i+6] = sum7;
1251: y[8*i+7] = sum8;
1252: }
1254: PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);
1255: VecRestoreArrayRead(xx,&x);
1256: VecRestoreArray(yy,&y);
1257: return(0);
1258: }
1260: PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1261: {
1262: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1263: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1264: const PetscScalar *x,*v;
1265: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1266: PetscErrorCode ierr;
1267: const PetscInt m = b->AIJ->rmap->n,*idx;
1268: PetscInt n,i;
1271: VecSet(yy,0.0);
1272: VecGetArrayRead(xx,&x);
1273: VecGetArray(yy,&y);
1275: for (i=0; i<m; i++) {
1276: idx = a->j + a->i[i];
1277: v = a->a + a->i[i];
1278: n = a->i[i+1] - a->i[i];
1279: alpha1 = x[8*i];
1280: alpha2 = x[8*i+1];
1281: alpha3 = x[8*i+2];
1282: alpha4 = x[8*i+3];
1283: alpha5 = x[8*i+4];
1284: alpha6 = x[8*i+5];
1285: alpha7 = x[8*i+6];
1286: alpha8 = x[8*i+7];
1287: while (n-->0) {
1288: y[8*(*idx)] += alpha1*(*v);
1289: y[8*(*idx)+1] += alpha2*(*v);
1290: y[8*(*idx)+2] += alpha3*(*v);
1291: y[8*(*idx)+3] += alpha4*(*v);
1292: y[8*(*idx)+4] += alpha5*(*v);
1293: y[8*(*idx)+5] += alpha6*(*v);
1294: y[8*(*idx)+6] += alpha7*(*v);
1295: y[8*(*idx)+7] += alpha8*(*v);
1296: idx++; v++;
1297: }
1298: }
1299: PetscLogFlops(16.0*a->nz);
1300: VecRestoreArrayRead(xx,&x);
1301: VecRestoreArray(yy,&y);
1302: return(0);
1303: }
1305: PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1306: {
1307: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1308: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1309: const PetscScalar *x,*v;
1310: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1311: PetscErrorCode ierr;
1312: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1313: PetscInt n,i,jrow,j;
1316: if (yy != zz) {VecCopy(yy,zz);}
1317: VecGetArrayRead(xx,&x);
1318: VecGetArray(zz,&y);
1319: idx = a->j;
1320: v = a->a;
1321: ii = a->i;
1323: for (i=0; i<m; i++) {
1324: jrow = ii[i];
1325: n = ii[i+1] - jrow;
1326: sum1 = 0.0;
1327: sum2 = 0.0;
1328: sum3 = 0.0;
1329: sum4 = 0.0;
1330: sum5 = 0.0;
1331: sum6 = 0.0;
1332: sum7 = 0.0;
1333: sum8 = 0.0;
1334: for (j=0; j<n; j++) {
1335: sum1 += v[jrow]*x[8*idx[jrow]];
1336: sum2 += v[jrow]*x[8*idx[jrow]+1];
1337: sum3 += v[jrow]*x[8*idx[jrow]+2];
1338: sum4 += v[jrow]*x[8*idx[jrow]+3];
1339: sum5 += v[jrow]*x[8*idx[jrow]+4];
1340: sum6 += v[jrow]*x[8*idx[jrow]+5];
1341: sum7 += v[jrow]*x[8*idx[jrow]+6];
1342: sum8 += v[jrow]*x[8*idx[jrow]+7];
1343: jrow++;
1344: }
1345: y[8*i] += sum1;
1346: y[8*i+1] += sum2;
1347: y[8*i+2] += sum3;
1348: y[8*i+3] += sum4;
1349: y[8*i+4] += sum5;
1350: y[8*i+5] += sum6;
1351: y[8*i+6] += sum7;
1352: y[8*i+7] += sum8;
1353: }
1355: PetscLogFlops(16.0*a->nz);
1356: VecRestoreArrayRead(xx,&x);
1357: VecRestoreArray(zz,&y);
1358: return(0);
1359: }
1361: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1362: {
1363: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1364: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1365: const PetscScalar *x,*v;
1366: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1367: PetscErrorCode ierr;
1368: const PetscInt m = b->AIJ->rmap->n,*idx;
1369: PetscInt n,i;
1372: if (yy != zz) {VecCopy(yy,zz);}
1373: VecGetArrayRead(xx,&x);
1374: VecGetArray(zz,&y);
1375: for (i=0; i<m; i++) {
1376: idx = a->j + a->i[i];
1377: v = a->a + a->i[i];
1378: n = a->i[i+1] - a->i[i];
1379: alpha1 = x[8*i];
1380: alpha2 = x[8*i+1];
1381: alpha3 = x[8*i+2];
1382: alpha4 = x[8*i+3];
1383: alpha5 = x[8*i+4];
1384: alpha6 = x[8*i+5];
1385: alpha7 = x[8*i+6];
1386: alpha8 = x[8*i+7];
1387: while (n-->0) {
1388: y[8*(*idx)] += alpha1*(*v);
1389: y[8*(*idx)+1] += alpha2*(*v);
1390: y[8*(*idx)+2] += alpha3*(*v);
1391: y[8*(*idx)+3] += alpha4*(*v);
1392: y[8*(*idx)+4] += alpha5*(*v);
1393: y[8*(*idx)+5] += alpha6*(*v);
1394: y[8*(*idx)+6] += alpha7*(*v);
1395: y[8*(*idx)+7] += alpha8*(*v);
1396: idx++; v++;
1397: }
1398: }
1399: PetscLogFlops(16.0*a->nz);
1400: VecRestoreArrayRead(xx,&x);
1401: VecRestoreArray(zz,&y);
1402: return(0);
1403: }
1405: /* ------------------------------------------------------------------------------*/
1406: PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1407: {
1408: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1409: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1410: const PetscScalar *x,*v;
1411: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1412: PetscErrorCode ierr;
1413: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1414: PetscInt nonzerorow=0,n,i,jrow,j;
1417: VecGetArrayRead(xx,&x);
1418: VecGetArray(yy,&y);
1419: idx = a->j;
1420: v = a->a;
1421: ii = a->i;
1423: for (i=0; i<m; i++) {
1424: jrow = ii[i];
1425: n = ii[i+1] - jrow;
1426: sum1 = 0.0;
1427: sum2 = 0.0;
1428: sum3 = 0.0;
1429: sum4 = 0.0;
1430: sum5 = 0.0;
1431: sum6 = 0.0;
1432: sum7 = 0.0;
1433: sum8 = 0.0;
1434: sum9 = 0.0;
1436: nonzerorow += (n>0);
1437: for (j=0; j<n; j++) {
1438: sum1 += v[jrow]*x[9*idx[jrow]];
1439: sum2 += v[jrow]*x[9*idx[jrow]+1];
1440: sum3 += v[jrow]*x[9*idx[jrow]+2];
1441: sum4 += v[jrow]*x[9*idx[jrow]+3];
1442: sum5 += v[jrow]*x[9*idx[jrow]+4];
1443: sum6 += v[jrow]*x[9*idx[jrow]+5];
1444: sum7 += v[jrow]*x[9*idx[jrow]+6];
1445: sum8 += v[jrow]*x[9*idx[jrow]+7];
1446: sum9 += v[jrow]*x[9*idx[jrow]+8];
1447: jrow++;
1448: }
1449: y[9*i] = sum1;
1450: y[9*i+1] = sum2;
1451: y[9*i+2] = sum3;
1452: y[9*i+3] = sum4;
1453: y[9*i+4] = sum5;
1454: y[9*i+5] = sum6;
1455: y[9*i+6] = sum7;
1456: y[9*i+7] = sum8;
1457: y[9*i+8] = sum9;
1458: }
1460: PetscLogFlops(18.0*a->nz - 9*nonzerorow);
1461: VecRestoreArrayRead(xx,&x);
1462: VecRestoreArray(yy,&y);
1463: return(0);
1464: }
1466: /* ------------------------------------------------------------------------------*/
1468: PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1469: {
1470: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1471: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1472: const PetscScalar *x,*v;
1473: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1474: PetscErrorCode ierr;
1475: const PetscInt m = b->AIJ->rmap->n,*idx;
1476: PetscInt n,i;
1479: VecSet(yy,0.0);
1480: VecGetArrayRead(xx,&x);
1481: VecGetArray(yy,&y);
1483: for (i=0; i<m; i++) {
1484: idx = a->j + a->i[i];
1485: v = a->a + a->i[i];
1486: n = a->i[i+1] - a->i[i];
1487: alpha1 = x[9*i];
1488: alpha2 = x[9*i+1];
1489: alpha3 = x[9*i+2];
1490: alpha4 = x[9*i+3];
1491: alpha5 = x[9*i+4];
1492: alpha6 = x[9*i+5];
1493: alpha7 = x[9*i+6];
1494: alpha8 = x[9*i+7];
1495: alpha9 = x[9*i+8];
1496: while (n-->0) {
1497: y[9*(*idx)] += alpha1*(*v);
1498: y[9*(*idx)+1] += alpha2*(*v);
1499: y[9*(*idx)+2] += alpha3*(*v);
1500: y[9*(*idx)+3] += alpha4*(*v);
1501: y[9*(*idx)+4] += alpha5*(*v);
1502: y[9*(*idx)+5] += alpha6*(*v);
1503: y[9*(*idx)+6] += alpha7*(*v);
1504: y[9*(*idx)+7] += alpha8*(*v);
1505: y[9*(*idx)+8] += alpha9*(*v);
1506: idx++; v++;
1507: }
1508: }
1509: PetscLogFlops(18.0*a->nz);
1510: VecRestoreArrayRead(xx,&x);
1511: VecRestoreArray(yy,&y);
1512: return(0);
1513: }
1515: PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1516: {
1517: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1518: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1519: const PetscScalar *x,*v;
1520: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1521: PetscErrorCode ierr;
1522: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1523: PetscInt n,i,jrow,j;
1526: if (yy != zz) {VecCopy(yy,zz);}
1527: VecGetArrayRead(xx,&x);
1528: VecGetArray(zz,&y);
1529: idx = a->j;
1530: v = a->a;
1531: ii = a->i;
1533: for (i=0; i<m; i++) {
1534: jrow = ii[i];
1535: n = ii[i+1] - jrow;
1536: sum1 = 0.0;
1537: sum2 = 0.0;
1538: sum3 = 0.0;
1539: sum4 = 0.0;
1540: sum5 = 0.0;
1541: sum6 = 0.0;
1542: sum7 = 0.0;
1543: sum8 = 0.0;
1544: sum9 = 0.0;
1545: for (j=0; j<n; j++) {
1546: sum1 += v[jrow]*x[9*idx[jrow]];
1547: sum2 += v[jrow]*x[9*idx[jrow]+1];
1548: sum3 += v[jrow]*x[9*idx[jrow]+2];
1549: sum4 += v[jrow]*x[9*idx[jrow]+3];
1550: sum5 += v[jrow]*x[9*idx[jrow]+4];
1551: sum6 += v[jrow]*x[9*idx[jrow]+5];
1552: sum7 += v[jrow]*x[9*idx[jrow]+6];
1553: sum8 += v[jrow]*x[9*idx[jrow]+7];
1554: sum9 += v[jrow]*x[9*idx[jrow]+8];
1555: jrow++;
1556: }
1557: y[9*i] += sum1;
1558: y[9*i+1] += sum2;
1559: y[9*i+2] += sum3;
1560: y[9*i+3] += sum4;
1561: y[9*i+4] += sum5;
1562: y[9*i+5] += sum6;
1563: y[9*i+6] += sum7;
1564: y[9*i+7] += sum8;
1565: y[9*i+8] += sum9;
1566: }
1568: PetscLogFlops(18.0*a->nz);
1569: VecRestoreArrayRead(xx,&x);
1570: VecRestoreArray(zz,&y);
1571: return(0);
1572: }
1574: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1575: {
1576: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1577: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1578: const PetscScalar *x,*v;
1579: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1580: PetscErrorCode ierr;
1581: const PetscInt m = b->AIJ->rmap->n,*idx;
1582: PetscInt n,i;
1585: if (yy != zz) {VecCopy(yy,zz);}
1586: VecGetArrayRead(xx,&x);
1587: VecGetArray(zz,&y);
1588: for (i=0; i<m; i++) {
1589: idx = a->j + a->i[i];
1590: v = a->a + a->i[i];
1591: n = a->i[i+1] - a->i[i];
1592: alpha1 = x[9*i];
1593: alpha2 = x[9*i+1];
1594: alpha3 = x[9*i+2];
1595: alpha4 = x[9*i+3];
1596: alpha5 = x[9*i+4];
1597: alpha6 = x[9*i+5];
1598: alpha7 = x[9*i+6];
1599: alpha8 = x[9*i+7];
1600: alpha9 = x[9*i+8];
1601: while (n-->0) {
1602: y[9*(*idx)] += alpha1*(*v);
1603: y[9*(*idx)+1] += alpha2*(*v);
1604: y[9*(*idx)+2] += alpha3*(*v);
1605: y[9*(*idx)+3] += alpha4*(*v);
1606: y[9*(*idx)+4] += alpha5*(*v);
1607: y[9*(*idx)+5] += alpha6*(*v);
1608: y[9*(*idx)+6] += alpha7*(*v);
1609: y[9*(*idx)+7] += alpha8*(*v);
1610: y[9*(*idx)+8] += alpha9*(*v);
1611: idx++; v++;
1612: }
1613: }
1614: PetscLogFlops(18.0*a->nz);
1615: VecRestoreArrayRead(xx,&x);
1616: VecRestoreArray(zz,&y);
1617: return(0);
1618: }
1619: PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1620: {
1621: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1622: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1623: const PetscScalar *x,*v;
1624: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1625: PetscErrorCode ierr;
1626: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1627: PetscInt nonzerorow=0,n,i,jrow,j;
1630: VecGetArrayRead(xx,&x);
1631: VecGetArray(yy,&y);
1632: idx = a->j;
1633: v = a->a;
1634: ii = a->i;
1636: for (i=0; i<m; i++) {
1637: jrow = ii[i];
1638: n = ii[i+1] - jrow;
1639: sum1 = 0.0;
1640: sum2 = 0.0;
1641: sum3 = 0.0;
1642: sum4 = 0.0;
1643: sum5 = 0.0;
1644: sum6 = 0.0;
1645: sum7 = 0.0;
1646: sum8 = 0.0;
1647: sum9 = 0.0;
1648: sum10 = 0.0;
1650: nonzerorow += (n>0);
1651: for (j=0; j<n; j++) {
1652: sum1 += v[jrow]*x[10*idx[jrow]];
1653: sum2 += v[jrow]*x[10*idx[jrow]+1];
1654: sum3 += v[jrow]*x[10*idx[jrow]+2];
1655: sum4 += v[jrow]*x[10*idx[jrow]+3];
1656: sum5 += v[jrow]*x[10*idx[jrow]+4];
1657: sum6 += v[jrow]*x[10*idx[jrow]+5];
1658: sum7 += v[jrow]*x[10*idx[jrow]+6];
1659: sum8 += v[jrow]*x[10*idx[jrow]+7];
1660: sum9 += v[jrow]*x[10*idx[jrow]+8];
1661: sum10 += v[jrow]*x[10*idx[jrow]+9];
1662: jrow++;
1663: }
1664: y[10*i] = sum1;
1665: y[10*i+1] = sum2;
1666: y[10*i+2] = sum3;
1667: y[10*i+3] = sum4;
1668: y[10*i+4] = sum5;
1669: y[10*i+5] = sum6;
1670: y[10*i+6] = sum7;
1671: y[10*i+7] = sum8;
1672: y[10*i+8] = sum9;
1673: y[10*i+9] = sum10;
1674: }
1676: PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);
1677: VecRestoreArrayRead(xx,&x);
1678: VecRestoreArray(yy,&y);
1679: return(0);
1680: }
1682: PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1683: {
1684: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1685: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1686: const PetscScalar *x,*v;
1687: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1688: PetscErrorCode ierr;
1689: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1690: PetscInt n,i,jrow,j;
1693: if (yy != zz) {VecCopy(yy,zz);}
1694: VecGetArrayRead(xx,&x);
1695: VecGetArray(zz,&y);
1696: idx = a->j;
1697: v = a->a;
1698: ii = a->i;
1700: for (i=0; i<m; i++) {
1701: jrow = ii[i];
1702: n = ii[i+1] - jrow;
1703: sum1 = 0.0;
1704: sum2 = 0.0;
1705: sum3 = 0.0;
1706: sum4 = 0.0;
1707: sum5 = 0.0;
1708: sum6 = 0.0;
1709: sum7 = 0.0;
1710: sum8 = 0.0;
1711: sum9 = 0.0;
1712: sum10 = 0.0;
1713: for (j=0; j<n; j++) {
1714: sum1 += v[jrow]*x[10*idx[jrow]];
1715: sum2 += v[jrow]*x[10*idx[jrow]+1];
1716: sum3 += v[jrow]*x[10*idx[jrow]+2];
1717: sum4 += v[jrow]*x[10*idx[jrow]+3];
1718: sum5 += v[jrow]*x[10*idx[jrow]+4];
1719: sum6 += v[jrow]*x[10*idx[jrow]+5];
1720: sum7 += v[jrow]*x[10*idx[jrow]+6];
1721: sum8 += v[jrow]*x[10*idx[jrow]+7];
1722: sum9 += v[jrow]*x[10*idx[jrow]+8];
1723: sum10 += v[jrow]*x[10*idx[jrow]+9];
1724: jrow++;
1725: }
1726: y[10*i] += sum1;
1727: y[10*i+1] += sum2;
1728: y[10*i+2] += sum3;
1729: y[10*i+3] += sum4;
1730: y[10*i+4] += sum5;
1731: y[10*i+5] += sum6;
1732: y[10*i+6] += sum7;
1733: y[10*i+7] += sum8;
1734: y[10*i+8] += sum9;
1735: y[10*i+9] += sum10;
1736: }
1738: PetscLogFlops(20.0*a->nz);
1739: VecRestoreArrayRead(xx,&x);
1740: VecRestoreArray(yy,&y);
1741: return(0);
1742: }
1744: PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1745: {
1746: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1747: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1748: const PetscScalar *x,*v;
1749: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1750: PetscErrorCode ierr;
1751: const PetscInt m = b->AIJ->rmap->n,*idx;
1752: PetscInt n,i;
1755: VecSet(yy,0.0);
1756: VecGetArrayRead(xx,&x);
1757: VecGetArray(yy,&y);
1759: for (i=0; i<m; i++) {
1760: idx = a->j + a->i[i];
1761: v = a->a + a->i[i];
1762: n = a->i[i+1] - a->i[i];
1763: alpha1 = x[10*i];
1764: alpha2 = x[10*i+1];
1765: alpha3 = x[10*i+2];
1766: alpha4 = x[10*i+3];
1767: alpha5 = x[10*i+4];
1768: alpha6 = x[10*i+5];
1769: alpha7 = x[10*i+6];
1770: alpha8 = x[10*i+7];
1771: alpha9 = x[10*i+8];
1772: alpha10 = x[10*i+9];
1773: while (n-->0) {
1774: y[10*(*idx)] += alpha1*(*v);
1775: y[10*(*idx)+1] += alpha2*(*v);
1776: y[10*(*idx)+2] += alpha3*(*v);
1777: y[10*(*idx)+3] += alpha4*(*v);
1778: y[10*(*idx)+4] += alpha5*(*v);
1779: y[10*(*idx)+5] += alpha6*(*v);
1780: y[10*(*idx)+6] += alpha7*(*v);
1781: y[10*(*idx)+7] += alpha8*(*v);
1782: y[10*(*idx)+8] += alpha9*(*v);
1783: y[10*(*idx)+9] += alpha10*(*v);
1784: idx++; v++;
1785: }
1786: }
1787: PetscLogFlops(20.0*a->nz);
1788: VecRestoreArrayRead(xx,&x);
1789: VecRestoreArray(yy,&y);
1790: return(0);
1791: }
1793: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1794: {
1795: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1796: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1797: const PetscScalar *x,*v;
1798: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1799: PetscErrorCode ierr;
1800: const PetscInt m = b->AIJ->rmap->n,*idx;
1801: PetscInt n,i;
1804: if (yy != zz) {VecCopy(yy,zz);}
1805: VecGetArrayRead(xx,&x);
1806: VecGetArray(zz,&y);
1807: for (i=0; i<m; i++) {
1808: idx = a->j + a->i[i];
1809: v = a->a + a->i[i];
1810: n = a->i[i+1] - a->i[i];
1811: alpha1 = x[10*i];
1812: alpha2 = x[10*i+1];
1813: alpha3 = x[10*i+2];
1814: alpha4 = x[10*i+3];
1815: alpha5 = x[10*i+4];
1816: alpha6 = x[10*i+5];
1817: alpha7 = x[10*i+6];
1818: alpha8 = x[10*i+7];
1819: alpha9 = x[10*i+8];
1820: alpha10 = x[10*i+9];
1821: while (n-->0) {
1822: y[10*(*idx)] += alpha1*(*v);
1823: y[10*(*idx)+1] += alpha2*(*v);
1824: y[10*(*idx)+2] += alpha3*(*v);
1825: y[10*(*idx)+3] += alpha4*(*v);
1826: y[10*(*idx)+4] += alpha5*(*v);
1827: y[10*(*idx)+5] += alpha6*(*v);
1828: y[10*(*idx)+6] += alpha7*(*v);
1829: y[10*(*idx)+7] += alpha8*(*v);
1830: y[10*(*idx)+8] += alpha9*(*v);
1831: y[10*(*idx)+9] += alpha10*(*v);
1832: idx++; v++;
1833: }
1834: }
1835: PetscLogFlops(20.0*a->nz);
1836: VecRestoreArrayRead(xx,&x);
1837: VecRestoreArray(zz,&y);
1838: return(0);
1839: }
1841: /*--------------------------------------------------------------------------------------------*/
1842: PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1843: {
1844: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1845: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1846: const PetscScalar *x,*v;
1847: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1848: PetscErrorCode ierr;
1849: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1850: PetscInt nonzerorow=0,n,i,jrow,j;
1853: VecGetArrayRead(xx,&x);
1854: VecGetArray(yy,&y);
1855: idx = a->j;
1856: v = a->a;
1857: ii = a->i;
1859: for (i=0; i<m; i++) {
1860: jrow = ii[i];
1861: n = ii[i+1] - jrow;
1862: sum1 = 0.0;
1863: sum2 = 0.0;
1864: sum3 = 0.0;
1865: sum4 = 0.0;
1866: sum5 = 0.0;
1867: sum6 = 0.0;
1868: sum7 = 0.0;
1869: sum8 = 0.0;
1870: sum9 = 0.0;
1871: sum10 = 0.0;
1872: sum11 = 0.0;
1874: nonzerorow += (n>0);
1875: for (j=0; j<n; j++) {
1876: sum1 += v[jrow]*x[11*idx[jrow]];
1877: sum2 += v[jrow]*x[11*idx[jrow]+1];
1878: sum3 += v[jrow]*x[11*idx[jrow]+2];
1879: sum4 += v[jrow]*x[11*idx[jrow]+3];
1880: sum5 += v[jrow]*x[11*idx[jrow]+4];
1881: sum6 += v[jrow]*x[11*idx[jrow]+5];
1882: sum7 += v[jrow]*x[11*idx[jrow]+6];
1883: sum8 += v[jrow]*x[11*idx[jrow]+7];
1884: sum9 += v[jrow]*x[11*idx[jrow]+8];
1885: sum10 += v[jrow]*x[11*idx[jrow]+9];
1886: sum11 += v[jrow]*x[11*idx[jrow]+10];
1887: jrow++;
1888: }
1889: y[11*i] = sum1;
1890: y[11*i+1] = sum2;
1891: y[11*i+2] = sum3;
1892: y[11*i+3] = sum4;
1893: y[11*i+4] = sum5;
1894: y[11*i+5] = sum6;
1895: y[11*i+6] = sum7;
1896: y[11*i+7] = sum8;
1897: y[11*i+8] = sum9;
1898: y[11*i+9] = sum10;
1899: y[11*i+10] = sum11;
1900: }
1902: PetscLogFlops(22.0*a->nz - 11*nonzerorow);
1903: VecRestoreArrayRead(xx,&x);
1904: VecRestoreArray(yy,&y);
1905: return(0);
1906: }
1908: PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1909: {
1910: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1911: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1912: const PetscScalar *x,*v;
1913: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1914: PetscErrorCode ierr;
1915: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1916: PetscInt n,i,jrow,j;
1919: if (yy != zz) {VecCopy(yy,zz);}
1920: VecGetArrayRead(xx,&x);
1921: VecGetArray(zz,&y);
1922: idx = a->j;
1923: v = a->a;
1924: ii = a->i;
1926: for (i=0; i<m; i++) {
1927: jrow = ii[i];
1928: n = ii[i+1] - jrow;
1929: sum1 = 0.0;
1930: sum2 = 0.0;
1931: sum3 = 0.0;
1932: sum4 = 0.0;
1933: sum5 = 0.0;
1934: sum6 = 0.0;
1935: sum7 = 0.0;
1936: sum8 = 0.0;
1937: sum9 = 0.0;
1938: sum10 = 0.0;
1939: sum11 = 0.0;
1940: for (j=0; j<n; j++) {
1941: sum1 += v[jrow]*x[11*idx[jrow]];
1942: sum2 += v[jrow]*x[11*idx[jrow]+1];
1943: sum3 += v[jrow]*x[11*idx[jrow]+2];
1944: sum4 += v[jrow]*x[11*idx[jrow]+3];
1945: sum5 += v[jrow]*x[11*idx[jrow]+4];
1946: sum6 += v[jrow]*x[11*idx[jrow]+5];
1947: sum7 += v[jrow]*x[11*idx[jrow]+6];
1948: sum8 += v[jrow]*x[11*idx[jrow]+7];
1949: sum9 += v[jrow]*x[11*idx[jrow]+8];
1950: sum10 += v[jrow]*x[11*idx[jrow]+9];
1951: sum11 += v[jrow]*x[11*idx[jrow]+10];
1952: jrow++;
1953: }
1954: y[11*i] += sum1;
1955: y[11*i+1] += sum2;
1956: y[11*i+2] += sum3;
1957: y[11*i+3] += sum4;
1958: y[11*i+4] += sum5;
1959: y[11*i+5] += sum6;
1960: y[11*i+6] += sum7;
1961: y[11*i+7] += sum8;
1962: y[11*i+8] += sum9;
1963: y[11*i+9] += sum10;
1964: y[11*i+10] += sum11;
1965: }
1967: PetscLogFlops(22.0*a->nz);
1968: VecRestoreArrayRead(xx,&x);
1969: VecRestoreArray(yy,&y);
1970: return(0);
1971: }
1973: PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1974: {
1975: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1976: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1977: const PetscScalar *x,*v;
1978: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
1979: PetscErrorCode ierr;
1980: const PetscInt m = b->AIJ->rmap->n,*idx;
1981: PetscInt n,i;
1984: VecSet(yy,0.0);
1985: VecGetArrayRead(xx,&x);
1986: VecGetArray(yy,&y);
1988: for (i=0; i<m; i++) {
1989: idx = a->j + a->i[i];
1990: v = a->a + a->i[i];
1991: n = a->i[i+1] - a->i[i];
1992: alpha1 = x[11*i];
1993: alpha2 = x[11*i+1];
1994: alpha3 = x[11*i+2];
1995: alpha4 = x[11*i+3];
1996: alpha5 = x[11*i+4];
1997: alpha6 = x[11*i+5];
1998: alpha7 = x[11*i+6];
1999: alpha8 = x[11*i+7];
2000: alpha9 = x[11*i+8];
2001: alpha10 = x[11*i+9];
2002: alpha11 = x[11*i+10];
2003: while (n-->0) {
2004: y[11*(*idx)] += alpha1*(*v);
2005: y[11*(*idx)+1] += alpha2*(*v);
2006: y[11*(*idx)+2] += alpha3*(*v);
2007: y[11*(*idx)+3] += alpha4*(*v);
2008: y[11*(*idx)+4] += alpha5*(*v);
2009: y[11*(*idx)+5] += alpha6*(*v);
2010: y[11*(*idx)+6] += alpha7*(*v);
2011: y[11*(*idx)+7] += alpha8*(*v);
2012: y[11*(*idx)+8] += alpha9*(*v);
2013: y[11*(*idx)+9] += alpha10*(*v);
2014: y[11*(*idx)+10] += alpha11*(*v);
2015: idx++; v++;
2016: }
2017: }
2018: PetscLogFlops(22.0*a->nz);
2019: VecRestoreArrayRead(xx,&x);
2020: VecRestoreArray(yy,&y);
2021: return(0);
2022: }
2024: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2025: {
2026: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2027: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2028: const PetscScalar *x,*v;
2029: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2030: PetscErrorCode ierr;
2031: const PetscInt m = b->AIJ->rmap->n,*idx;
2032: PetscInt n,i;
2035: if (yy != zz) {VecCopy(yy,zz);}
2036: VecGetArrayRead(xx,&x);
2037: VecGetArray(zz,&y);
2038: for (i=0; i<m; i++) {
2039: idx = a->j + a->i[i];
2040: v = a->a + a->i[i];
2041: n = a->i[i+1] - a->i[i];
2042: alpha1 = x[11*i];
2043: alpha2 = x[11*i+1];
2044: alpha3 = x[11*i+2];
2045: alpha4 = x[11*i+3];
2046: alpha5 = x[11*i+4];
2047: alpha6 = x[11*i+5];
2048: alpha7 = x[11*i+6];
2049: alpha8 = x[11*i+7];
2050: alpha9 = x[11*i+8];
2051: alpha10 = x[11*i+9];
2052: alpha11 = x[11*i+10];
2053: while (n-->0) {
2054: y[11*(*idx)] += alpha1*(*v);
2055: y[11*(*idx)+1] += alpha2*(*v);
2056: y[11*(*idx)+2] += alpha3*(*v);
2057: y[11*(*idx)+3] += alpha4*(*v);
2058: y[11*(*idx)+4] += alpha5*(*v);
2059: y[11*(*idx)+5] += alpha6*(*v);
2060: y[11*(*idx)+6] += alpha7*(*v);
2061: y[11*(*idx)+7] += alpha8*(*v);
2062: y[11*(*idx)+8] += alpha9*(*v);
2063: y[11*(*idx)+9] += alpha10*(*v);
2064: y[11*(*idx)+10] += alpha11*(*v);
2065: idx++; v++;
2066: }
2067: }
2068: PetscLogFlops(22.0*a->nz);
2069: VecRestoreArrayRead(xx,&x);
2070: VecRestoreArray(zz,&y);
2071: return(0);
2072: }
2074: /*--------------------------------------------------------------------------------------------*/
2075: PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2076: {
2077: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2078: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2079: const PetscScalar *x,*v;
2080: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2081: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2082: PetscErrorCode ierr;
2083: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2084: PetscInt nonzerorow=0,n,i,jrow,j;
2087: VecGetArrayRead(xx,&x);
2088: VecGetArray(yy,&y);
2089: idx = a->j;
2090: v = a->a;
2091: ii = a->i;
2093: for (i=0; i<m; i++) {
2094: jrow = ii[i];
2095: n = ii[i+1] - jrow;
2096: sum1 = 0.0;
2097: sum2 = 0.0;
2098: sum3 = 0.0;
2099: sum4 = 0.0;
2100: sum5 = 0.0;
2101: sum6 = 0.0;
2102: sum7 = 0.0;
2103: sum8 = 0.0;
2104: sum9 = 0.0;
2105: sum10 = 0.0;
2106: sum11 = 0.0;
2107: sum12 = 0.0;
2108: sum13 = 0.0;
2109: sum14 = 0.0;
2110: sum15 = 0.0;
2111: sum16 = 0.0;
2113: nonzerorow += (n>0);
2114: for (j=0; j<n; j++) {
2115: sum1 += v[jrow]*x[16*idx[jrow]];
2116: sum2 += v[jrow]*x[16*idx[jrow]+1];
2117: sum3 += v[jrow]*x[16*idx[jrow]+2];
2118: sum4 += v[jrow]*x[16*idx[jrow]+3];
2119: sum5 += v[jrow]*x[16*idx[jrow]+4];
2120: sum6 += v[jrow]*x[16*idx[jrow]+5];
2121: sum7 += v[jrow]*x[16*idx[jrow]+6];
2122: sum8 += v[jrow]*x[16*idx[jrow]+7];
2123: sum9 += v[jrow]*x[16*idx[jrow]+8];
2124: sum10 += v[jrow]*x[16*idx[jrow]+9];
2125: sum11 += v[jrow]*x[16*idx[jrow]+10];
2126: sum12 += v[jrow]*x[16*idx[jrow]+11];
2127: sum13 += v[jrow]*x[16*idx[jrow]+12];
2128: sum14 += v[jrow]*x[16*idx[jrow]+13];
2129: sum15 += v[jrow]*x[16*idx[jrow]+14];
2130: sum16 += v[jrow]*x[16*idx[jrow]+15];
2131: jrow++;
2132: }
2133: y[16*i] = sum1;
2134: y[16*i+1] = sum2;
2135: y[16*i+2] = sum3;
2136: y[16*i+3] = sum4;
2137: y[16*i+4] = sum5;
2138: y[16*i+5] = sum6;
2139: y[16*i+6] = sum7;
2140: y[16*i+7] = sum8;
2141: y[16*i+8] = sum9;
2142: y[16*i+9] = sum10;
2143: y[16*i+10] = sum11;
2144: y[16*i+11] = sum12;
2145: y[16*i+12] = sum13;
2146: y[16*i+13] = sum14;
2147: y[16*i+14] = sum15;
2148: y[16*i+15] = sum16;
2149: }
2151: PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);
2152: VecRestoreArrayRead(xx,&x);
2153: VecRestoreArray(yy,&y);
2154: return(0);
2155: }
2157: PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2158: {
2159: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2160: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2161: const PetscScalar *x,*v;
2162: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2163: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2164: PetscErrorCode ierr;
2165: const PetscInt m = b->AIJ->rmap->n,*idx;
2166: PetscInt n,i;
2169: VecSet(yy,0.0);
2170: VecGetArrayRead(xx,&x);
2171: VecGetArray(yy,&y);
2173: for (i=0; i<m; i++) {
2174: idx = a->j + a->i[i];
2175: v = a->a + a->i[i];
2176: n = a->i[i+1] - a->i[i];
2177: alpha1 = x[16*i];
2178: alpha2 = x[16*i+1];
2179: alpha3 = x[16*i+2];
2180: alpha4 = x[16*i+3];
2181: alpha5 = x[16*i+4];
2182: alpha6 = x[16*i+5];
2183: alpha7 = x[16*i+6];
2184: alpha8 = x[16*i+7];
2185: alpha9 = x[16*i+8];
2186: alpha10 = x[16*i+9];
2187: alpha11 = x[16*i+10];
2188: alpha12 = x[16*i+11];
2189: alpha13 = x[16*i+12];
2190: alpha14 = x[16*i+13];
2191: alpha15 = x[16*i+14];
2192: alpha16 = x[16*i+15];
2193: while (n-->0) {
2194: y[16*(*idx)] += alpha1*(*v);
2195: y[16*(*idx)+1] += alpha2*(*v);
2196: y[16*(*idx)+2] += alpha3*(*v);
2197: y[16*(*idx)+3] += alpha4*(*v);
2198: y[16*(*idx)+4] += alpha5*(*v);
2199: y[16*(*idx)+5] += alpha6*(*v);
2200: y[16*(*idx)+6] += alpha7*(*v);
2201: y[16*(*idx)+7] += alpha8*(*v);
2202: y[16*(*idx)+8] += alpha9*(*v);
2203: y[16*(*idx)+9] += alpha10*(*v);
2204: y[16*(*idx)+10] += alpha11*(*v);
2205: y[16*(*idx)+11] += alpha12*(*v);
2206: y[16*(*idx)+12] += alpha13*(*v);
2207: y[16*(*idx)+13] += alpha14*(*v);
2208: y[16*(*idx)+14] += alpha15*(*v);
2209: y[16*(*idx)+15] += alpha16*(*v);
2210: idx++; v++;
2211: }
2212: }
2213: PetscLogFlops(32.0*a->nz);
2214: VecRestoreArrayRead(xx,&x);
2215: VecRestoreArray(yy,&y);
2216: return(0);
2217: }
2219: PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2220: {
2221: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2222: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2223: const PetscScalar *x,*v;
2224: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2225: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2226: PetscErrorCode ierr;
2227: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2228: PetscInt n,i,jrow,j;
2231: if (yy != zz) {VecCopy(yy,zz);}
2232: VecGetArrayRead(xx,&x);
2233: VecGetArray(zz,&y);
2234: idx = a->j;
2235: v = a->a;
2236: ii = a->i;
2238: for (i=0; i<m; i++) {
2239: jrow = ii[i];
2240: n = ii[i+1] - jrow;
2241: sum1 = 0.0;
2242: sum2 = 0.0;
2243: sum3 = 0.0;
2244: sum4 = 0.0;
2245: sum5 = 0.0;
2246: sum6 = 0.0;
2247: sum7 = 0.0;
2248: sum8 = 0.0;
2249: sum9 = 0.0;
2250: sum10 = 0.0;
2251: sum11 = 0.0;
2252: sum12 = 0.0;
2253: sum13 = 0.0;
2254: sum14 = 0.0;
2255: sum15 = 0.0;
2256: sum16 = 0.0;
2257: for (j=0; j<n; j++) {
2258: sum1 += v[jrow]*x[16*idx[jrow]];
2259: sum2 += v[jrow]*x[16*idx[jrow]+1];
2260: sum3 += v[jrow]*x[16*idx[jrow]+2];
2261: sum4 += v[jrow]*x[16*idx[jrow]+3];
2262: sum5 += v[jrow]*x[16*idx[jrow]+4];
2263: sum6 += v[jrow]*x[16*idx[jrow]+5];
2264: sum7 += v[jrow]*x[16*idx[jrow]+6];
2265: sum8 += v[jrow]*x[16*idx[jrow]+7];
2266: sum9 += v[jrow]*x[16*idx[jrow]+8];
2267: sum10 += v[jrow]*x[16*idx[jrow]+9];
2268: sum11 += v[jrow]*x[16*idx[jrow]+10];
2269: sum12 += v[jrow]*x[16*idx[jrow]+11];
2270: sum13 += v[jrow]*x[16*idx[jrow]+12];
2271: sum14 += v[jrow]*x[16*idx[jrow]+13];
2272: sum15 += v[jrow]*x[16*idx[jrow]+14];
2273: sum16 += v[jrow]*x[16*idx[jrow]+15];
2274: jrow++;
2275: }
2276: y[16*i] += sum1;
2277: y[16*i+1] += sum2;
2278: y[16*i+2] += sum3;
2279: y[16*i+3] += sum4;
2280: y[16*i+4] += sum5;
2281: y[16*i+5] += sum6;
2282: y[16*i+6] += sum7;
2283: y[16*i+7] += sum8;
2284: y[16*i+8] += sum9;
2285: y[16*i+9] += sum10;
2286: y[16*i+10] += sum11;
2287: y[16*i+11] += sum12;
2288: y[16*i+12] += sum13;
2289: y[16*i+13] += sum14;
2290: y[16*i+14] += sum15;
2291: y[16*i+15] += sum16;
2292: }
2294: PetscLogFlops(32.0*a->nz);
2295: VecRestoreArrayRead(xx,&x);
2296: VecRestoreArray(zz,&y);
2297: return(0);
2298: }
2300: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2301: {
2302: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2303: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2304: const PetscScalar *x,*v;
2305: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2306: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2307: PetscErrorCode ierr;
2308: const PetscInt m = b->AIJ->rmap->n,*idx;
2309: PetscInt n,i;
2312: if (yy != zz) {VecCopy(yy,zz);}
2313: VecGetArrayRead(xx,&x);
2314: VecGetArray(zz,&y);
2315: for (i=0; i<m; i++) {
2316: idx = a->j + a->i[i];
2317: v = a->a + a->i[i];
2318: n = a->i[i+1] - a->i[i];
2319: alpha1 = x[16*i];
2320: alpha2 = x[16*i+1];
2321: alpha3 = x[16*i+2];
2322: alpha4 = x[16*i+3];
2323: alpha5 = x[16*i+4];
2324: alpha6 = x[16*i+5];
2325: alpha7 = x[16*i+6];
2326: alpha8 = x[16*i+7];
2327: alpha9 = x[16*i+8];
2328: alpha10 = x[16*i+9];
2329: alpha11 = x[16*i+10];
2330: alpha12 = x[16*i+11];
2331: alpha13 = x[16*i+12];
2332: alpha14 = x[16*i+13];
2333: alpha15 = x[16*i+14];
2334: alpha16 = x[16*i+15];
2335: while (n-->0) {
2336: y[16*(*idx)] += alpha1*(*v);
2337: y[16*(*idx)+1] += alpha2*(*v);
2338: y[16*(*idx)+2] += alpha3*(*v);
2339: y[16*(*idx)+3] += alpha4*(*v);
2340: y[16*(*idx)+4] += alpha5*(*v);
2341: y[16*(*idx)+5] += alpha6*(*v);
2342: y[16*(*idx)+6] += alpha7*(*v);
2343: y[16*(*idx)+7] += alpha8*(*v);
2344: y[16*(*idx)+8] += alpha9*(*v);
2345: y[16*(*idx)+9] += alpha10*(*v);
2346: y[16*(*idx)+10] += alpha11*(*v);
2347: y[16*(*idx)+11] += alpha12*(*v);
2348: y[16*(*idx)+12] += alpha13*(*v);
2349: y[16*(*idx)+13] += alpha14*(*v);
2350: y[16*(*idx)+14] += alpha15*(*v);
2351: y[16*(*idx)+15] += alpha16*(*v);
2352: idx++; v++;
2353: }
2354: }
2355: PetscLogFlops(32.0*a->nz);
2356: VecRestoreArrayRead(xx,&x);
2357: VecRestoreArray(zz,&y);
2358: return(0);
2359: }
2361: /*--------------------------------------------------------------------------------------------*/
2362: PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2363: {
2364: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2365: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2366: const PetscScalar *x,*v;
2367: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2368: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2369: PetscErrorCode ierr;
2370: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2371: PetscInt nonzerorow=0,n,i,jrow,j;
2374: VecGetArrayRead(xx,&x);
2375: VecGetArray(yy,&y);
2376: idx = a->j;
2377: v = a->a;
2378: ii = a->i;
2380: for (i=0; i<m; i++) {
2381: jrow = ii[i];
2382: n = ii[i+1] - jrow;
2383: sum1 = 0.0;
2384: sum2 = 0.0;
2385: sum3 = 0.0;
2386: sum4 = 0.0;
2387: sum5 = 0.0;
2388: sum6 = 0.0;
2389: sum7 = 0.0;
2390: sum8 = 0.0;
2391: sum9 = 0.0;
2392: sum10 = 0.0;
2393: sum11 = 0.0;
2394: sum12 = 0.0;
2395: sum13 = 0.0;
2396: sum14 = 0.0;
2397: sum15 = 0.0;
2398: sum16 = 0.0;
2399: sum17 = 0.0;
2400: sum18 = 0.0;
2402: nonzerorow += (n>0);
2403: for (j=0; j<n; j++) {
2404: sum1 += v[jrow]*x[18*idx[jrow]];
2405: sum2 += v[jrow]*x[18*idx[jrow]+1];
2406: sum3 += v[jrow]*x[18*idx[jrow]+2];
2407: sum4 += v[jrow]*x[18*idx[jrow]+3];
2408: sum5 += v[jrow]*x[18*idx[jrow]+4];
2409: sum6 += v[jrow]*x[18*idx[jrow]+5];
2410: sum7 += v[jrow]*x[18*idx[jrow]+6];
2411: sum8 += v[jrow]*x[18*idx[jrow]+7];
2412: sum9 += v[jrow]*x[18*idx[jrow]+8];
2413: sum10 += v[jrow]*x[18*idx[jrow]+9];
2414: sum11 += v[jrow]*x[18*idx[jrow]+10];
2415: sum12 += v[jrow]*x[18*idx[jrow]+11];
2416: sum13 += v[jrow]*x[18*idx[jrow]+12];
2417: sum14 += v[jrow]*x[18*idx[jrow]+13];
2418: sum15 += v[jrow]*x[18*idx[jrow]+14];
2419: sum16 += v[jrow]*x[18*idx[jrow]+15];
2420: sum17 += v[jrow]*x[18*idx[jrow]+16];
2421: sum18 += v[jrow]*x[18*idx[jrow]+17];
2422: jrow++;
2423: }
2424: y[18*i] = sum1;
2425: y[18*i+1] = sum2;
2426: y[18*i+2] = sum3;
2427: y[18*i+3] = sum4;
2428: y[18*i+4] = sum5;
2429: y[18*i+5] = sum6;
2430: y[18*i+6] = sum7;
2431: y[18*i+7] = sum8;
2432: y[18*i+8] = sum9;
2433: y[18*i+9] = sum10;
2434: y[18*i+10] = sum11;
2435: y[18*i+11] = sum12;
2436: y[18*i+12] = sum13;
2437: y[18*i+13] = sum14;
2438: y[18*i+14] = sum15;
2439: y[18*i+15] = sum16;
2440: y[18*i+16] = sum17;
2441: y[18*i+17] = sum18;
2442: }
2444: PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);
2445: VecRestoreArrayRead(xx,&x);
2446: VecRestoreArray(yy,&y);
2447: return(0);
2448: }
2450: PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2451: {
2452: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2453: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2454: const PetscScalar *x,*v;
2455: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2456: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2457: PetscErrorCode ierr;
2458: const PetscInt m = b->AIJ->rmap->n,*idx;
2459: PetscInt n,i;
2462: VecSet(yy,0.0);
2463: VecGetArrayRead(xx,&x);
2464: VecGetArray(yy,&y);
2466: for (i=0; i<m; i++) {
2467: idx = a->j + a->i[i];
2468: v = a->a + a->i[i];
2469: n = a->i[i+1] - a->i[i];
2470: alpha1 = x[18*i];
2471: alpha2 = x[18*i+1];
2472: alpha3 = x[18*i+2];
2473: alpha4 = x[18*i+3];
2474: alpha5 = x[18*i+4];
2475: alpha6 = x[18*i+5];
2476: alpha7 = x[18*i+6];
2477: alpha8 = x[18*i+7];
2478: alpha9 = x[18*i+8];
2479: alpha10 = x[18*i+9];
2480: alpha11 = x[18*i+10];
2481: alpha12 = x[18*i+11];
2482: alpha13 = x[18*i+12];
2483: alpha14 = x[18*i+13];
2484: alpha15 = x[18*i+14];
2485: alpha16 = x[18*i+15];
2486: alpha17 = x[18*i+16];
2487: alpha18 = x[18*i+17];
2488: while (n-->0) {
2489: y[18*(*idx)] += alpha1*(*v);
2490: y[18*(*idx)+1] += alpha2*(*v);
2491: y[18*(*idx)+2] += alpha3*(*v);
2492: y[18*(*idx)+3] += alpha4*(*v);
2493: y[18*(*idx)+4] += alpha5*(*v);
2494: y[18*(*idx)+5] += alpha6*(*v);
2495: y[18*(*idx)+6] += alpha7*(*v);
2496: y[18*(*idx)+7] += alpha8*(*v);
2497: y[18*(*idx)+8] += alpha9*(*v);
2498: y[18*(*idx)+9] += alpha10*(*v);
2499: y[18*(*idx)+10] += alpha11*(*v);
2500: y[18*(*idx)+11] += alpha12*(*v);
2501: y[18*(*idx)+12] += alpha13*(*v);
2502: y[18*(*idx)+13] += alpha14*(*v);
2503: y[18*(*idx)+14] += alpha15*(*v);
2504: y[18*(*idx)+15] += alpha16*(*v);
2505: y[18*(*idx)+16] += alpha17*(*v);
2506: y[18*(*idx)+17] += alpha18*(*v);
2507: idx++; v++;
2508: }
2509: }
2510: PetscLogFlops(36.0*a->nz);
2511: VecRestoreArrayRead(xx,&x);
2512: VecRestoreArray(yy,&y);
2513: return(0);
2514: }
2516: PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2517: {
2518: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2519: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2520: const PetscScalar *x,*v;
2521: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2522: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2523: PetscErrorCode ierr;
2524: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2525: PetscInt n,i,jrow,j;
2528: if (yy != zz) {VecCopy(yy,zz);}
2529: VecGetArrayRead(xx,&x);
2530: VecGetArray(zz,&y);
2531: idx = a->j;
2532: v = a->a;
2533: ii = a->i;
2535: for (i=0; i<m; i++) {
2536: jrow = ii[i];
2537: n = ii[i+1] - jrow;
2538: sum1 = 0.0;
2539: sum2 = 0.0;
2540: sum3 = 0.0;
2541: sum4 = 0.0;
2542: sum5 = 0.0;
2543: sum6 = 0.0;
2544: sum7 = 0.0;
2545: sum8 = 0.0;
2546: sum9 = 0.0;
2547: sum10 = 0.0;
2548: sum11 = 0.0;
2549: sum12 = 0.0;
2550: sum13 = 0.0;
2551: sum14 = 0.0;
2552: sum15 = 0.0;
2553: sum16 = 0.0;
2554: sum17 = 0.0;
2555: sum18 = 0.0;
2556: for (j=0; j<n; j++) {
2557: sum1 += v[jrow]*x[18*idx[jrow]];
2558: sum2 += v[jrow]*x[18*idx[jrow]+1];
2559: sum3 += v[jrow]*x[18*idx[jrow]+2];
2560: sum4 += v[jrow]*x[18*idx[jrow]+3];
2561: sum5 += v[jrow]*x[18*idx[jrow]+4];
2562: sum6 += v[jrow]*x[18*idx[jrow]+5];
2563: sum7 += v[jrow]*x[18*idx[jrow]+6];
2564: sum8 += v[jrow]*x[18*idx[jrow]+7];
2565: sum9 += v[jrow]*x[18*idx[jrow]+8];
2566: sum10 += v[jrow]*x[18*idx[jrow]+9];
2567: sum11 += v[jrow]*x[18*idx[jrow]+10];
2568: sum12 += v[jrow]*x[18*idx[jrow]+11];
2569: sum13 += v[jrow]*x[18*idx[jrow]+12];
2570: sum14 += v[jrow]*x[18*idx[jrow]+13];
2571: sum15 += v[jrow]*x[18*idx[jrow]+14];
2572: sum16 += v[jrow]*x[18*idx[jrow]+15];
2573: sum17 += v[jrow]*x[18*idx[jrow]+16];
2574: sum18 += v[jrow]*x[18*idx[jrow]+17];
2575: jrow++;
2576: }
2577: y[18*i] += sum1;
2578: y[18*i+1] += sum2;
2579: y[18*i+2] += sum3;
2580: y[18*i+3] += sum4;
2581: y[18*i+4] += sum5;
2582: y[18*i+5] += sum6;
2583: y[18*i+6] += sum7;
2584: y[18*i+7] += sum8;
2585: y[18*i+8] += sum9;
2586: y[18*i+9] += sum10;
2587: y[18*i+10] += sum11;
2588: y[18*i+11] += sum12;
2589: y[18*i+12] += sum13;
2590: y[18*i+13] += sum14;
2591: y[18*i+14] += sum15;
2592: y[18*i+15] += sum16;
2593: y[18*i+16] += sum17;
2594: y[18*i+17] += sum18;
2595: }
2597: PetscLogFlops(36.0*a->nz);
2598: VecRestoreArrayRead(xx,&x);
2599: VecRestoreArray(zz,&y);
2600: return(0);
2601: }
2603: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2604: {
2605: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2606: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2607: const PetscScalar *x,*v;
2608: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2609: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2610: PetscErrorCode ierr;
2611: const PetscInt m = b->AIJ->rmap->n,*idx;
2612: PetscInt n,i;
2615: if (yy != zz) {VecCopy(yy,zz);}
2616: VecGetArrayRead(xx,&x);
2617: VecGetArray(zz,&y);
2618: for (i=0; i<m; i++) {
2619: idx = a->j + a->i[i];
2620: v = a->a + a->i[i];
2621: n = a->i[i+1] - a->i[i];
2622: alpha1 = x[18*i];
2623: alpha2 = x[18*i+1];
2624: alpha3 = x[18*i+2];
2625: alpha4 = x[18*i+3];
2626: alpha5 = x[18*i+4];
2627: alpha6 = x[18*i+5];
2628: alpha7 = x[18*i+6];
2629: alpha8 = x[18*i+7];
2630: alpha9 = x[18*i+8];
2631: alpha10 = x[18*i+9];
2632: alpha11 = x[18*i+10];
2633: alpha12 = x[18*i+11];
2634: alpha13 = x[18*i+12];
2635: alpha14 = x[18*i+13];
2636: alpha15 = x[18*i+14];
2637: alpha16 = x[18*i+15];
2638: alpha17 = x[18*i+16];
2639: alpha18 = x[18*i+17];
2640: while (n-->0) {
2641: y[18*(*idx)] += alpha1*(*v);
2642: y[18*(*idx)+1] += alpha2*(*v);
2643: y[18*(*idx)+2] += alpha3*(*v);
2644: y[18*(*idx)+3] += alpha4*(*v);
2645: y[18*(*idx)+4] += alpha5*(*v);
2646: y[18*(*idx)+5] += alpha6*(*v);
2647: y[18*(*idx)+6] += alpha7*(*v);
2648: y[18*(*idx)+7] += alpha8*(*v);
2649: y[18*(*idx)+8] += alpha9*(*v);
2650: y[18*(*idx)+9] += alpha10*(*v);
2651: y[18*(*idx)+10] += alpha11*(*v);
2652: y[18*(*idx)+11] += alpha12*(*v);
2653: y[18*(*idx)+12] += alpha13*(*v);
2654: y[18*(*idx)+13] += alpha14*(*v);
2655: y[18*(*idx)+14] += alpha15*(*v);
2656: y[18*(*idx)+15] += alpha16*(*v);
2657: y[18*(*idx)+16] += alpha17*(*v);
2658: y[18*(*idx)+17] += alpha18*(*v);
2659: idx++; v++;
2660: }
2661: }
2662: PetscLogFlops(36.0*a->nz);
2663: VecRestoreArrayRead(xx,&x);
2664: VecRestoreArray(zz,&y);
2665: return(0);
2666: }
2668: PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2669: {
2670: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2671: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2672: const PetscScalar *x,*v;
2673: PetscScalar *y,*sums;
2674: PetscErrorCode ierr;
2675: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2676: PetscInt n,i,jrow,j,dof = b->dof,k;
2679: VecGetArrayRead(xx,&x);
2680: VecSet(yy,0.0);
2681: VecGetArray(yy,&y);
2682: idx = a->j;
2683: v = a->a;
2684: ii = a->i;
2686: for (i=0; i<m; i++) {
2687: jrow = ii[i];
2688: n = ii[i+1] - jrow;
2689: sums = y + dof*i;
2690: for (j=0; j<n; j++) {
2691: for (k=0; k<dof; k++) {
2692: sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2693: }
2694: jrow++;
2695: }
2696: }
2698: PetscLogFlops(2.0*dof*a->nz);
2699: VecRestoreArrayRead(xx,&x);
2700: VecRestoreArray(yy,&y);
2701: return(0);
2702: }
2704: PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2705: {
2706: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2707: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2708: const PetscScalar *x,*v;
2709: PetscScalar *y,*sums;
2710: PetscErrorCode ierr;
2711: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2712: PetscInt n,i,jrow,j,dof = b->dof,k;
2715: if (yy != zz) {VecCopy(yy,zz);}
2716: VecGetArrayRead(xx,&x);
2717: VecGetArray(zz,&y);
2718: idx = a->j;
2719: v = a->a;
2720: ii = a->i;
2722: for (i=0; i<m; i++) {
2723: jrow = ii[i];
2724: n = ii[i+1] - jrow;
2725: sums = y + dof*i;
2726: for (j=0; j<n; j++) {
2727: for (k=0; k<dof; k++) {
2728: sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2729: }
2730: jrow++;
2731: }
2732: }
2734: PetscLogFlops(2.0*dof*a->nz);
2735: VecRestoreArrayRead(xx,&x);
2736: VecRestoreArray(zz,&y);
2737: return(0);
2738: }
2740: PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2741: {
2742: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2743: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2744: const PetscScalar *x,*v,*alpha;
2745: PetscScalar *y;
2746: PetscErrorCode ierr;
2747: const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof;
2748: PetscInt n,i,k;
2751: VecGetArrayRead(xx,&x);
2752: VecSet(yy,0.0);
2753: VecGetArray(yy,&y);
2754: for (i=0; i<m; i++) {
2755: idx = a->j + a->i[i];
2756: v = a->a + a->i[i];
2757: n = a->i[i+1] - a->i[i];
2758: alpha = x + dof*i;
2759: while (n-->0) {
2760: for (k=0; k<dof; k++) {
2761: y[dof*(*idx)+k] += alpha[k]*(*v);
2762: }
2763: idx++; v++;
2764: }
2765: }
2766: PetscLogFlops(2.0*dof*a->nz);
2767: VecRestoreArrayRead(xx,&x);
2768: VecRestoreArray(yy,&y);
2769: return(0);
2770: }
2772: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2773: {
2774: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2775: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2776: const PetscScalar *x,*v,*alpha;
2777: PetscScalar *y;
2778: PetscErrorCode ierr;
2779: const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof;
2780: PetscInt n,i,k;
2783: if (yy != zz) {VecCopy(yy,zz);}
2784: VecGetArrayRead(xx,&x);
2785: VecGetArray(zz,&y);
2786: for (i=0; i<m; i++) {
2787: idx = a->j + a->i[i];
2788: v = a->a + a->i[i];
2789: n = a->i[i+1] - a->i[i];
2790: alpha = x + dof*i;
2791: while (n-->0) {
2792: for (k=0; k<dof; k++) {
2793: y[dof*(*idx)+k] += alpha[k]*(*v);
2794: }
2795: idx++; v++;
2796: }
2797: }
2798: PetscLogFlops(2.0*dof*a->nz);
2799: VecRestoreArrayRead(xx,&x);
2800: VecRestoreArray(zz,&y);
2801: return(0);
2802: }
2804: /*===================================================================================*/
2805: PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2806: {
2807: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2811: /* start the scatter */
2812: VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2813: (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
2814: VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2815: (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
2816: return(0);
2817: }
2819: PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2820: {
2821: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2825: (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2826: (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
2827: VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2828: VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2829: return(0);
2830: }
2832: PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2833: {
2834: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2838: /* start the scatter */
2839: VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2840: (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
2841: VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2842: (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);
2843: return(0);
2844: }
2846: PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2847: {
2848: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2852: (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2853: (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
2854: VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2855: VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2856: return(0);
2857: }
2859: /* ----------------------------------------------------------------*/
2860: PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C)
2861: {
2862: Mat_Product *product = C->product;
2865: if (product->type == MATPRODUCT_PtAP) {
2866: C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ;
2867: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices",MatProductTypes[product->type]);
2868: return(0);
2869: }
2871: PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C)
2872: {
2874: Mat_Product *product = C->product;
2875: PetscBool flg = PETSC_FALSE;
2876: Mat A=product->A,P=product->B;
2877: PetscInt alg=1; /* set default algorithm */
2878: #if !defined(PETSC_HAVE_HYPRE)
2879: const char *algTypes[4] = {"scalable","nonscalable","allatonce","allatonce_merged"};
2880: PetscInt nalg=4;
2881: #else
2882: const char *algTypes[5] = {"scalable","nonscalable","allatonce","allatonce_merged","hypre"};
2883: PetscInt nalg=5;
2884: #endif
2887: if (product->type != MATPRODUCT_PtAP) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices",MatProductTypes[product->type]);
2889: /* PtAP */
2890: /* Check matrix local sizes */
2891: if (A->rmap->rstart != P->rmap->rstart || A->rmap->rend != P->rmap->rend) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Arow (%D, %D) != Prow (%D,%D)",A->rmap->rstart,A->rmap->rend,P->rmap->rstart,P->rmap->rend);
2892: if (A->cmap->rstart != P->rmap->rstart || A->cmap->rend != P->rmap->rend) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, Acol (%D, %D) != Prow (%D,%D)",A->cmap->rstart,A->cmap->rend,P->rmap->rstart,P->rmap->rend);
2894: /* Set the default algorithm */
2895: PetscStrcmp(C->product->alg,"default",&flg);
2896: if (flg) {
2897: MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
2898: }
2900: /* Get runtime option */
2901: PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");
2902: PetscOptionsEList("-matproduct_ptap_via","Algorithmic approach","MatPtAP",algTypes,nalg,algTypes[alg],&alg,&flg);
2903: if (flg) {
2904: MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
2905: }
2906: PetscOptionsEnd();
2908: PetscStrcmp(C->product->alg,"allatonce",&flg);
2909: if (flg) {
2910: C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
2911: return(0);
2912: }
2914: PetscStrcmp(C->product->alg,"allatonce_merged",&flg);
2915: if (flg) {
2916: C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
2917: return(0);
2918: }
2920: /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */
2921: PetscInfo((PetscObject)A,"Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n");
2922: MatConvert(P,MATMPIAIJ,MAT_INPLACE_MATRIX,&P);
2923: MatProductSetFromOptions(C);
2924: return(0);
2925: }
2927: /* ----------------------------------------------------------------*/
2928: PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat C)
2929: {
2930: PetscErrorCode ierr;
2931: PetscFreeSpaceList free_space=NULL,current_space=NULL;
2932: Mat_SeqMAIJ *pp =(Mat_SeqMAIJ*)PP->data;
2933: Mat P =pp->AIJ;
2934: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2935: PetscInt *pti,*ptj,*ptJ;
2936: PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2937: const PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
2938: PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
2939: MatScalar *ca;
2940: const PetscInt *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;
2943: /* Get ij structure of P^T */
2944: MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
2946: cn = pn*ppdof;
2947: /* Allocate ci array, arrays for fill computation and */
2948: /* free space for accumulating nonzero column info */
2949: PetscMalloc1(cn+1,&ci);
2950: ci[0] = 0;
2952: /* Work arrays for rows of P^T*A */
2953: PetscMalloc4(an,&ptadenserow,an,&ptasparserow,cn,&denserow,cn,&sparserow);
2954: PetscArrayzero(ptadenserow,an);
2955: PetscArrayzero(denserow,cn);
2957: /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2958: /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2959: /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2960: PetscFreeSpaceGet(PetscIntMultTruncate(ai[am]/pm,pn),&free_space);
2961: current_space = free_space;
2963: /* Determine symbolic info for each row of C: */
2964: for (i=0; i<pn; i++) {
2965: ptnzi = pti[i+1] - pti[i];
2966: ptJ = ptj + pti[i];
2967: for (dof=0; dof<ppdof; dof++) {
2968: ptanzi = 0;
2969: /* Determine symbolic row of PtA: */
2970: for (j=0; j<ptnzi; j++) {
2971: /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2972: arow = ptJ[j]*ppdof + dof;
2973: /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2974: anzj = ai[arow+1] - ai[arow];
2975: ajj = aj + ai[arow];
2976: for (k=0; k<anzj; k++) {
2977: if (!ptadenserow[ajj[k]]) {
2978: ptadenserow[ajj[k]] = -1;
2979: ptasparserow[ptanzi++] = ajj[k];
2980: }
2981: }
2982: }
2983: /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2984: ptaj = ptasparserow;
2985: cnzi = 0;
2986: for (j=0; j<ptanzi; j++) {
2987: /* Get offset within block of P */
2988: pshift = *ptaj%ppdof;
2989: /* Get block row of P */
2990: prow = (*ptaj++)/ppdof; /* integer division */
2991: /* P has same number of nonzeros per row as the compressed form */
2992: pnzj = pi[prow+1] - pi[prow];
2993: pjj = pj + pi[prow];
2994: for (k=0;k<pnzj;k++) {
2995: /* Locations in C are shifted by the offset within the block */
2996: /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
2997: if (!denserow[pjj[k]*ppdof+pshift]) {
2998: denserow[pjj[k]*ppdof+pshift] = -1;
2999: sparserow[cnzi++] = pjj[k]*ppdof+pshift;
3000: }
3001: }
3002: }
3004: /* sort sparserow */
3005: PetscSortInt(cnzi,sparserow);
3007: /* If free space is not available, make more free space */
3008: /* Double the amount of total space in the list */
3009: if (current_space->local_remaining<cnzi) {
3010: PetscFreeSpaceGet(PetscIntSumTruncate(cnzi,current_space->total_array_size),¤t_space);
3011: }
3013: /* Copy data into free space, and zero out denserows */
3014: PetscArraycpy(current_space->array,sparserow,cnzi);
3016: current_space->array += cnzi;
3017: current_space->local_used += cnzi;
3018: current_space->local_remaining -= cnzi;
3020: for (j=0; j<ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
3021: for (j=0; j<cnzi; j++) denserow[sparserow[j]] = 0;
3023: /* Aside: Perhaps we should save the pta info for the numerical factorization. */
3024: /* For now, we will recompute what is needed. */
3025: ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
3026: }
3027: }
3028: /* nnz is now stored in ci[ptm], column indices are in the list of free space */
3029: /* Allocate space for cj, initialize cj, and */
3030: /* destroy list of free space and other temporary array(s) */
3031: PetscMalloc1(ci[cn]+1,&cj);
3032: PetscFreeSpaceContiguous(&free_space,cj);
3033: PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);
3035: /* Allocate space for ca */
3036: PetscCalloc1(ci[cn]+1,&ca);
3038: /* put together the new matrix */
3039: MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A),cn,cn,ci,cj,ca,NULL,C);
3040: MatSetBlockSize(C,pp->dof);
3042: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3043: /* Since these are PETSc arrays, change flags to free them as necessary. */
3044: c = (Mat_SeqAIJ*)(C->data);
3045: c->free_a = PETSC_TRUE;
3046: c->free_ij = PETSC_TRUE;
3047: c->nonew = 0;
3049: C->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
3050: C->ops->productnumeric = MatProductNumeric_PtAP;
3052: /* Clean up. */
3053: MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
3054: return(0);
3055: }
3057: PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
3058: {
3059: /* This routine requires testing -- first draft only */
3060: PetscErrorCode ierr;
3061: Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data;
3062: Mat P =pp->AIJ;
3063: Mat_SeqAIJ *a = (Mat_SeqAIJ*) A->data;
3064: Mat_SeqAIJ *p = (Mat_SeqAIJ*) P->data;
3065: Mat_SeqAIJ *c = (Mat_SeqAIJ*) C->data;
3066: const PetscInt *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ,*pjj;
3067: const PetscInt *ci=c->i,*cj=c->j,*cjj;
3068: const PetscInt am =A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3069: PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3070: const MatScalar *aa=a->a,*pa=p->a,*pA,*paj;
3071: MatScalar *ca=c->a,*caj,*apa;
3074: /* Allocate temporary array for storage of one row of A*P */
3075: PetscCalloc3(cn,&apa,cn,&apj,cn,&apjdense);
3077: /* Clear old values in C */
3078: PetscArrayzero(ca,ci[cm]);
3080: for (i=0; i<am; i++) {
3081: /* Form sparse row of A*P */
3082: anzi = ai[i+1] - ai[i];
3083: apnzj = 0;
3084: for (j=0; j<anzi; j++) {
3085: /* Get offset within block of P */
3086: pshift = *aj%ppdof;
3087: /* Get block row of P */
3088: prow = *aj++/ppdof; /* integer division */
3089: pnzj = pi[prow+1] - pi[prow];
3090: pjj = pj + pi[prow];
3091: paj = pa + pi[prow];
3092: for (k=0; k<pnzj; k++) {
3093: poffset = pjj[k]*ppdof+pshift;
3094: if (!apjdense[poffset]) {
3095: apjdense[poffset] = -1;
3096: apj[apnzj++] = poffset;
3097: }
3098: apa[poffset] += (*aa)*paj[k];
3099: }
3100: PetscLogFlops(2.0*pnzj);
3101: aa++;
3102: }
3104: /* Sort the j index array for quick sparse axpy. */
3105: /* Note: a array does not need sorting as it is in dense storage locations. */
3106: PetscSortInt(apnzj,apj);
3108: /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
3109: prow = i/ppdof; /* integer division */
3110: pshift = i%ppdof;
3111: poffset = pi[prow];
3112: pnzi = pi[prow+1] - poffset;
3113: /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
3114: pJ = pj+poffset;
3115: pA = pa+poffset;
3116: for (j=0; j<pnzi; j++) {
3117: crow = (*pJ)*ppdof+pshift;
3118: cjj = cj + ci[crow];
3119: caj = ca + ci[crow];
3120: pJ++;
3121: /* Perform sparse axpy operation. Note cjj includes apj. */
3122: for (k=0,nextap=0; nextap<apnzj; k++) {
3123: if (cjj[k] == apj[nextap]) caj[k] += (*pA)*apa[apj[nextap++]];
3124: }
3125: PetscLogFlops(2.0*apnzj);
3126: pA++;
3127: }
3129: /* Zero the current row info for A*P */
3130: for (j=0; j<apnzj; j++) {
3131: apa[apj[j]] = 0.;
3132: apjdense[apj[j]] = 0;
3133: }
3134: }
3136: /* Assemble the final matrix and clean up */
3137: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
3138: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
3139: PetscFree3(apa,apj,apjdense);
3140: return(0);
3141: }
3143: PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C)
3144: {
3145: PetscErrorCode ierr;
3146: Mat_Product *product = C->product;
3147: Mat A=product->A,P=product->B;
3150: MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A,P,product->fill,C);
3151: return(0);
3152: }
3154: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
3155: {
3157: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPSymbolic is not implemented for MPIMAIJ matrix yet");
3158: }
3160: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
3161: {
3163: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
3164: }
3166: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,Mat);
3168: PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,Mat C)
3169: {
3170: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data;
3171: PetscErrorCode ierr;
3175: MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,C);
3176: return(0);
3177: }
3179: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat,Mat,PetscInt,PetscReal,Mat);
3181: PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A,Mat P,PetscReal fill,Mat C)
3182: {
3183: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data;
3184: PetscErrorCode ierr;
3187: MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A,maij->A,maij->dof,fill,C);
3188: C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce;
3189: return(0);
3190: }
3192: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,Mat);
3194: PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,Mat C)
3195: {
3196: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data;
3197: PetscErrorCode ierr;
3201: MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,C);
3202: return(0);
3203: }
3205: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat,Mat,PetscInt,PetscReal,Mat);
3207: PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A,Mat P,PetscReal fill,Mat C)
3208: {
3209: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)P->data;
3210: PetscErrorCode ierr;
3214: MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A,maij->A,maij->dof,fill,C);
3215: C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged;
3216: return(0);
3217: }
3219: PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C)
3220: {
3221: PetscErrorCode ierr;
3222: Mat_Product *product = C->product;
3223: Mat A=product->A,P=product->B;
3224: PetscBool flg;
3227: PetscStrcmp(product->alg,"allatonce",&flg);
3228: if (flg) {
3229: MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A,P,product->fill,C);
3230: C->ops->productnumeric = MatProductNumeric_PtAP;
3231: return(0);
3232: }
3234: PetscStrcmp(product->alg,"allatonce_merged",&flg);
3235: if (flg) {
3236: MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A,P,product->fill,C);
3237: C->ops->productnumeric = MatProductNumeric_PtAP;
3238: return(0);
3239: }
3241: SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"Mat Product Algorithm is not supported");
3242: }
3244: PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3245: {
3246: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
3247: Mat a = b->AIJ,B;
3248: Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data;
3250: PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3251: PetscInt *cols;
3252: PetscScalar *vals;
3255: MatGetSize(a,&m,&n);
3256: PetscMalloc1(dof*m,&ilen);
3257: for (i=0; i<m; i++) {
3258: nmax = PetscMax(nmax,aij->ilen[i]);
3259: for (j=0; j<dof; j++) ilen[dof*i+j] = aij->ilen[i];
3260: }
3261: MatCreate(PETSC_COMM_SELF,&B);
3262: MatSetSizes(B,dof*m,dof*n,dof*m,dof*n);
3263: MatSetType(B,newtype);
3264: MatSeqAIJSetPreallocation(B,0,ilen);
3265: PetscFree(ilen);
3266: PetscMalloc1(nmax,&icols);
3267: ii = 0;
3268: for (i=0; i<m; i++) {
3269: MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3270: for (j=0; j<dof; j++) {
3271: for (k=0; k<ncols; k++) icols[k] = dof*cols[k]+j;
3272: MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3273: ii++;
3274: }
3275: MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3276: }
3277: PetscFree(icols);
3278: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3279: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3281: if (reuse == MAT_INPLACE_MATRIX) {
3282: MatHeaderReplace(A,&B);
3283: } else {
3284: *newmat = B;
3285: }
3286: return(0);
3287: }
3289: #include <../src/mat/impls/aij/mpi/mpiaij.h>
3291: PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3292: {
3293: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data;
3294: Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
3295: Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
3296: Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
3297: Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
3298: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data;
3299: PetscInt dof = maij->dof,i,j,*dnz = NULL,*onz = NULL,nmax = 0,onmax = 0;
3300: PetscInt *oicols = NULL,*icols = NULL,ncols,*cols = NULL,oncols,*ocols = NULL;
3301: PetscInt rstart,cstart,*garray,ii,k;
3303: PetscScalar *vals,*ovals;
3306: PetscMalloc2(A->rmap->n,&dnz,A->rmap->n,&onz);
3307: for (i=0; i<A->rmap->n/dof; i++) {
3308: nmax = PetscMax(nmax,AIJ->ilen[i]);
3309: onmax = PetscMax(onmax,OAIJ->ilen[i]);
3310: for (j=0; j<dof; j++) {
3311: dnz[dof*i+j] = AIJ->ilen[i];
3312: onz[dof*i+j] = OAIJ->ilen[i];
3313: }
3314: }
3315: MatCreate(PetscObjectComm((PetscObject)A),&B);
3316: MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
3317: MatSetType(B,newtype);
3318: MatMPIAIJSetPreallocation(B,0,dnz,0,onz);
3319: MatSetBlockSize(B,dof);
3320: PetscFree2(dnz,onz);
3322: PetscMalloc2(nmax,&icols,onmax,&oicols);
3323: rstart = dof*maij->A->rmap->rstart;
3324: cstart = dof*maij->A->cmap->rstart;
3325: garray = mpiaij->garray;
3327: ii = rstart;
3328: for (i=0; i<A->rmap->n/dof; i++) {
3329: MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3330: MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3331: for (j=0; j<dof; j++) {
3332: for (k=0; k<ncols; k++) {
3333: icols[k] = cstart + dof*cols[k]+j;
3334: }
3335: for (k=0; k<oncols; k++) {
3336: oicols[k] = dof*garray[ocols[k]]+j;
3337: }
3338: MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3339: MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);
3340: ii++;
3341: }
3342: MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3343: MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3344: }
3345: PetscFree2(icols,oicols);
3347: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3348: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3350: if (reuse == MAT_INPLACE_MATRIX) {
3351: PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
3352: ((PetscObject)A)->refct = 1;
3354: MatHeaderReplace(A,&B);
3356: ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3357: } else {
3358: *newmat = B;
3359: }
3360: return(0);
3361: }
3363: PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
3364: {
3366: Mat A;
3369: MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);
3370: MatCreateSubMatrix(A,isrow,iscol,cll,newmat);
3371: MatDestroy(&A);
3372: return(0);
3373: }
3375: PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *submat[])
3376: {
3378: Mat A;
3381: MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);
3382: MatCreateSubMatrices(A,n,irow,icol,scall,submat);
3383: MatDestroy(&A);
3384: return(0);
3385: }
3387: /* ---------------------------------------------------------------------------------- */
3388: /*@
3389: MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
3390: operations for multicomponent problems. It interpolates each component the same
3391: way independently. The matrix type is based on MATSEQAIJ for sequential matrices,
3392: and MATMPIAIJ for distributed matrices.
3394: Collective
3396: Input Parameters:
3397: + A - the AIJ matrix describing the action on blocks
3398: - dof - the block size (number of components per node)
3400: Output Parameter:
3401: . maij - the new MAIJ matrix
3403: Operations provided:
3404: + MatMult
3405: . MatMultTranspose
3406: . MatMultAdd
3407: . MatMultTransposeAdd
3408: - MatView
3410: Level: advanced
3412: .seealso: MatMAIJGetAIJ(), MatMAIJRedimension(), MATMAIJ
3413: @*/
3414: PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
3415: {
3417: PetscMPIInt size;
3418: PetscInt n;
3419: Mat B;
3420: #if defined(PETSC_HAVE_CUDA)
3421: /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */
3422: PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE;
3423: #endif
3426: dof = PetscAbs(dof);
3427: PetscObjectReference((PetscObject)A);
3429: if (dof == 1) *maij = A;
3430: else {
3431: MatCreate(PetscObjectComm((PetscObject)A),&B);
3432: /* propagate vec type */
3433: MatSetVecType(B,A->defaultvectype);
3434: MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);
3435: PetscLayoutSetBlockSize(B->rmap,dof);
3436: PetscLayoutSetBlockSize(B->cmap,dof);
3437: PetscLayoutSetUp(B->rmap);
3438: PetscLayoutSetUp(B->cmap);
3440: B->assembled = PETSC_TRUE;
3442: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
3443: if (size == 1) {
3444: Mat_SeqMAIJ *b;
3446: MatSetType(B,MATSEQMAIJ);
3448: B->ops->setup = NULL;
3449: B->ops->destroy = MatDestroy_SeqMAIJ;
3450: B->ops->view = MatView_SeqMAIJ;
3452: b = (Mat_SeqMAIJ*)B->data;
3453: b->dof = dof;
3454: b->AIJ = A;
3456: if (dof == 2) {
3457: B->ops->mult = MatMult_SeqMAIJ_2;
3458: B->ops->multadd = MatMultAdd_SeqMAIJ_2;
3459: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2;
3460: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3461: } else if (dof == 3) {
3462: B->ops->mult = MatMult_SeqMAIJ_3;
3463: B->ops->multadd = MatMultAdd_SeqMAIJ_3;
3464: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3;
3465: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3466: } else if (dof == 4) {
3467: B->ops->mult = MatMult_SeqMAIJ_4;
3468: B->ops->multadd = MatMultAdd_SeqMAIJ_4;
3469: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4;
3470: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3471: } else if (dof == 5) {
3472: B->ops->mult = MatMult_SeqMAIJ_5;
3473: B->ops->multadd = MatMultAdd_SeqMAIJ_5;
3474: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5;
3475: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
3476: } else if (dof == 6) {
3477: B->ops->mult = MatMult_SeqMAIJ_6;
3478: B->ops->multadd = MatMultAdd_SeqMAIJ_6;
3479: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6;
3480: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3481: } else if (dof == 7) {
3482: B->ops->mult = MatMult_SeqMAIJ_7;
3483: B->ops->multadd = MatMultAdd_SeqMAIJ_7;
3484: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7;
3485: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
3486: } else if (dof == 8) {
3487: B->ops->mult = MatMult_SeqMAIJ_8;
3488: B->ops->multadd = MatMultAdd_SeqMAIJ_8;
3489: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8;
3490: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
3491: } else if (dof == 9) {
3492: B->ops->mult = MatMult_SeqMAIJ_9;
3493: B->ops->multadd = MatMultAdd_SeqMAIJ_9;
3494: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9;
3495: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3496: } else if (dof == 10) {
3497: B->ops->mult = MatMult_SeqMAIJ_10;
3498: B->ops->multadd = MatMultAdd_SeqMAIJ_10;
3499: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10;
3500: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3501: } else if (dof == 11) {
3502: B->ops->mult = MatMult_SeqMAIJ_11;
3503: B->ops->multadd = MatMultAdd_SeqMAIJ_11;
3504: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11;
3505: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
3506: } else if (dof == 16) {
3507: B->ops->mult = MatMult_SeqMAIJ_16;
3508: B->ops->multadd = MatMultAdd_SeqMAIJ_16;
3509: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16;
3510: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3511: } else if (dof == 18) {
3512: B->ops->mult = MatMult_SeqMAIJ_18;
3513: B->ops->multadd = MatMultAdd_SeqMAIJ_18;
3514: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18;
3515: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
3516: } else {
3517: B->ops->mult = MatMult_SeqMAIJ_N;
3518: B->ops->multadd = MatMultAdd_SeqMAIJ_N;
3519: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N;
3520: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
3521: }
3522: #if defined(PETSC_HAVE_CUDA)
3523: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaijcusparse_C",MatConvert_SeqMAIJ_SeqAIJ);
3524: #endif
3525: PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqmaij_seqaij_C",MatConvert_SeqMAIJ_SeqAIJ);
3526: PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_seqaij_seqmaij_C",MatProductSetFromOptions_SeqAIJ_SeqMAIJ);
3527: } else {
3528: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)A->data;
3529: Mat_MPIMAIJ *b;
3530: IS from,to;
3531: Vec gvec;
3533: MatSetType(B,MATMPIMAIJ);
3535: B->ops->setup = NULL;
3536: B->ops->destroy = MatDestroy_MPIMAIJ;
3537: B->ops->view = MatView_MPIMAIJ;
3539: b = (Mat_MPIMAIJ*)B->data;
3540: b->dof = dof;
3541: b->A = A;
3543: MatCreateMAIJ(mpiaij->A,-dof,&b->AIJ);
3544: MatCreateMAIJ(mpiaij->B,-dof,&b->OAIJ);
3546: VecGetSize(mpiaij->lvec,&n);
3547: VecCreate(PETSC_COMM_SELF,&b->w);
3548: VecSetSizes(b->w,n*dof,n*dof);
3549: VecSetBlockSize(b->w,dof);
3550: VecSetType(b->w,VECSEQ);
3552: /* create two temporary Index sets for build scatter gather */
3553: ISCreateBlock(PetscObjectComm((PetscObject)A),dof,n,mpiaij->garray,PETSC_COPY_VALUES,&from);
3554: ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);
3556: /* create temporary global vector to generate scatter context */
3557: VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),dof,dof*A->cmap->n,dof*A->cmap->N,NULL,&gvec);
3559: /* generate the scatter context */
3560: VecScatterCreate(gvec,from,b->w,to,&b->ctx);
3562: ISDestroy(&from);
3563: ISDestroy(&to);
3564: VecDestroy(&gvec);
3566: B->ops->mult = MatMult_MPIMAIJ_dof;
3567: B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof;
3568: B->ops->multadd = MatMultAdd_MPIMAIJ_dof;
3569: B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
3571: #if defined(PETSC_HAVE_CUDA)
3572: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaijcusparse_C",MatConvert_MPIMAIJ_MPIAIJ);
3573: #endif
3574: PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpimaij_mpiaij_C",MatConvert_MPIMAIJ_MPIAIJ);
3575: PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpimaij_C",MatProductSetFromOptions_MPIAIJ_MPIMAIJ);
3576: }
3577: B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ;
3578: B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ;
3579: MatSetUp(B);
3580: #if defined(PETSC_HAVE_CUDA)
3581: /* temporary until we have CUDA implementation of MAIJ */
3582: {
3583: PetscBool flg;
3584: if (convert) {
3585: PetscObjectTypeCompareAny((PetscObject)A,&flg,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,MATAIJCUSPARSE,"");
3586: if (flg) {
3587: MatConvert(B,((PetscObject)A)->type_name,MAT_INPLACE_MATRIX,&B);
3588: }
3589: }
3590: }
3591: #endif
3592: *maij = B;
3593: }
3594: return(0);
3595: }